ct_regex_internal/codegen/create_type/
parse.rs1use proc_macro2::TokenStream;
2use regex_syntax::{ast::{Ast, ClassAscii, ClassAsciiKind, ClassBracketed, ClassPerl, ClassPerlKind, ClassSet, ClassSetBinaryOp, ClassSetItem, parse::ParserBuilder}, hir::translate::TranslatorBuilder};
3
4use crate::codegen::{CodegenItem, Group, HirExtension};
5
6pub fn parse_regex<I: CodegenItem>(pat: &str, config: &ConfigExt) -> (TokenStream, Vec<Group>) {
7 let mut ast = config.ast.build()
8 .parse(&pat)
9 .expect("failed to parse regex");
10
11 if !config.complex_classes {
12 simplify_classes(&mut ast);
13 }
14
15 config.hir.build()
16 .translate(&pat, &ast)
17 .expect("failed to parse regex")
18 .into_matcher::<I>()
19}
20
21pub fn simplify_classes(ast: &mut Ast) {
22 let replacement = match ast {
23 Ast::ClassPerl(class) => replace_perl_class(class),
24 Ast::ClassBracketed(class) => return replace_in_class(&mut class.kind),
25 Ast::Repetition(rep) => return simplify_classes(&mut rep.ast),
26 Ast::Group(group) => return simplify_classes(&mut group.ast),
27 Ast::Alternation(alt) => return alt.asts.iter_mut().for_each(simplify_classes),
28 Ast::Concat(cat) => return cat.asts.iter_mut().for_each(simplify_classes),
29 _ => return,
30 };
31 *ast = Ast::ClassBracketed(Box::new(ClassBracketed {
32 span: *ast.span(),
33 negated: false,
34 kind: ClassSet::Item(ClassSetItem::Ascii(replacement))
35 }));
36}
37
38pub fn replace_in_class(class: &mut ClassSet) {
39 match class {
40 ClassSet::BinaryOp(ClassSetBinaryOp { lhs, rhs, .. }) => {
41 replace_in_class(lhs);
42 replace_in_class(rhs);
43 },
44 ClassSet::Item(item) => replace_in_class_set_item(item),
45 }
46}
47
48pub fn replace_in_class_set_item(item: &mut ClassSetItem) {
49 let replacement = match item {
50 ClassSetItem::Perl(class) => replace_perl_class(class),
51 ClassSetItem::Bracketed(class) => return replace_in_class(&mut class.kind),
52 ClassSetItem::Union(class) => {
53 return class.items.iter_mut().for_each(replace_in_class_set_item);
54 },
55 _ => return,
56 };
57 *item = ClassSetItem::Ascii(replacement);
58}
59
60pub fn replace_perl_class(class: &mut ClassPerl) -> ClassAscii {
61 ClassAscii {
62 span: class.span,
63 negated: class.negated,
64 kind: match class.kind {
65 ClassPerlKind::Digit => ClassAsciiKind::Digit,
66 ClassPerlKind::Space => ClassAsciiKind::Space,
67 ClassPerlKind::Word => ClassAsciiKind::Word,
68 },
69 }
70}
71
72#[derive(#[automatically_derived]
impl ::core::fmt::Debug for ConfigExt {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
::core::fmt::Formatter::debug_struct_field3_finish(f, "ConfigExt",
"ast", &self.ast, "hir", &self.hir, "complex_classes",
&&self.complex_classes)
}
}Debug, #[automatically_derived]
impl ::core::default::Default for ConfigExt {
#[inline]
fn default() -> ConfigExt {
ConfigExt {
ast: ::core::default::Default::default(),
hir: ::core::default::Default::default(),
complex_classes: ::core::default::Default::default(),
}
}
}Default, #[automatically_derived]
impl ::core::clone::Clone for ConfigExt {
#[inline]
fn clone(&self) -> ConfigExt {
ConfigExt {
ast: ::core::clone::Clone::clone(&self.ast),
hir: ::core::clone::Clone::clone(&self.hir),
complex_classes: ::core::clone::Clone::clone(&self.complex_classes),
}
}
}Clone)]
73pub struct ConfigExt {
74 pub ast: ParserBuilder,
75 pub hir: TranslatorBuilder,
76 pub complex_classes: bool,
77}
78
// Generates, for each listed identifier, a public builder-style method of the
// same name that forwards a `bool` flag to `self.hir.<name>(flag)` and returns
// `&mut Self` so calls can be chained. Accepts one or more comma-separated
// identifiers (no trailing comma).
macro_rules! impl_hir_methods {
    ($($name:ident),+) => {
        $(
            pub fn $name(&mut self, flag: bool) -> &mut Self {
                self.hir.$name(flag);
                self
            }
        )+
    };
}
91
92impl ConfigExt {
93 self
flag
self.hir.utf8(flag);
self;impl_hir_methods! {
94 case_insensitive,
95 multi_line,
96 dot_matches_new_line,
97 crlf,
98 swap_greed,
99 unicode,
100 utf8
101 }
102
103 pub fn ignore_whitespace(&mut self, flag: bool) -> &mut Self {
104 self.ast.ignore_whitespace(flag);
105 self
106 }
107
108 pub fn complex_classes(&mut self, flag: bool) -> &mut Self {
109 self.complex_classes = flag;
110 self
111 }
112}