ct_regex_internal/matcher/interface.rs
1use std::fmt::Debug;
2
3use crate::expr::IndexedCaptures;
4use crate::haystack::{HaystackItem, HaystackOf};
5
6pub trait Matcher<I: HaystackItem>: Debug + Default + Clone + Copy {
7 type AllMatches<'a, H: HaystackOf<'a, I>>: Iterator<Item = usize>;
8 type AllCaptures<'a, H: HaystackOf<'a, I>>: Iterator<Item = (usize, IndexedCaptures)>;
9
10 /// Checks if the start of the haystack contains a match for this [`Matcher`]. If this method
11 /// successfully matches the start of the haystack, `hay` is progressed so that `hay.item()`
12 /// hasn't been matched yet. On a fail, the state of hay is undefined.
13 fn matches<'a, H: HaystackOf<'a, I>>(hay: &mut H) -> bool;
14
15 // It would be nice to use a custom Iterator here rather than a Vec, but reversing an arbitrary
16 // match is not easy, so we just progress through linearly and store them all.
17 // This could cause issues with huge haystacks, but: all regexes need to be compiled at compile
18 // time and are hence controlled by the author. If their pattern will be operating on huge
19 // haystacks and need backtracking, that's up to them.
20
21 /// Produces a Vec of all valid haystack states produced as the result of a valid match at the
22 /// start of `hay`, used to implement backtracking. The Vec is produced in reverse priority
23 /// order, so the last match has the highest priority. After calling all_matches, the state of
24 /// `hay` itself is undefined.
25 ///
26 /// # Required
27 /// This method needs to be implemented by all [`Matcher`]s that can match more than one string
28 /// of characters from a haystack.
29 fn all_matches<'a, H: HaystackOf<'a, I>>(hay: &mut H) -> Self::AllMatches<'a, H>;
30
31 /// Checks if the start of the haystack contains a match for this Matcher, writing any groups
32 /// to `caps`. Similar to [`matches`], this method progresses `hay` and `caps` on a success. On
33 /// a fail, they have undefined states.
34 ///
35 /// # Required
36 /// This method needs to be implemented for capturing groups or any type that holds other
37 /// [`Matcher`]s, so that it can redirect to the relevant `capture` methods.
38 fn captures<'a, H: HaystackOf<'a, I>>(hay: &mut H, caps: &mut IndexedCaptures) -> bool {
39 let _ = caps;
40 Self::matches(hay)
41 }
42
43 /// Produces a Vec of all valid captures (and accompanying haystack states) present at the start
44 /// of `hay`. Used to implement backtracking for capturing methods. As with
45 /// [`all_matches`](Matcher::all_matches), the resulting Vec is produced in reverse priority
46 /// order. After calling all_captures, the state of `hay` and `caps` are undefined.
47 ///
48 /// # Required
49 /// This method needs to be implemented for any type that also implements
50 /// [`captures`](Matcher::captures) and [`all_matches`](Matcher::all_matches).
51 fn all_captures<'a, H: HaystackOf<'a, I>>(
52 hay: &mut H,
53 caps: &mut IndexedCaptures,
54 ) -> Self::AllCaptures<'a, H>;
55}
56
57// I'm just going to write this down cause I seem to keep forgetting. For alternations, either
58// branch may match a longer sequence in the haystack, but the first branch should be the priority.
59// This would complicate lazy semantics beyond just reversing the order of return values if it
60// wasn't for the simple fact that *alternations can't be lazy*. Laziness only applies to
61// quantifiers, even if that quantifiers is a 0-1 ("??" in the expression). Because there is always
62// a type between Lazy and Or in the type expression, it doesn't matter.
63// - Or<Lazy<..>, Lazy<..>> is valid and has clear semantics. No need for any special logic.
64// - Lazy<QuantifierNOrMore<Or<..>, N>> is also valid and matches Or eagerly during every
65// repetition, rolling back if necessary.
66// - Lazy<Or<..>> is where things would get complicated, but there is no way to express it in an
67// expression.
68
69pub trait LazyMatcher<I: HaystackItem>: Matcher<I> {
70 type LazyAllMatches<'a, H: HaystackOf<'a, I>>: Iterator<Item = usize>;
71 type LazyAllCaptures<'a, H: HaystackOf<'a, I>>: Iterator<Item = (usize, IndexedCaptures)>;
72
73 /// Functions exactly the same as [`Matcher::matches`], except that the haystack's index is left
74 /// at the first location where the match is considered successful.
75 fn lazy_matches<'a, H: HaystackOf<'a, I>>(hay: &mut H) -> bool;
76
77 /// Functions exactly the same as [`Matcher::all_matches`], except that the indices are produced
78 /// in the opposite order. The first value returned by the iterator is the one first encountered
79 /// in the haystack, not the one that produces the longest match.
80 fn lazy_all_matches<'a, H: HaystackOf<'a, I>>(hay: &mut H) -> Self::LazyAllMatches<'a, H>;
81
82 /// Functions exactly the same as [`Matcher::captures`], except that the haystack's index and
83 /// captures represent the first location where the match is considered successful.
84 fn lazy_captures<'a, H: HaystackOf<'a, I>>(hay: &mut H, caps: &mut IndexedCaptures) -> bool;
85
86 /// Functions exactly the same as [`Matcher::all_captures`], except that the indices and
87 /// captures are produced in the opposite order. The first value returned by the iterator is the
88 /// one first encountered in the haystack, not the one that produces the longest match.
89 fn lazy_all_captures<'a, H: HaystackOf<'a, I>>(
90 hay: &mut H,
91 caps: &mut IndexedCaptures,
92 ) -> Self::LazyAllCaptures<'a, H>;
93}