Skip to main content

ct_regex_internal/matcher/
interface.rs

1use std::fmt::Debug;
2
3use crate::expr::IndexedCaptures;
4use crate::haystack::{HaystackItem, HaystackOf};
5
6pub trait Matcher<I: HaystackItem>: Debug + Default + Clone + Copy {
7    type AllMatches<'a, H: HaystackOf<'a, I>>: Iterator<Item = usize>;
8    type AllCaptures<'a, H: HaystackOf<'a, I>>: Iterator<Item = (usize, IndexedCaptures)>;
9
10    /// Checks if the start of the haystack contains a match for this [`Matcher`]. If this method
11    /// successfully matches the start of the haystack, `hay` is progressed so that `hay.item()`
12    /// hasn't been matched yet. On a fail, the state of hay is undefined.
13    fn matches<'a, H: HaystackOf<'a, I>>(hay: &mut H) -> bool;
14
15    // It would be nice to use a custom Iterator here rather than a Vec, but reversing an arbitrary
16    // match is not easy, so we just progress through linearly and store them all.
17    // This could cause issues with huge haystacks, but: all regexes need to be compiled at compile
18    // time and are hence controlled by the author. If their pattern will be operating on huge
19    // haystacks and need backtracking, that's up to them.
20
21    /// Produces a Vec of all valid haystack states produced as the result of a valid match at the
22    /// start of `hay`, used to implement backtracking. The Vec is produced in reverse priority
23    /// order, so the last match has the highest priority. After calling all_matches, the state of
24    /// `hay` itself is undefined.
25    ///
26    /// # Required
27    /// This method needs to be implemented by all [`Matcher`]s that can match more than one string
28    /// of characters from a haystack.
29    fn all_matches<'a, H: HaystackOf<'a, I>>(hay: &mut H) -> Self::AllMatches<'a, H>;
30
31    /// Checks if the start of the haystack contains a match for this Matcher, writing any groups
32    /// to `caps`. Similar to [`matches`], this method progresses `hay` and `caps` on a success. On
33    /// a fail, they have undefined states.
34    ///
35    /// # Required
36    /// This method needs to be implemented for capturing groups or any type that holds other
37    /// [`Matcher`]s, so that it can redirect to the relevant `capture` methods.
38    fn captures<'a, H: HaystackOf<'a, I>>(hay: &mut H, caps: &mut IndexedCaptures) -> bool {
39        let _ = caps;
40        Self::matches(hay)
41    }
42
43    /// Produces a Vec of all valid captures (and accompanying haystack states) present at the start
44    /// of `hay`. Used to implement backtracking for capturing methods. As with
45    /// [`all_matches`](Matcher::all_matches), the resulting Vec is produced in reverse priority
46    /// order. After calling all_captures, the state of `hay` and `caps` are undefined.
47    ///
48    /// # Required
49    /// This method needs to be implemented for any type that also implements
50    /// [`captures`](Matcher::captures) and [`all_matches`](Matcher::all_matches).
51    fn all_captures<'a, H: HaystackOf<'a, I>>(
52        hay: &mut H,
53        caps: &mut IndexedCaptures,
54    ) -> Self::AllCaptures<'a, H>;
55}
56
57// I'm just going to write this down cause I seem to keep forgetting. For alternations, either
58// branch may match a longer sequence in the haystack, but the first branch should be the priority.
59// This would complicate lazy semantics beyond just reversing the order of return values if it
60// wasn't for the simple fact that *alternations can't be lazy*. Laziness only applies to
61// quantifiers, even if that quantifiers is a 0-1 ("??" in the expression). Because there is always
62// a type between Lazy and Or in the type expression, it doesn't matter.
63// - Or<Lazy<..>, Lazy<..>> is valid and has clear semantics. No need for any special logic.
64// - Lazy<QuantifierNOrMore<Or<..>, N>> is also valid and matches Or eagerly during every
65//   repetition, rolling back if necessary.
66// - Lazy<Or<..>> is where things would get complicated, but there is no way to express it in an
67//   expression.
68
69pub trait LazyMatcher<I: HaystackItem>: Matcher<I> {
70    type LazyAllMatches<'a, H: HaystackOf<'a, I>>: Iterator<Item = usize>;
71    type LazyAllCaptures<'a, H: HaystackOf<'a, I>>: Iterator<Item = (usize, IndexedCaptures)>;
72
73    /// Functions exactly the same as [`Matcher::matches`], except that the haystack's index is left
74    /// at the first location where the match is considered successful.
75    fn lazy_matches<'a, H: HaystackOf<'a, I>>(hay: &mut H) -> bool;
76
77    /// Functions exactly the same as [`Matcher::all_matches`], except that the indices are produced
78    /// in the opposite order. The first value returned by the iterator is the one first encountered
79    /// in the haystack, not the one that produces the longest match.
80    fn lazy_all_matches<'a, H: HaystackOf<'a, I>>(hay: &mut H) -> Self::LazyAllMatches<'a, H>;
81
82    /// Functions exactly the same as [`Matcher::captures`], except that the haystack's index and
83    /// captures represent the first location where the match is considered successful.
84    fn lazy_captures<'a, H: HaystackOf<'a, I>>(hay: &mut H, caps: &mut IndexedCaptures) -> bool;
85
86    /// Functions exactly the same as [`Matcher::all_captures`], except that the indices and
87    /// captures are produced in the opposite order. The first value returned by the iterator is the
88    /// one first encountered in the haystack, not the one that produces the longest match.
89    fn lazy_all_captures<'a, H: HaystackOf<'a, I>>(
90        hay: &mut H,
91        caps: &mut IndexedCaptures,
92    ) -> Self::LazyAllCaptures<'a, H>;
93}