ct_regex_internal/expr/
regex.rs

1use std::fmt::Debug;
2
3use crate::{haystack::{Haystack, HaystackItem, HaystackOf, IntoHaystack}, matcher::Matcher};
4use super::{CaptureFromRanges, IndexedCaptures};
5
6// TODO: Use iterator rather than Vec for return type.
7// TODO: Provide a method that returns a range too, not just a slice.
8// TODO: Switch to lazy rollback via iterators.
9
/// A trait that is automatically implemented for types produced by the `regex!` macro. Various
/// functions are included that test this pattern against a provided [`Haystack`].
///
/// Although rarely encountered, this trait's generic parameter `I` refers to the item that can be
/// matched individually from the provided `I::Slice`. This is used so that the same expression can
/// be used to match various haystack types, including `&str` (`I = char`) and `&[u8]` (`I = u8`).
/// Implementations for both of these slice/item pairs will be implemented by the macro.
17pub trait Regex<I: HaystackItem, const N: usize>: Debug {
18    type Pattern: Matcher<I>;
19
20    type Capture<'a, H: Haystack<'a>>: CaptureFromRanges<'a, H, N> where I: 'a;
21
22    /// Returns `true` if this Regex matches the **entire** haystack provided. This should probably
23    /// be the default _matching_ function to use.
24    ///
25    /// A similar behavior can be achieved by using start and end anchors in an expression and then
26    /// calling [`contains_match`](Self::contains_match). This function should be prefered however,
27    /// because it fails fast if the first character doesn't match.
28    ///
29    /// To check if this Regex matches and perform capturing, use [`do_capture`](Self::do_capture)
30    /// instead.
31    fn is_match<'a, H: HaystackOf<'a, I>>(hay: impl IntoHaystack<'a, H>) -> bool {
32        let mut hay = hay.into_haystack();
33
34        Self::Pattern::all_matches(&mut hay)
35            .iter()
36            .any(|state| hay.rollback(*state).is_end())
37    }
38
39    /// Returns `true` if this Regex matches any substring of the haystack provided. To retrieve the
40    /// actual substring itself, use [`slice_matching`](Self::slice_matching) or
41    /// [`find_capture`](Self::find_capture).
42    ///
43    /// Anchors can be used as a part of this Regex to perform more complex behaviors, but if you're
44    /// just wrapping an expression with `^` and `$`, see [`is_match`](Self::is_match) instead.
45    fn contains_match<'a, H: HaystackOf<'a, I>>(hay: impl IntoHaystack<'a, H>) -> bool {
46        let mut hay = hay.into_haystack();
47
48        while hay.item().is_some() {
49            let start = hay.index();
50
51            if Self::Pattern::all_matches(&mut hay).pop().is_some() {
52                return true;
53            }
54
55            hay.rollback(start).progress();
56        }
57        false
58    }
59
60    /// Returns the slice that matches this Regex first. This is the slicing variant of
61    /// [`contains_match`](Self::contains_match).
62    ///
63    /// This function runs through the Regex first and then the haystack. This has a slight semantic
64    /// difference in some scenarios.
65    ///
66    /// Note that there is no slicing equivalent of [`is_match`](Self::is_match), because any match
67    /// has to be the entire haystack.
68    fn slice_matching<'a, H: HaystackOf<'a, I>>(
69        hay: impl IntoHaystack<'a, H>
70    ) -> Option<H::Slice> {
71        let mut hay = hay.into_haystack();
72
73        while hay.item().is_some() {
74            let start = hay.index();
75
76            if let Some(state_fork) = Self::Pattern::all_matches(&mut hay).pop() {
77                let cap = start..state_fork;
78                return Some(hay.slice(cap));
79            }
80
81            hay.rollback(start).progress()
82        }
83        None
84    }
85
86    /// Returns all slices of the provided haystack that match this Regex, optionally `overlapping`.
87    ///
88    /// This is the only match function that returns more than one result.
89    fn slice_all_matching<'a, H: HaystackOf<'a, I>>(
90        hay: impl IntoHaystack<'a, H>,
91        overlapping: bool
92    ) -> Vec<H::Slice> {
93        let mut hay = hay.into_haystack();
94
95        let mut all_matches = ::alloc::vec::Vec::new()vec![];
96
97        while hay.item().is_some() {
98            let start = hay.index();
99
100            if overlapping {
101                if let Some(state_fork) = Self::Pattern::all_matches(&mut hay).pop() {
102                    all_matches.push(start..state_fork);
103                }
104
105                hay.rollback(start).progress();
106            } else {
107                if let Some(state_fork) = Self::Pattern::all_matches(&mut hay).pop() {
108                    all_matches.push(start..state_fork);
109                    hay.rollback(state_fork);
110
111                    // This doesn't seem to make a difference...
112                    if true {
    match (&start, &state_fork) {
        (left_val, right_val) => {
            if *left_val == *right_val {
                let kind = ::core::panicking::AssertKind::Ne;
                ::core::panicking::assert_failed(kind, &*left_val,
                    &*right_val, ::core::option::Option::None);
            }
        }
    };
}debug_assert_ne!(start, state_fork)
113                    // if start == state_fork {
114                    //     // We've already matched at this index.
115                    //     hay.progress();
116                    // }
117                } else {
118                    hay.rollback(start).progress();
119                }
120            }
121        }
122
123        all_matches.into_iter().map(|m| hay.slice(m)).collect()
124    }
125
126    /// Returns a [`Self::Capture`] representing the provided haystack matched against this Regex.
127    /// This includes any named or numbered capturing groups in the expression. As with
128    /// [`is_match`](Self::is_match), this function acts on the entire haystack, and needs to match
129    /// every character from start to end.
130    ///
131    /// Provides the same result as [`find_capture`](Self::find_capture) with start and end anchors,
132    /// although without needing to check any non-starting substring.
133    fn do_capture<'a, H: HaystackOf<'a, I>>(
134        hay: impl IntoHaystack<'a, H>
135    ) -> Option<Self::Capture<'a, H>> {
136        let mut hay = hay.into_haystack();
137
138        let mut caps = IndexedCaptures::default();
139
140        let start = hay.index();
141
142        let all_captures = Self::Pattern::all_captures(&mut hay, &mut caps)
143            .into_iter()
144            .rev();
145
146        for (state_fork, mut caps_fork) in all_captures {
147            if hay.rollback(state_fork).is_end() {
148                caps_fork.push(0, start..state_fork);
149
150                return Some(
151                    Self::Capture::from_ranges(caps_fork.into_array(), hay)
152                        .expect("failed to convert captures despite matching correctly")
153                );
154            }
155        }
156        return None;
157    }
158
159    /// Returns the [`Self::Capture`] that matches this Regex first, similar to
160    /// [`slice_matching`](Self::slice_matching) but with any named or numbered groups included.
161    ///
162    /// Anchors should be used for complex behavior, beyond unconditional start and end matches. See
163    /// [`do_capture`](Self::do_capture) instead to capture a full haystack.
164    fn find_capture<'a, H: HaystackOf<'a, I>>(
165        hay: impl IntoHaystack<'a, H>
166    ) -> Option<Self::Capture<'a, H>> {
167        let mut hay = hay.into_haystack();
168
169        let mut caps = IndexedCaptures::default();
170
171        while hay.item().is_some() {
172            let start = hay.index();
173
174            let first = Self::Pattern::all_captures(&mut hay.clone(), &mut caps)
175                .into_iter()
176                .last();
177
178            if let Some((state_fork, mut caps_fork)) = first {
179                caps_fork.push(0, start..state_fork);
180                hay.rollback(state_fork);
181
182                return Some(
183                    Self::Capture::from_ranges(caps_fork.into_array(), hay)
184                        .expect("failed to convert captures despite matching correctly")
185                );
186            }
187            hay.progress()
188        }
189        None
190    }
191
192    /// Returns a [`Self::Capture`] representing every full match of this Regex in the provided
193    /// haystack, similar to [`slice_all_matching`](Self::slice_all_matching). This can optionally
194    /// include `overlapping` matches.
195    fn find_all_captures<'a, H: HaystackOf<'a, I>>(
196        hay: impl IntoHaystack<'a, H>,
197        overlapping: bool
198    ) -> Vec<Self::Capture<'a, H>> {
199        {
    ::core::panicking::panic_fmt(format_args!("not yet implemented: {0}",
            format_args!("find_all_matches equivalent ({0:?}, {1:?})",
                hay.into_haystack(), overlapping)));
}todo!("find_all_matches equivalent ({:?}, {:?})", hay.into_haystack(), overlapping)
200    }
201}