Skip to main content

ct_regex_internal/expr/
regex.rs

1use std::fmt::Debug;
2use std::ops::Range;
3
4use super::{CaptureFromRanges, IndexedCaptures};
5use crate::expr::{FindAllCaptures, RangeOfAllMatches, SliceAllMatches};
6use crate::haystack::{
7    HaystackItem, HaystackIter, HaystackOf, HaystackSlice, IntoHaystack, OwnedHaystackable,
8};
9use crate::matcher::Matcher;
10
11/// A trait that is automatically implemented for types produced by the `regex!` macro. Various
12/// function are included that test this pattern against a provided
13/// [`Haystack`](crate::haystack::Haystack).
14///
15/// Most methods will take an [`IntoHaystack`] or [`OwnedHaystackable`] parameter to save the
16/// user from creating their own `Haystack`. This allows values with types like `&str` and
17/// `&mut String` to be passed to these methods.
18///
19/// Altough rarely encountered, this trait's generic parameter, `I` refers to the item that can be
20/// matched individually from the provided `Haystack`. This is used so that the same expression can
21/// be used to match various haystack types, including `&str` (`I = char`) and `&[u8]` (`I = u8`).
22/// Implementations for both of these slice/item pairs will be implemented by the macro.
23///
24/// # Function Coverage
25///
26#[doc = "| Description                            | Whole                            | First*                                       | All                                                                     |\n|-|-|-|-|\n| Checks for a match                     | [`is_match`](Self::is_match)     | [`contains_match`](Self::contains_match)     | [`count_matches`](Self::count_matches)                                  |\n| Returns range of match                 | -                                | [`range_of_match`](Self::range_of_match)     | [`range_of_all_matches`](Self::range_of_all_matches)                    |\n| Returns match as a slice               | -                                | [`slice_match`](Self::slice_match)           | [`slice_all_matches`](Self::slice_all_matches)                          |\n| Performs capturing using groups        | [`do_capture`](Self::do_capture) | [`find_capture`](Self::find_capture)         | [`find_all_captures`](Self::find_all_captures)                          |\n| Replaces match with value              | -                                | [`replace`](Self::replace)                   | [`replace_all`](Self::replace_all), [`_using`](Self::replace_all_using) |\n| Replaces match by transforming capture | -                                | [`replace_captured`](Self::replace_captured) | [`replace_all_captured`](Self::replace_all_captured)                    |"include_str!("coverage.md")]
27///
28/// \* Note that these function runs through the Regex first and then the haystack. This means that
29/// substring involved is the one that matches the Regex first, not necessarily the first match in
30/// the haystack. In many cases, this makes no difference.
31pub trait Regex<I: HaystackItem, const N: usize>: Debug {
32    /// This type is a macro generated combination of ZSTs responsible for doing all of the heavy
33    /// lifting involved in actually matching or capturing against a `Haystack`. For realistic
34    /// expressions, this type will be very long an unpleasant to type, hence implementing it as an
35    /// associated type.
36    type Pattern: Matcher<I>;
37
38    /// A macro generated type holding all `N` capture groups in this expression, producing ranges
39    /// or slices of the haystack with aliases for named groups. The generated type also understands
40    /// which groups will always exist in a match and which are optional.
41    type Capture<'a, S: HaystackSlice<'a>>: CaptureFromRanges<'a, S, N> where I: 'a;
42
43    /// Returns `true` if this Regex matches the **entire** haystack provided. This should probably
44    /// be the default _matching_ function to use.
45    ///
46    /// A similar behavior can be achieved by using start and end anchors in an expression and then
47    /// calling [`contains_match`](Self::contains_match). This function should be prefered however,
48    /// because it fails fast if the first character doesn't match.
49    ///
50    /// To check if this Regex matches and perform capturing, use [`do_capture`](Self::do_capture)
51    /// instead.
52    fn is_match<'a, H: HaystackOf<'a, I>>(hay: impl IntoHaystack<'a, H>) -> bool {
53        let mut hay = hay.into_haystack();
54
55        Self::Pattern::all_matches(&mut hay)
56            .any(|state| hay.rollback(state).is_end())
57    }
58
59    /// Returns `true` if this Regex matches any substring of the haystack provided. To retrieve the
60    /// actual substring itself, use [`slice_match`](Self::slice_match) or
61    /// [`find_capture`](Self::find_capture).
62    ///
63    /// Anchors can be used as a part of this Regex to perform more complex behaviors, but if you're
64    /// just wrapping an expression with `^` and `$`, see [`is_match`](Self::is_match) instead.
65    fn contains_match<'a, H: HaystackOf<'a, I>>(hay: impl IntoHaystack<'a, H>) -> bool {
66        let mut hay = hay.into_haystack();
67
68        while hay.item().is_some() {
69            let start = hay.index();
70
71            if Self::Pattern::all_matches(&mut hay).next().is_some() {
72                return true;
73            }
74
75            hay.rollback(start).progress();
76        }
77        false
78    }
79
80    /// Returns the number of matches present in the haystack provided, optionally including
81    /// `overlapping` matches.
82    ///
83    /// If using this in conjunction with the actual matches themselves, you might be better of
84    /// collecting the other output and checking the length.
85    fn count_matches<'a, H: HaystackOf<'a, I>>(
86        hay: impl IntoHaystack<'a, H>,
87        overlapping: bool,
88    ) -> usize {
89        let mut hay = hay.into_haystack();
90        let mut count = 0;
91
92        while hay.item().is_some() {
93            let start = hay.index();
94
95            if let Some(state_fork) = Self::Pattern::all_matches(&mut hay).next() {
96                count += 1;
97
98                if overlapping {
99                    hay.rollback(start).progress();
100                } else {
101                    hay.rollback(state_fork);
102
103                    if true {
    match (&start, &state_fork) {
        (left_val, right_val) => {
            if *left_val == *right_val {
                let kind = ::core::panicking::AssertKind::Ne;
                ::core::panicking::assert_failed(kind, &*left_val,
                    &*right_val, ::core::option::Option::None);
            }
        }
    };
}debug_assert_ne!(start, state_fork)
104                }
105            } else {
106                hay.rollback(start).progress();
107            }
108        }
109
110        count
111    }
112
113    /// Returns the range that matches this Regex first. This is the range variant of
114    /// [`contains_match`](Self::contains_match). For the actual substring itself, see
115    /// [`slice_match`](Self::slice_match).
116    ///
117    /// Note that there is no range equivalent of [`is_match`](Self::is_match), because any match
118    /// has to be the entire haystack.
119    fn range_of_match<'a, H: HaystackOf<'a, I>>(
120        hay: impl IntoHaystack<'a, H>,
121    ) -> Option<Range<usize>> {
122        let mut hay = hay.into_haystack();
123        range_of_match::<Self, _, _>(&mut hay)
124    }
125
126    /// Returns an iterator over the ranges of all substrings in the provided haystack that match
127    /// this Regex, optionally `overlapping`. For the actual substrins themself, see
128    /// [`slice_all_matches`](Self::slice_all_matches).
129    ///
130    /// Note that each match is still made greedily. Even with `overlapping = true`, if two possible
131    /// matches start at the same index in the haystack, only the first to match the regex will be
132    /// included.
133    fn range_of_all_matches<'a, H: HaystackOf<'a, I>>(
134        hay: impl IntoHaystack<'a, H>,
135        overlapping: bool,
136    ) -> RangeOfAllMatches<'a, I, H, Self::Pattern> {
137        RangeOfAllMatches::new(hay.into_haystack(), overlapping)
138    }
139
140    /// Returns the slice that matches this Regex first. This is the slicing variant of
141    /// [`range_of_match`](Self::range_of_match).
142    ///
143    /// Note that there is no slicing equivalent of [`is_match`](Self::is_match), because any match
144    /// has to be the entire haystack.
145    fn slice_match<'a, H: HaystackOf<'a, I>>(hay: impl IntoHaystack<'a, H>) -> Option<H::Slice> {
146        let mut hay = hay.into_haystack();
147        let range = range_of_match::<Self, _, _>(&mut hay)?;
148        Some(hay.slice_with(range))
149    }
150
151    /// Returns an iterator over all slices of the provided haystack that match this Regex,
152    /// optionally `overlapping`.
153    ///
154    /// Note that each match is still made greedily. Even with `overlapping = true`, if two possible
155    /// matches start at the same index in the haystack, only the first to match the regex will be
156    /// included.
157    ///
158    /// The returned iterator doesn't implement [`ExactSizeIterator`] because it is lazy, if you
159    /// need to know how many matches are included, [`collect`](Iterator::collect) it first.
160    fn slice_all_matches<'a, H: HaystackOf<'a, I>>(
161        hay: impl IntoHaystack<'a, H>,
162        overlapping: bool,
163    ) -> SliceAllMatches<'a, I, H, Self::Pattern> {
164        SliceAllMatches {
165            inner: RangeOfAllMatches::new(hay.into_haystack(), overlapping),
166        }
167    }
168
169    /// Returns a [`Self::Capture`] representing the provided haystack matched against this Regex.
170    /// This includes any named or numbered capturing groups in the expression. As with
171    /// [`is_match`](Self::is_match), this function acts on the entire haystack, and needs to match
172    /// every character from start to end.
173    ///
174    /// Provides the same result as [`find_capture`](Self::find_capture) with start and end anchors,
175    /// although without needing to check any non-starting substring.
176    fn do_capture<'a, H: HaystackOf<'a, I>>(
177        hay: impl IntoHaystack<'a, H>,
178    ) -> Option<Self::Capture<'a, H::Slice>> {
179        let mut hay = hay.into_haystack();
180        let mut caps = IndexedCaptures::default();
181        let start = hay.index();
182
183        let all_captures = Self::Pattern::all_captures(&mut hay, &mut caps);
184
185        for (state_fork, mut caps_fork) in all_captures {
186            if hay.rollback(state_fork).is_end() {
187                caps_fork.push(0, start..state_fork);
188
189                return Some(
190                    Self::Capture::from_ranges(caps_fork.into_array(), hay.whole_slice())
191                        .expect("failed to convert captures despite matching correctly")
192                );
193            }
194        }
195        None
196    }
197
198    /// Returns the [`Self::Capture`] that matches this Regex first, similar to
199    /// [`slice_match`](Self::slice_match) but with any named or numbered groups included.
200    ///
201    /// Anchors should be used for complex behavior, beyond unconditional start and end matches. See
202    /// [`do_capture`](Self::do_capture) instead to capture a full haystack.
203    fn find_capture<'a, H: HaystackOf<'a, I>>(
204        hay: impl IntoHaystack<'a, H>,
205    ) -> Option<Self::Capture<'a, H::Slice>> {
206        let mut hay = hay.into_haystack();
207
208        while hay.item().is_some() {
209            let start = hay.index();
210
211            let mut caps = IndexedCaptures::default();
212
213            let first = Self::Pattern::all_captures(&mut hay, &mut caps).next();
214
215            hay.rollback(start);
216
217            if let Some((state_fork, mut caps_fork)) = first {
218                caps_fork.push(0, start..state_fork);
219
220                return Some(
221                    Self::Capture::from_ranges(caps_fork.into_array(), hay.inner_slice())
222                        .expect("failed to convert captures despite matching correctly")
223                );
224            }
225            hay.progress()
226        }
227        None
228    }
229
230    /// Returns an iterator over [`Self::Capture`]s representing every full match of this Regex in
231    /// the provided haystack, similar to [`slice_all_matches`](Self::slice_all_matches). This can
232    /// optionally include `overlapping` matches.
233    ///
234    /// Note that each match is still made greedily. Even with `overlapping = true`, if two possible
235    /// matches start at the same index in the haystack, only the first to match the regex will be
236    /// included.
237    fn find_all_captures<'a, H: HaystackOf<'a, I>>(
238        hay: impl IntoHaystack<'a, H>,
239        overlapping: bool,
240    ) -> FindAllCaptures<'a, Self, I, H, N> {
241        FindAllCaptures::new(hay.into_haystack(), overlapping)
242    }
243
244    /// Replaces the first match of this Regex in the provided haystack with the provided slice. The
245    /// slice type required is the one associated with the provided haystack. The return value is a
246    /// boolean indicating whether a match was found and replaced.
247    fn replace<'a, M: OwnedHaystackable<I>>(
248        hay_mut: &mut M,
249        with: <M::Hay<'a> as HaystackIter<'a>>::Slice
250    ) -> bool {
251        let Some(range) = ({
252            let mut hay = hay_mut.as_haystack();
253            range_of_match::<Self, _, _>(&mut hay)
254        }) else {
255            return false;
256        };
257        hay_mut.replace_range(range, with);
258        true
259    }
260
261    /// Replaces every matching substring in the provided haystack with a copy of the provided
262    /// slice. The slice type required is the one associated with the provided haystack. The return
263    /// value is an integer representing the number of matches and replacements that occured.
264    fn replace_all<'a, M: OwnedHaystackable<I>>(
265        hay_mut: &mut M,
266        with: <M::Hay<'a> as HaystackIter<'a>>::Slice
267    ) -> usize {
268        // Avoids redirecting to replace_all_using to avoid unnessecary clones.
269        let ranges = RangeOfAllMatches::<I, M::Hay<'_>, Self::Pattern>::new(
270            hay_mut.as_haystack(),
271            false
272        ).collect::<Vec<_>>();
273
274        let count = ranges.len();
275        let mut delta = Delta::new();
276
277        for mut range in ranges {
278            delta.apply_to(&mut range);
279
280            let initial_len = hay_mut.len();
281            hay_mut.replace_range(range, with.clone());
282            delta.add_diff(hay_mut.len(), initial_len);
283        }
284
285        count
286    }
287
288    /// Replaces every matching substring in the provided haystack with the return value of the
289    /// provided function. The return type of this functions needs to match the provided haystack.
290    /// The return value is an integer representing the number of matches and replacements that
291    /// occured.
292    ///
293    /// Because of the use of [`FnMut`] for the parameter, this can be used to replace all matches
294    /// using an iterator by passing in `|| iter.next().unwrap_or_default()`.
295    fn replace_all_using<M: OwnedHaystackable<I>>(
296        hay_mut: &mut M,
297        mut using: impl FnMut() -> M,
298    ) -> usize {
299        let ranges = RangeOfAllMatches::<I, M::Hay<'_>, Self::Pattern>::new(
300            hay_mut.as_haystack(),
301            false
302        ).collect::<Vec<_>>();
303
304        let count = ranges.len();
305        let mut delta = Delta::new();
306
307        for mut range in ranges {
308            delta.apply_to(&mut range);
309
310            let initial_len = hay_mut.len();
311            hay_mut.replace_range(range, using().as_slice());
312            delta.add_diff(hay_mut.len(), initial_len);
313        }
314
315        count
316    }
317
318    // The closure returns M because it can't continue to reference the source, given that we need
319    // to overwrite it.
320
321    /// Replaces the first captured substring in the provided haystack with a computed value. The
322    /// return value is a boolean indicating whether a match was found and replaced.
323    ///
324    /// The provided function is used to create a replacement value when given the capture. The
325    /// replacement value shares a type with the provided haystack. Its simplified signature would
326    /// be `F: FnOnce(Self::Capture<'_, <M::Hay>::Slice>) -> M`. Because of limitations with higher
327    /// ranked trait bounds surrounding closure, it may be necessary to implement this as function
328    /// with lifetime annotations like so:
329    /// ```ignore
330    /// regex!(PhoneNum = r"(0|(?<country_code>\+[0-9]+))(?<number>[0-9]{9})");
331    ///
332    /// fn remove_country_code<'a>(value: PhoneNumCapture<'a, &'a str>) -> String {
333    ///     format!("0{}", value.number())
334    /// }
335    ///
336    /// fn main() {
337    ///     let mut hay = String::from("+1234567890");
338    ///     PhoneNum::replace_captured(hay, remove_country_code);
339    ///     assert_eq!(hay, "0234567890");
340    /// }
341    /// ```
342    fn replace_captured<M, F>(hay_mut: &mut M, replacer: F) -> bool
343    where
344        M: OwnedHaystackable<I>,
345        F: for<'a> FnOnce(Self::Capture<'a, <M::Hay<'a> as HaystackIter<'a>>::Slice>) -> M,
346    {
347        let (range, replacement) = {
348            let Some(caps) = Self::find_capture(hay_mut.as_haystack()) else {
349                return false;
350            };
351            let first = caps.whole_match_range().clone();
352
353            (first, replacer(caps))
354        };
355        hay_mut.replace_range(range, replacement.as_slice());
356        true
357    }
358
359    /// Replaces all captured substring in the provided haystack with a computed value. The return
360    /// value is an integer indicating the number of matches found and replaced.
361    ///
362    /// The provided function is used to create a replacement value when given a capture. The
363    /// replacement value shares a type with the provided haystack. Its simplified signature would
364    /// be `F: FnMut(Self::Capture<'_, <M::Hay>::Slice>) -> M`. Because of limitations with higher
365    /// ranked trait bounds surrounding closure, it may be necessary to implement this as function
366    /// with lifetime annotations as mentioned in the documentation for
367    /// [`replace_captured`](Self::replace_captured).
368    fn replace_all_captured<M, F>(hay_mut: &mut M, mut replacer: F) -> usize
369    where
370        M: OwnedHaystackable<I>,
371        F: for<'a> FnMut(Self::Capture<'a, <M::Hay<'a> as HaystackIter<'a>>::Slice>) -> M,
372    {
373        let replacements: Vec<_> = {
374            let caps = Self::find_all_captures(hay_mut.as_haystack(), false);
375            caps.into_iter()
376                .map(|c| (c.whole_match_range().clone(), replacer(c)))
377                .collect()
378        };
379
380        let count = replacements.len();
381        let mut delta = Delta::new();
382
383        for (mut range, replacement) in replacements {
384            delta.apply_to(&mut range);
385
386            let initial_len = hay_mut.len();
387            hay_mut.replace_range(range, replacement.as_slice());
388            delta.add_diff(hay_mut.len(), initial_len);
389        }
390
391        count
392    }
393}
394
395fn range_of_match<'a, R: Regex<I, N> + ?Sized, I: HaystackItem, const N: usize>(
396    hay: &mut impl HaystackOf<'a, I>,
397) -> Option<Range<usize>> {
398    while hay.item().is_some() {
399        let start = hay.index();
400
401        if let Some(state_fork) = R::Pattern::all_matches(hay).next() {
402            return Some(start..state_fork);
403        }
404
405        hay.rollback(start).progress()
406    }
407    None
408}
409
410#[derive(#[automatically_derived]
impl ::core::fmt::Debug for Delta {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        ::core::fmt::Formatter::debug_tuple_field1_finish(f, "Delta",
            &&self.0)
    }
}Debug, #[automatically_derived]
impl ::core::clone::Clone for Delta {
    #[inline]
    fn clone(&self) -> Delta { Delta(::core::clone::Clone::clone(&self.0)) }
}Clone)]
411struct Delta(isize);
412
413impl Delta {
414    fn new() -> Delta {
415        Delta(0)
416    }
417
418    fn add_diff(&mut self, from: usize, to: usize) {
419        self.0 = self.0.strict_add(
420            from.checked_signed_diff(to)
421                .expect("difference between usizes doesn't fit in an isize")
422        )
423    }
424
425    fn apply_to(&self, range: &mut Range<usize>) {
426        range.start = range.start.strict_add_signed(self.0);
427        range.end = range.end.strict_add_signed(self.0);
428    }
429}