Skip to main content

ct_regex_internal/expr/
regex.rs

1use std::fmt::Debug;
2use std::ops::Range;
3
4use super::{CaptureFromRanges, IndexedCaptures};
5use crate::expr::{FindAllCaptures, RangeOfAllMatches, SliceAllMatches};
6use crate::haystack::{
7    HaystackItem, HaystackIter, HaystackOf, HaystackSlice, IntoHaystack, OwnedHaystackable,
8};
9use crate::matcher::Matcher;
10
11/// A trait that is automatically implemented for types produced by the `regex!` macro. Various
12/// function are included that test this pattern against a provided
13/// [`Haystack`](crate::haystack::Haystack).
14///
15/// Most methods will take an [`IntoHaystack`] or [`OwnedHaystackable`] parameter to save the
16/// user from creating their own `Haystack`. This allows values with types like `&str` and
17/// `&mut String` to be passed to these methods.
18///
19/// Although rarely encountered, this trait's generic parameter, `I` refers to the item that can be
20/// matched individually from the provided `Haystack`. This is used so that the same expression can
21/// be used to match various haystack types, including `&str` (`I = char`) and `&[u8]` (`I = u8`).
22/// Implementations for both of these slice/item pairs will be implemented by the macro.
23///
24/// # Function Coverage
25///
26#[doc = "| Description                            | Whole                            | First*                                       | All                                                                     |\n|-|-|-|-|\n| Checks for a match                     | [`is_match`](Self::is_match)     | [`contains_match`](Self::contains_match)     | [`count_matches`](Self::count_matches)                                  |\n| Returns range of match                 | -                                | [`range_of_match`](Self::range_of_match)     | [`range_of_all_matches`](Self::range_of_all_matches)                    |\n| Returns match as a slice               | -                                | [`slice_match`](Self::slice_match)           | [`slice_all_matches`](Self::slice_all_matches)                          |\n| Performs capturing using groups        | [`do_capture`](Self::do_capture) | [`find_capture`](Self::find_capture)         | [`find_all_captures`](Self::find_all_captures)                          |\n| Replaces match with value              | -                                | [`replace`](Self::replace)                   | [`replace_all`](Self::replace_all), [`_using`](Self::replace_all_using) |\n| Replaces match by transforming capture | -                                | [`replace_captured`](Self::replace_captured) | [`replace_all_captured`](Self::replace_all_captured)                    |"include_str!("coverage.md")]
27///
28/// \* Note that these function runs through the Regex first and then the haystack. This means that
29/// substring involved is the one that matches the Regex first, not necessarily the first match in
30/// the haystack. In many cases, this makes no difference.
31pub trait Regex<I: HaystackItem, const N: usize>: Debug {
32    /// This type is a macro generated combination of ZSTs responsible for doing all of the heavy
33    /// lifting involved in actually matching or capturing against a `Haystack`. For realistic
34    /// expressions, this type will be very long an unpleasant to type, hence implementing it as an
35    /// associated type.
36    type Pattern: Matcher<I>;
37
38    /// A macro generated type holding all `N` capture groups in this expression, producing ranges
39    /// or slices of the haystack with aliases for named groups. The generated type also understands
40    /// which groups will always exist in a match and which are optional.
41    ///
42    /// Note that an `Capture` type represents the contents of all groups from a single match. Each
43    /// group only occurs once, if you're expecting the expression the match multiple times in a
44    /// haystack, you will have many `Capture`s.
45    type Capture<'a, S: HaystackSlice<'a>>: CaptureFromRanges<'a, S, N> where I: 'a;
46
47    /// Returns `true` if this Regex matches the **entire** haystack provided. This should probably
48    /// be the default _matching_ function to use.
49    ///
50    /// A similar behavior can be achieved by using start and end anchors in an expression and then
51    /// calling [`contains_match`](Self::contains_match). This function should be preferred however,
52    /// because it fails fast if the first character doesn't match.
53    ///
54    /// To check if this Regex matches and perform capturing, use [`do_capture`](Self::do_capture)
55    /// instead.
56    fn is_match<'a, H: HaystackOf<'a, I>>(hay: impl IntoHaystack<'a, H>) -> bool {
57        let mut hay = hay.into_haystack();
58
59        Self::Pattern::all_matches(&mut hay)
60            .any(|state| hay.rollback(state).is_end())
61    }
62
63    /// Returns `true` if this Regex matches any substring of the haystack provided. To retrieve the
64    /// actual substring itself, use [`slice_match`](Self::slice_match) or
65    /// [`find_capture`](Self::find_capture).
66    ///
67    /// Anchors can be used as a part of this Regex to perform more complex behaviors, but if you're
68    /// just wrapping an expression with `^` and `$`, see [`is_match`](Self::is_match) instead.
69    fn contains_match<'a, H: HaystackOf<'a, I>>(hay: impl IntoHaystack<'a, H>) -> bool {
70        let mut hay = hay.into_haystack();
71
72        while hay.item().is_some() {
73            let start = hay.index();
74
75            if Self::Pattern::all_matches(&mut hay).next().is_some() {
76                return true;
77            }
78
79            hay.rollback(start).progress();
80        }
81        false
82    }
83
84    /// Returns the number of matches present in the haystack provided, optionally including
85    /// `overlapping` matches.
86    ///
87    /// If using this in conjunction with the actual matches themselves, you might be better of
88    /// collecting the other output and checking the length.
89    fn count_matches<'a, H: HaystackOf<'a, I>>(
90        hay: impl IntoHaystack<'a, H>,
91        overlapping: bool,
92    ) -> usize {
93        let mut hay = hay.into_haystack();
94        let mut count = 0;
95
96        while hay.item().is_some() {
97            let start = hay.index();
98
99            if let Some(state_fork) = Self::Pattern::all_matches(&mut hay).next() {
100                count += 1;
101
102                if overlapping {
103                    hay.rollback(start).progress();
104                } else {
105                    hay.rollback(state_fork);
106
107                    if true {
    match (&start, &state_fork) {
        (left_val, right_val) => {
            if *left_val == *right_val {
                let kind = ::core::panicking::AssertKind::Ne;
                ::core::panicking::assert_failed(kind, &*left_val,
                    &*right_val, ::core::option::Option::None);
            }
        }
    };
}debug_assert_ne!(start, state_fork)
108                }
109            } else {
110                hay.rollback(start).progress();
111            }
112        }
113
114        count
115    }
116
117    /// Returns the range that matches this Regex first. This is the range variant of
118    /// [`contains_match`](Self::contains_match). For the actual substring itself, see
119    /// [`slice_match`](Self::slice_match).
120    ///
121    /// Note that there is no range equivalent of [`is_match`](Self::is_match), because any match
122    /// has to be the entire haystack.
123    fn range_of_match<'a, H: HaystackOf<'a, I>>(
124        hay: impl IntoHaystack<'a, H>,
125    ) -> Option<Range<usize>> {
126        let mut hay = hay.into_haystack();
127        range_of_match::<Self, _, _>(&mut hay)
128    }
129
130    /// Returns an iterator over the ranges of all substrings in the provided haystack that match
131    /// this Regex, optionally `overlapping`. For the actual substrings themselves, see
132    /// [`slice_all_matches`](Self::slice_all_matches).
133    ///
134    /// Note that each match is still made greedily. Even with `overlapping = true`, if two possible
135    /// matches start at the same index in the haystack, only the first to match the regex will be
136    /// included.
137    fn range_of_all_matches<'a, H: HaystackOf<'a, I>>(
138        hay: impl IntoHaystack<'a, H>,
139        overlapping: bool,
140    ) -> RangeOfAllMatches<'a, I, H, Self::Pattern> {
141        RangeOfAllMatches::new(hay.into_haystack(), overlapping)
142    }
143
144    /// Returns the slice that matches this Regex first. This is the slicing variant of
145    /// [`range_of_match`](Self::range_of_match).
146    ///
147    /// Note that there is no slicing equivalent of [`is_match`](Self::is_match), because any match
148    /// has to be the entire haystack.
149    fn slice_match<'a, H: HaystackOf<'a, I>>(hay: impl IntoHaystack<'a, H>) -> Option<H::Slice> {
150        let mut hay = hay.into_haystack();
151        let range = range_of_match::<Self, _, _>(&mut hay)?;
152        Some(hay.slice_with(range))
153    }
154
155    /// Returns an iterator over all slices of the provided haystack that match this Regex,
156    /// optionally `overlapping`.
157    ///
158    /// Note that each match is still made greedily. Even with `overlapping = true`, if two possible
159    /// matches start at the same index in the haystack, only the first to match the regex will be
160    /// included.
161    ///
162    /// The returned iterator doesn't implement [`ExactSizeIterator`] because it is lazy, if you
163    /// need to know how many matches are included, [`collect`](Iterator::collect) it first.
164    fn slice_all_matches<'a, H: HaystackOf<'a, I>>(
165        hay: impl IntoHaystack<'a, H>,
166        overlapping: bool,
167    ) -> SliceAllMatches<'a, I, H, Self::Pattern> {
168        SliceAllMatches {
169            inner: RangeOfAllMatches::new(hay.into_haystack(), overlapping),
170        }
171    }
172
173    /// Returns a [`Self::Capture`] representing the provided haystack matched against this Regex.
174    /// This includes any named or numbered capturing groups in the expression. As with
175    /// [`is_match`](Self::is_match), this function acts on the entire haystack, and needs to match
176    /// every character from start to end.
177    ///
178    /// Provides the same result as [`find_capture`](Self::find_capture) with start and end anchors,
179    /// although without needing to check any non-starting substring.
180    fn do_capture<'a, H: HaystackOf<'a, I>>(
181        hay: impl IntoHaystack<'a, H>,
182    ) -> Option<Self::Capture<'a, H::Slice>> {
183        let mut hay = hay.into_haystack();
184        let mut caps = IndexedCaptures::default();
185        let start = hay.index();
186
187        let all_captures = Self::Pattern::all_captures(&mut hay, &mut caps);
188
189        for (state_fork, mut caps_fork) in all_captures {
190            if hay.rollback(state_fork).is_end() {
191                caps_fork.push(0, start..state_fork);
192
193                return Some(
194                    Self::Capture::from_ranges(caps_fork.into_array(), hay.whole_slice())
195                        .expect("failed to convert captures despite matching correctly")
196                );
197            }
198        }
199        None
200    }
201
202    /// Returns the [`Self::Capture`] that matches this Regex first, similar to
203    /// [`slice_match`](Self::slice_match) but with any named or numbered groups included.
204    ///
205    /// Anchors should be used for complex behavior, beyond unconditional start and end matches. See
206    /// [`do_capture`](Self::do_capture) instead to capture a full haystack.
207    fn find_capture<'a, H: HaystackOf<'a, I>>(
208        hay: impl IntoHaystack<'a, H>,
209    ) -> Option<Self::Capture<'a, H::Slice>> {
210        let mut hay = hay.into_haystack();
211
212        while hay.item().is_some() {
213            let start = hay.index();
214
215            let mut caps = IndexedCaptures::default();
216
217            let first = Self::Pattern::all_captures(&mut hay, &mut caps).next();
218
219            hay.rollback(start);
220
221            if let Some((state_fork, mut caps_fork)) = first {
222                caps_fork.push(0, start..state_fork);
223
224                return Some(
225                    Self::Capture::from_ranges(caps_fork.into_array(), hay.inner_slice())
226                        .expect("failed to convert captures despite matching correctly")
227                );
228            }
229            hay.progress()
230        }
231        None
232    }
233
234    /// Returns an iterator over [`Self::Capture`]s representing every full match of this Regex in
235    /// the provided haystack, similar to [`slice_all_matches`](Self::slice_all_matches). This can
236    /// optionally include `overlapping` matches.
237    ///
238    /// Note that each match is still made greedily. Even with `overlapping = true`, if two possible
239    /// matches start at the same index in the haystack, only the first to match the regex will be
240    /// included.
241    fn find_all_captures<'a, H: HaystackOf<'a, I>>(
242        hay: impl IntoHaystack<'a, H>,
243        overlapping: bool,
244    ) -> FindAllCaptures<'a, Self, I, H, N> {
245        FindAllCaptures::new(hay.into_haystack(), overlapping)
246    }
247
248    /// Replaces the first match of this Regex in the provided haystack with the provided slice. The
249    /// slice type required is the one associated with the provided haystack. The return value is a
250    /// boolean indicating whether a match was found and replaced.
251    fn replace<'a, M: OwnedHaystackable<I>>(
252        hay_mut: &mut M,
253        with: <M::Hay<'a> as HaystackIter<'a>>::Slice
254    ) -> bool {
255        let Some(range) = ({
256            let mut hay = hay_mut.as_haystack();
257            range_of_match::<Self, _, _>(&mut hay)
258        }) else {
259            return false;
260        };
261        hay_mut.replace_range(range, with);
262        true
263    }
264
265    /// Replaces every matching substring in the provided haystack with a copy of the provided
266    /// slice. The slice type required is the one associated with the provided haystack. The return
267    /// value is an integer representing the number of matches and replacements that occurred.
268    fn replace_all<'a, M: OwnedHaystackable<I>>(
269        hay_mut: &mut M,
270        with: <M::Hay<'a> as HaystackIter<'a>>::Slice
271    ) -> usize {
272        // Avoids redirecting to replace_all_using to avoid unnecessary clones.
273        let ranges = RangeOfAllMatches::<I, M::Hay<'_>, Self::Pattern>::new(
274            hay_mut.as_haystack(),
275            false
276        ).collect::<Vec<_>>();
277
278        let count = ranges.len();
279        let mut delta = Delta::default();
280
281        for mut range in ranges {
282            delta.apply_to(&mut range);
283
284            let initial_len = hay_mut.len();
285            hay_mut.replace_range(range, with.clone());
286            delta.add_diff(hay_mut.len(), initial_len);
287        }
288
289        count
290    }
291
292    /// Replaces every matching substring in the provided haystack with the return value of the
293    /// provided function. The return type of this functions needs to match the provided haystack.
294    /// The return value is an integer representing the number of matches and replacements that
295    /// occurred.
296    ///
297    /// Because of the use of [`FnMut`] for the parameter, this can be used to replace all matches
298    /// using an iterator by passing in `|| iter.next().unwrap_or_default()`.
299    fn replace_all_using<M: OwnedHaystackable<I>>(
300        hay_mut: &mut M,
301        mut using: impl FnMut() -> M,
302    ) -> usize {
303        let ranges = RangeOfAllMatches::<I, M::Hay<'_>, Self::Pattern>::new(
304            hay_mut.as_haystack(),
305            false
306        ).collect::<Vec<_>>();
307
308        let count = ranges.len();
309        let mut delta = Delta::default();
310
311        for mut range in ranges {
312            delta.apply_to(&mut range);
313
314            let initial_len = hay_mut.len();
315            hay_mut.replace_range(range, using().as_slice());
316            delta.add_diff(hay_mut.len(), initial_len);
317        }
318
319        count
320    }
321
322    // The closure returns M because it can't continue to reference the source, given that we need
323    // to overwrite it.
324
325    /// Replaces the first captured substring in the provided haystack with a computed value. The
326    /// return value is a boolean indicating whether a match was found and replaced.
327    ///
328    /// The provided function is used to create a replacement value when given the capture. The
329    /// replacement value shares a type with the provided haystack. Its simplified signature would
330    /// be `F: FnOnce(Self::Capture<'_, <M::Hay>::Slice>) -> M`. Because of limitations with higher
331    /// ranked trait bounds surrounding closure, it may be necessary to implement this as function
332    /// with lifetime annotations like so:
333    /// ```ignore
334    /// regex!(PhoneNum = r"(0|(?<country_code>\+[0-9]+))(?<number>[0-9]{9})");
335    ///
336    /// fn remove_country_code<'a>(value: PhoneNumCapture<'a, &'a str>) -> String {
337    ///     format!("0{}", value.number())
338    /// }
339    ///
340    /// fn main() {
341    ///     let mut hay = String::from("+1234567890");
342    ///     PhoneNum::replace_captured(hay, remove_country_code);
343    ///     assert_eq!(hay, "0234567890");
344    /// }
345    /// ```
346    fn replace_captured<M, F>(hay_mut: &mut M, replacer: F) -> bool
347    where
348        M: OwnedHaystackable<I>,
349        F: for<'a> FnOnce(Self::Capture<'a, <M::Hay<'a> as HaystackIter<'a>>::Slice>) -> M,
350    {
351        let (range, replacement) = {
352            let Some(caps) = Self::find_capture(hay_mut.as_haystack()) else {
353                return false;
354            };
355            let first = caps.whole_match_range().clone();
356
357            (first, replacer(caps))
358        };
359        hay_mut.replace_range(range, replacement.as_slice());
360        true
361    }
362
363    /// Replaces all captured substring in the provided haystack with a computed value. The return
364    /// value is an integer indicating the number of matches found and replaced.
365    ///
366    /// The provided function is used to create a replacement value when given a capture. The
367    /// replacement value shares a type with the provided haystack. Its simplified signature would
368    /// be `F: FnMut(Self::Capture<'_, <M::Hay>::Slice>) -> M`. Because of limitations with higher
369    /// ranked trait bounds surrounding closure, it may be necessary to implement this as function
370    /// with lifetime annotations as mentioned in the documentation for
371    /// [`replace_captured`](Self::replace_captured).
372    fn replace_all_captured<M, F>(hay_mut: &mut M, mut replacer: F) -> usize
373    where
374        M: OwnedHaystackable<I>,
375        F: for<'a> FnMut(Self::Capture<'a, <M::Hay<'a> as HaystackIter<'a>>::Slice>) -> M,
376    {
377        let replacements: Vec<_> = {
378            let caps = Self::find_all_captures(hay_mut.as_haystack(), false);
379            caps.into_iter()
380                .map(|c| (c.whole_match_range().clone(), replacer(c)))
381                .collect()
382        };
383
384        let count = replacements.len();
385        let mut delta = Delta::default();
386
387        for (mut range, replacement) in replacements {
388            delta.apply_to(&mut range);
389
390            let initial_len = hay_mut.len();
391            hay_mut.replace_range(range, replacement.as_slice());
392            delta.add_diff(hay_mut.len(), initial_len);
393        }
394
395        count
396    }
397}
398
399fn range_of_match<'a, R: Regex<I, N> + ?Sized, I: HaystackItem, const N: usize>(
400    hay: &mut impl HaystackOf<'a, I>,
401) -> Option<Range<usize>> {
402    while hay.item().is_some() {
403        let start = hay.index();
404
405        if let Some(state_fork) = R::Pattern::all_matches(hay).next() {
406            return Some(start..state_fork);
407        }
408
409        hay.rollback(start).progress()
410    }
411    None
412}
413
414/// A helper type for tracking changes in [`OwnedHaystackable`] size when replacing ranges. The type
415/// understands using signed addition for unsigned results.
416#[derive(#[automatically_derived]
impl ::core::fmt::Debug for Delta {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        ::core::fmt::Formatter::debug_tuple_field1_finish(f, "Delta",
            &&self.0)
    }
}Debug, #[automatically_derived]
impl ::core::default::Default for Delta {
    #[inline]
    fn default() -> Delta { Delta(::core::default::Default::default()) }
}Default, #[automatically_derived]
impl ::core::clone::Clone for Delta {
    #[inline]
    fn clone(&self) -> Delta { Delta(::core::clone::Clone::clone(&self.0)) }
}Clone)]
417struct Delta(isize);
418
419impl Delta {
420    /// Add to this `Delta` the difference between the new length, `from`, and the old length, `to`.
421    fn add_diff(&mut self, from: usize, to: usize) {
422        self.0 = self.0.strict_add(
423            from.checked_signed_diff(to)
424                .expect("difference between usizes doesn't fit in an isize")
425        )
426    }
427
428    /// Apply this `Delta` to both elements of `range`.
429    fn apply_to(&self, range: &mut Range<usize>) {
430        range.start = range.start.strict_add_signed(self.0);
431        range.end = range.end.strict_add_signed(self.0);
432    }
433}