ct_regex_internal/expr/regex.rs
1use std::fmt::Debug;
2use std::ops::Range;
3
4use super::{CaptureFromRanges, IndexedCaptures};
5use crate::expr::{FindAllCaptures, RangeOfAllMatches, SliceAllMatches};
6use crate::haystack::{
7 HaystackItem, HaystackIter, HaystackOf, HaystackSlice, IntoHaystack, OwnedHaystackable,
8};
9use crate::matcher::Matcher;
10
11/// A trait that is automatically implemented for types produced by the `regex!` macro. Various
12/// function are included that test this pattern against a provided
13/// [`Haystack`](crate::haystack::Haystack).
14///
15/// Most methods will take an [`IntoHaystack`] or [`OwnedHaystackable`] parameter to save the
16/// user from creating their own `Haystack`. This allows values with types like `&str` and
17/// `&mut String` to be passed to these methods.
18///
19/// Although rarely encountered, this trait's generic parameter, `I` refers to the item that can be
20/// matched individually from the provided `Haystack`. This is used so that the same expression can
21/// be used to match various haystack types, including `&str` (`I = char`) and `&[u8]` (`I = u8`).
22/// Implementations for both of these slice/item pairs will be implemented by the macro.
23///
24/// # Function Coverage
25///
26#[doc = include_str!("coverage.md")]
27///
28/// \* Note that these function runs through the Regex first and then the haystack. This means that
29/// substring involved is the one that matches the Regex first, not necessarily the first match in
30/// the haystack. In many cases, this makes no difference.
31pub trait Regex<I: HaystackItem, const N: usize>: Debug {
32 /// This type is a macro generated combination of ZSTs responsible for doing all of the heavy
33 /// lifting involved in actually matching or capturing against a `Haystack`. For realistic
34 /// expressions, this type will be very long an unpleasant to type, hence implementing it as an
35 /// associated type.
36 type Pattern: Matcher<I>;
37
38 /// A macro generated type holding all `N` capture groups in this expression, producing ranges
39 /// or slices of the haystack with aliases for named groups. The generated type also understands
40 /// which groups will always exist in a match and which are optional.
41 ///
42 /// Note that an `Capture` type represents the contents of all groups from a single match. Each
43 /// group only occurs once, if you're expecting the expression the match multiple times in a
44 /// haystack, you will have many `Capture`s.
45 type Capture<'a, S: HaystackSlice<'a>>: CaptureFromRanges<'a, S, N> where I: 'a;
46
47 /// Returns `true` if this Regex matches the **entire** haystack provided. This should probably
48 /// be the default _matching_ function to use.
49 ///
50 /// A similar behavior can be achieved by using start and end anchors in an expression and then
51 /// calling [`contains_match`](Self::contains_match). This function should be preferred however,
52 /// because it fails fast if the first character doesn't match.
53 ///
54 /// To check if this Regex matches and perform capturing, use [`do_capture`](Self::do_capture)
55 /// instead.
56 fn is_match<'a, H: HaystackOf<'a, I>>(hay: impl IntoHaystack<'a, H>) -> bool {
57 let mut hay = hay.into_haystack();
58
59 Self::Pattern::all_matches(&mut hay)
60 .any(|state| hay.rollback(state).is_end())
61 }
62
63 /// Returns `true` if this Regex matches any substring of the haystack provided. To retrieve the
64 /// actual substring itself, use [`slice_match`](Self::slice_match) or
65 /// [`find_capture`](Self::find_capture).
66 ///
67 /// Anchors can be used as a part of this Regex to perform more complex behaviors, but if you're
68 /// just wrapping an expression with `^` and `$`, see [`is_match`](Self::is_match) instead.
69 fn contains_match<'a, H: HaystackOf<'a, I>>(hay: impl IntoHaystack<'a, H>) -> bool {
70 let mut hay = hay.into_haystack();
71
72 while hay.item().is_some() {
73 let start = hay.index();
74
75 if Self::Pattern::all_matches(&mut hay).next().is_some() {
76 return true;
77 }
78
79 hay.rollback(start).progress();
80 }
81 false
82 }
83
84 /// Returns the number of matches present in the haystack provided, optionally including
85 /// `overlapping` matches.
86 ///
87 /// If using this in conjunction with the actual matches themselves, you might be better of
88 /// collecting the other output and checking the length.
89 fn count_matches<'a, H: HaystackOf<'a, I>>(
90 hay: impl IntoHaystack<'a, H>,
91 overlapping: bool,
92 ) -> usize {
93 let mut hay = hay.into_haystack();
94 let mut count = 0;
95
96 while hay.item().is_some() {
97 let start = hay.index();
98
99 if let Some(state_fork) = Self::Pattern::all_matches(&mut hay).next() {
100 count += 1;
101
102 if overlapping {
103 hay.rollback(start).progress();
104 } else {
105 hay.rollback(state_fork);
106
107 debug_assert_ne!(start, state_fork)
108 }
109 } else {
110 hay.rollback(start).progress();
111 }
112 }
113
114 count
115 }
116
117 /// Returns the range that matches this Regex first. This is the range variant of
118 /// [`contains_match`](Self::contains_match). For the actual substring itself, see
119 /// [`slice_match`](Self::slice_match).
120 ///
121 /// Note that there is no range equivalent of [`is_match`](Self::is_match), because any match
122 /// has to be the entire haystack.
123 fn range_of_match<'a, H: HaystackOf<'a, I>>(
124 hay: impl IntoHaystack<'a, H>,
125 ) -> Option<Range<usize>> {
126 let mut hay = hay.into_haystack();
127 range_of_match::<Self, _, _>(&mut hay)
128 }
129
130 /// Returns an iterator over the ranges of all substrings in the provided haystack that match
131 /// this Regex, optionally `overlapping`. For the actual substrings themselves, see
132 /// [`slice_all_matches`](Self::slice_all_matches).
133 ///
134 /// Note that each match is still made greedily. Even with `overlapping = true`, if two possible
135 /// matches start at the same index in the haystack, only the first to match the regex will be
136 /// included.
137 fn range_of_all_matches<'a, H: HaystackOf<'a, I>>(
138 hay: impl IntoHaystack<'a, H>,
139 overlapping: bool,
140 ) -> RangeOfAllMatches<'a, I, H, Self::Pattern> {
141 RangeOfAllMatches::new(hay.into_haystack(), overlapping)
142 }
143
144 /// Returns the slice that matches this Regex first. This is the slicing variant of
145 /// [`range_of_match`](Self::range_of_match).
146 ///
147 /// Note that there is no slicing equivalent of [`is_match`](Self::is_match), because any match
148 /// has to be the entire haystack.
149 fn slice_match<'a, H: HaystackOf<'a, I>>(hay: impl IntoHaystack<'a, H>) -> Option<H::Slice> {
150 let mut hay = hay.into_haystack();
151 let range = range_of_match::<Self, _, _>(&mut hay)?;
152 Some(hay.slice_with(range))
153 }
154
155 /// Returns an iterator over all slices of the provided haystack that match this Regex,
156 /// optionally `overlapping`.
157 ///
158 /// Note that each match is still made greedily. Even with `overlapping = true`, if two possible
159 /// matches start at the same index in the haystack, only the first to match the regex will be
160 /// included.
161 ///
162 /// The returned iterator doesn't implement [`ExactSizeIterator`] because it is lazy, if you
163 /// need to know how many matches are included, [`collect`](Iterator::collect) it first.
164 fn slice_all_matches<'a, H: HaystackOf<'a, I>>(
165 hay: impl IntoHaystack<'a, H>,
166 overlapping: bool,
167 ) -> SliceAllMatches<'a, I, H, Self::Pattern> {
168 SliceAllMatches {
169 inner: RangeOfAllMatches::new(hay.into_haystack(), overlapping),
170 }
171 }
172
173 /// Returns a [`Self::Capture`] representing the provided haystack matched against this Regex.
174 /// This includes any named or numbered capturing groups in the expression. As with
175 /// [`is_match`](Self::is_match), this function acts on the entire haystack, and needs to match
176 /// every character from start to end.
177 ///
178 /// Provides the same result as [`find_capture`](Self::find_capture) with start and end anchors,
179 /// although without needing to check any non-starting substring.
180 fn do_capture<'a, H: HaystackOf<'a, I>>(
181 hay: impl IntoHaystack<'a, H>,
182 ) -> Option<Self::Capture<'a, H::Slice>> {
183 let mut hay = hay.into_haystack();
184 let mut caps = IndexedCaptures::default();
185 let start = hay.index();
186
187 let all_captures = Self::Pattern::all_captures(&mut hay, &mut caps);
188
189 for (state_fork, mut caps_fork) in all_captures {
190 if hay.rollback(state_fork).is_end() {
191 caps_fork.push(0, start..state_fork);
192
193 return Some(
194 Self::Capture::from_ranges(caps_fork.into_array(), hay.whole_slice())
195 .expect("failed to convert captures despite matching correctly")
196 );
197 }
198 }
199 None
200 }
201
202 /// Returns the [`Self::Capture`] that matches this Regex first, similar to
203 /// [`slice_match`](Self::slice_match) but with any named or numbered groups included.
204 ///
205 /// Anchors should be used for complex behavior, beyond unconditional start and end matches. See
206 /// [`do_capture`](Self::do_capture) instead to capture a full haystack.
207 fn find_capture<'a, H: HaystackOf<'a, I>>(
208 hay: impl IntoHaystack<'a, H>,
209 ) -> Option<Self::Capture<'a, H::Slice>> {
210 let mut hay = hay.into_haystack();
211
212 while hay.item().is_some() {
213 let start = hay.index();
214
215 let mut caps = IndexedCaptures::default();
216
217 let first = Self::Pattern::all_captures(&mut hay, &mut caps).next();
218
219 hay.rollback(start);
220
221 if let Some((state_fork, mut caps_fork)) = first {
222 caps_fork.push(0, start..state_fork);
223
224 return Some(
225 Self::Capture::from_ranges(caps_fork.into_array(), hay.inner_slice())
226 .expect("failed to convert captures despite matching correctly")
227 );
228 }
229 hay.progress()
230 }
231 None
232 }
233
234 /// Returns an iterator over [`Self::Capture`]s representing every full match of this Regex in
235 /// the provided haystack, similar to [`slice_all_matches`](Self::slice_all_matches). This can
236 /// optionally include `overlapping` matches.
237 ///
238 /// Note that each match is still made greedily. Even with `overlapping = true`, if two possible
239 /// matches start at the same index in the haystack, only the first to match the regex will be
240 /// included.
241 fn find_all_captures<'a, H: HaystackOf<'a, I>>(
242 hay: impl IntoHaystack<'a, H>,
243 overlapping: bool,
244 ) -> FindAllCaptures<'a, Self, I, H, N> {
245 FindAllCaptures::new(hay.into_haystack(), overlapping)
246 }
247
248 /// Replaces the first match of this Regex in the provided haystack with the provided slice. The
249 /// slice type required is the one associated with the provided haystack. The return value is a
250 /// boolean indicating whether a match was found and replaced.
251 fn replace<'a, M: OwnedHaystackable<I>>(
252 hay_mut: &mut M,
253 with: <M::Hay<'a> as HaystackIter<'a>>::Slice
254 ) -> bool {
255 let Some(range) = ({
256 let mut hay = hay_mut.as_haystack();
257 range_of_match::<Self, _, _>(&mut hay)
258 }) else {
259 return false;
260 };
261 hay_mut.replace_range(range, with);
262 true
263 }
264
265 /// Replaces every matching substring in the provided haystack with a copy of the provided
266 /// slice. The slice type required is the one associated with the provided haystack. The return
267 /// value is an integer representing the number of matches and replacements that occurred.
268 fn replace_all<'a, M: OwnedHaystackable<I>>(
269 hay_mut: &mut M,
270 with: <M::Hay<'a> as HaystackIter<'a>>::Slice
271 ) -> usize {
272 // Avoids redirecting to replace_all_using to avoid unnecessary clones.
273 let ranges = RangeOfAllMatches::<I, M::Hay<'_>, Self::Pattern>::new(
274 hay_mut.as_haystack(),
275 false
276 ).collect::<Vec<_>>();
277
278 let count = ranges.len();
279 let mut delta = Delta::default();
280
281 for mut range in ranges {
282 delta.apply_to(&mut range);
283
284 let initial_len = hay_mut.len();
285 hay_mut.replace_range(range, with.clone());
286 delta.add_diff(hay_mut.len(), initial_len);
287 }
288
289 count
290 }
291
292 /// Replaces every matching substring in the provided haystack with the return value of the
293 /// provided function. The return type of this functions needs to match the provided haystack.
294 /// The return value is an integer representing the number of matches and replacements that
295 /// occurred.
296 ///
297 /// Because of the use of [`FnMut`] for the parameter, this can be used to replace all matches
298 /// using an iterator by passing in `|| iter.next().unwrap_or_default()`.
299 fn replace_all_using<M: OwnedHaystackable<I>>(
300 hay_mut: &mut M,
301 mut using: impl FnMut() -> M,
302 ) -> usize {
303 let ranges = RangeOfAllMatches::<I, M::Hay<'_>, Self::Pattern>::new(
304 hay_mut.as_haystack(),
305 false
306 ).collect::<Vec<_>>();
307
308 let count = ranges.len();
309 let mut delta = Delta::default();
310
311 for mut range in ranges {
312 delta.apply_to(&mut range);
313
314 let initial_len = hay_mut.len();
315 hay_mut.replace_range(range, using().as_slice());
316 delta.add_diff(hay_mut.len(), initial_len);
317 }
318
319 count
320 }
321
322 // The closure returns M because it can't continue to reference the source, given that we need
323 // to overwrite it.
324
325 /// Replaces the first captured substring in the provided haystack with a computed value. The
326 /// return value is a boolean indicating whether a match was found and replaced.
327 ///
328 /// The provided function is used to create a replacement value when given the capture. The
329 /// replacement value shares a type with the provided haystack. Its simplified signature would
330 /// be `F: FnOnce(Self::Capture<'_, <M::Hay>::Slice>) -> M`. Because of limitations with higher
331 /// ranked trait bounds surrounding closure, it may be necessary to implement this as function
332 /// with lifetime annotations like so:
333 /// ```ignore
334 /// regex!(PhoneNum = r"(0|(?<country_code>\+[0-9]+))(?<number>[0-9]{9})");
335 ///
336 /// fn remove_country_code<'a>(value: PhoneNumCapture<'a, &'a str>) -> String {
337 /// format!("0{}", value.number())
338 /// }
339 ///
340 /// fn main() {
341 /// let mut hay = String::from("+1234567890");
342 /// PhoneNum::replace_captured(hay, remove_country_code);
343 /// assert_eq!(hay, "0234567890");
344 /// }
345 /// ```
346 fn replace_captured<M, F>(hay_mut: &mut M, replacer: F) -> bool
347 where
348 M: OwnedHaystackable<I>,
349 F: for<'a> FnOnce(Self::Capture<'a, <M::Hay<'a> as HaystackIter<'a>>::Slice>) -> M,
350 {
351 let (range, replacement) = {
352 let Some(caps) = Self::find_capture(hay_mut.as_haystack()) else {
353 return false;
354 };
355 let first = caps.whole_match_range().clone();
356
357 (first, replacer(caps))
358 };
359 hay_mut.replace_range(range, replacement.as_slice());
360 true
361 }
362
363 /// Replaces all captured substring in the provided haystack with a computed value. The return
364 /// value is an integer indicating the number of matches found and replaced.
365 ///
366 /// The provided function is used to create a replacement value when given a capture. The
367 /// replacement value shares a type with the provided haystack. Its simplified signature would
368 /// be `F: FnMut(Self::Capture<'_, <M::Hay>::Slice>) -> M`. Because of limitations with higher
369 /// ranked trait bounds surrounding closure, it may be necessary to implement this as function
370 /// with lifetime annotations as mentioned in the documentation for
371 /// [`replace_captured`](Self::replace_captured).
372 fn replace_all_captured<M, F>(hay_mut: &mut M, mut replacer: F) -> usize
373 where
374 M: OwnedHaystackable<I>,
375 F: for<'a> FnMut(Self::Capture<'a, <M::Hay<'a> as HaystackIter<'a>>::Slice>) -> M,
376 {
377 let replacements: Vec<_> = {
378 let caps = Self::find_all_captures(hay_mut.as_haystack(), false);
379 caps.into_iter()
380 .map(|c| (c.whole_match_range().clone(), replacer(c)))
381 .collect()
382 };
383
384 let count = replacements.len();
385 let mut delta = Delta::default();
386
387 for (mut range, replacement) in replacements {
388 delta.apply_to(&mut range);
389
390 let initial_len = hay_mut.len();
391 hay_mut.replace_range(range, replacement.as_slice());
392 delta.add_diff(hay_mut.len(), initial_len);
393 }
394
395 count
396 }
397}
398
399fn range_of_match<'a, R: Regex<I, N> + ?Sized, I: HaystackItem, const N: usize>(
400 hay: &mut impl HaystackOf<'a, I>,
401) -> Option<Range<usize>> {
402 while hay.item().is_some() {
403 let start = hay.index();
404
405 if let Some(state_fork) = R::Pattern::all_matches(hay).next() {
406 return Some(start..state_fork);
407 }
408
409 hay.rollback(start).progress()
410 }
411 None
412}
413
414/// A helper type for tracking changes in [`OwnedHaystackable`] size when replacing ranges. The type
415/// understands using signed addition for unsigned results.
416#[derive(Debug, Default, Clone)]
417struct Delta(isize);
418
419impl Delta {
420 /// Add to this `Delta` the difference between the new length, `from`, and the old length, `to`.
421 fn add_diff(&mut self, from: usize, to: usize) {
422 self.0 = self.0.strict_add(
423 from.checked_signed_diff(to)
424 .expect("difference between usizes doesn't fit in an isize")
425 )
426 }
427
428 /// Apply this `Delta` to both elements of `range`.
429 fn apply_to(&self, range: &mut Range<usize>) {
430 range.start = range.start.strict_add_signed(self.0);
431 range.end = range.end.strict_add_signed(self.0);
432 }
433}