ct_regex_internal/expr/regex.rs
1use std::fmt::Debug;
2
3use crate::{haystack::{Haystack, HaystackItem, HaystackOf, IntoHaystack}, matcher::Matcher};
4use super::{CaptureFromRanges, IndexedCaptures};
5
6// TODO: Use iterator rather than Vec for return type.
7// TODO: Provide a method that returns a range too, not just a slice.
8// TODO: Switch to lazy rollback via iterators.
9
10/// A trait that is automatically implemented for types produced by the `regex!` macro. Various
11/// function are included that test this pattern against a provided [`Haystack`].
12///
13/// Altough rarely encountered, this trait's generic parameter, `I` refers to the item that can be
14/// matched individually from the provided `I::Slice`. This is used so that the same expression can
15/// be used to match various haystack types, including `&str` (`I = char`) and `&[u8]` (`I = u8`).
16/// Implementations for both of these slice/item pairs will be implemented by the macro.
17pub trait Regex<I: HaystackItem, const N: usize>: Debug {
18 type Pattern: Matcher<I>;
19
20 type Capture<'a, H: Haystack<'a>>: CaptureFromRanges<'a, H, N> where I: 'a;
21
22 /// Returns `true` if this Regex matches the **entire** haystack provided. This should probably
23 /// be the default _matching_ function to use.
24 ///
25 /// A similar behavior can be achieved by using start and end anchors in an expression and then
26 /// calling [`contains_match`](Self::contains_match). This function should be prefered however,
27 /// because it fails fast if the first character doesn't match.
28 ///
29 /// To check if this Regex matches and perform capturing, use [`do_capture`](Self::do_capture)
30 /// instead.
31 fn is_match<'a, H: HaystackOf<'a, I>>(hay: impl IntoHaystack<'a, H>) -> bool {
32 let mut hay = hay.into_haystack();
33
34 Self::Pattern::all_matches(&mut hay)
35 .iter()
36 .any(|state| hay.rollback(*state).is_end())
37 }
38
39 /// Returns `true` if this Regex matches any substring of the haystack provided. To retrieve the
40 /// actual substring itself, use [`slice_matching`](Self::slice_matching) or
41 /// [`find_capture`](Self::find_capture).
42 ///
43 /// Anchors can be used as a part of this Regex to perform more complex behaviors, but if you're
44 /// just wrapping an expression with `^` and `$`, see [`is_match`](Self::is_match) instead.
45 fn contains_match<'a, H: HaystackOf<'a, I>>(hay: impl IntoHaystack<'a, H>) -> bool {
46 let mut hay = hay.into_haystack();
47
48 while hay.item().is_some() {
49 let start = hay.index();
50
51 if Self::Pattern::all_matches(&mut hay).pop().is_some() {
52 return true;
53 }
54
55 hay.rollback(start).progress();
56 }
57 false
58 }
59
60 /// Returns the slice that matches this Regex first. This is the slicing variant of
61 /// [`contains_match`](Self::contains_match).
62 ///
63 /// This function runs through the Regex first and then the haystack. This has a slight semantic
64 /// difference in some scenarios.
65 ///
66 /// Note that there is no slicing equivalent of [`is_match`](Self::is_match), because any match
67 /// has to be the entire haystack.
68 fn slice_matching<'a, H: HaystackOf<'a, I>>(
69 hay: impl IntoHaystack<'a, H>
70 ) -> Option<H::Slice> {
71 let mut hay = hay.into_haystack();
72
73 while hay.item().is_some() {
74 let start = hay.index();
75
76 if let Some(state_fork) = Self::Pattern::all_matches(&mut hay).pop() {
77 let cap = start..state_fork;
78 return Some(hay.slice(cap));
79 }
80
81 hay.rollback(start).progress()
82 }
83 None
84 }
85
86 /// Returns all slices of the provided haystack that match this Regex, optionally `overlapping`.
87 ///
88 /// This is the only match function that returns more than one result.
89 fn slice_all_matching<'a, H: HaystackOf<'a, I>>(
90 hay: impl IntoHaystack<'a, H>,
91 overlapping: bool
92 ) -> Vec<H::Slice> {
93 let mut hay = hay.into_haystack();
94
95 let mut all_matches = vec![];
96
97 while hay.item().is_some() {
98 let start = hay.index();
99
100 if overlapping {
101 if let Some(state_fork) = Self::Pattern::all_matches(&mut hay).pop() {
102 all_matches.push(start..state_fork);
103 }
104
105 hay.rollback(start).progress();
106 } else {
107 if let Some(state_fork) = Self::Pattern::all_matches(&mut hay).pop() {
108 all_matches.push(start..state_fork);
109 hay.rollback(state_fork);
110
111 // This doesn't seem to make a difference...
112 debug_assert_ne!(start, state_fork)
113 // if start == state_fork {
114 // // We've already matched at this index.
115 // hay.progress();
116 // }
117 } else {
118 hay.rollback(start).progress();
119 }
120 }
121 }
122
123 all_matches.into_iter().map(|m| hay.slice(m)).collect()
124 }
125
126 /// Returns a [`Self::Capture`] representing the provided haystack matched against this Regex.
127 /// This includes any named or numbered capturing groups in the expression. As with
128 /// [`is_match`](Self::is_match), this function acts on the entire haystack, and needs to match
129 /// every character from start to end.
130 ///
131 /// Provides the same result as [`find_capture`](Self::find_capture) with start and end anchors,
132 /// although without needing to check any non-starting substring.
133 fn do_capture<'a, H: HaystackOf<'a, I>>(
134 hay: impl IntoHaystack<'a, H>
135 ) -> Option<Self::Capture<'a, H>> {
136 let mut hay = hay.into_haystack();
137
138 let mut caps = IndexedCaptures::default();
139
140 let start = hay.index();
141
142 let all_captures = Self::Pattern::all_captures(&mut hay, &mut caps)
143 .into_iter()
144 .rev();
145
146 for (state_fork, mut caps_fork) in all_captures {
147 if hay.rollback(state_fork).is_end() {
148 caps_fork.push(0, start..state_fork);
149
150 return Some(
151 Self::Capture::from_ranges(caps_fork.into_array(), hay)
152 .expect("failed to convert captures despite matching correctly")
153 );
154 }
155 }
156 return None;
157 }
158
159 /// Returns the [`Self::Capture`] that matches this Regex first, similar to
160 /// [`slice_matching`](Self::slice_matching) but with any named or numbered groups included.
161 ///
162 /// Anchors should be used for complex behavior, beyond unconditional start and end matches. See
163 /// [`do_capture`](Self::do_capture) instead to capture a full haystack.
164 fn find_capture<'a, H: HaystackOf<'a, I>>(
165 hay: impl IntoHaystack<'a, H>
166 ) -> Option<Self::Capture<'a, H>> {
167 let mut hay = hay.into_haystack();
168
169 let mut caps = IndexedCaptures::default();
170
171 while hay.item().is_some() {
172 let start = hay.index();
173
174 let first = Self::Pattern::all_captures(&mut hay.clone(), &mut caps)
175 .into_iter()
176 .last();
177
178 if let Some((state_fork, mut caps_fork)) = first {
179 caps_fork.push(0, start..state_fork);
180 hay.rollback(state_fork);
181
182 return Some(
183 Self::Capture::from_ranges(caps_fork.into_array(), hay)
184 .expect("failed to convert captures despite matching correctly")
185 );
186 }
187 hay.progress()
188 }
189 None
190 }
191
192 /// Returns a [`Self::Capture`] representing every full match of this Regex in the provided
193 /// haystack, similar to [`slice_all_matching`](Self::slice_all_matching). This can optionally
194 /// include `overlapping` matches.
195 fn find_all_captures<'a, H: HaystackOf<'a, I>>(
196 hay: impl IntoHaystack<'a, H>,
197 overlapping: bool
198 ) -> Vec<Self::Capture<'a, H>> {
199 todo!("find_all_matches equivalent ({:?}, {:?})", hay.into_haystack(), overlapping)
200 }
201}