ct_regex_internal/haystack/interface.rs
1use std::fmt::Debug;
2use std::ops::Range;
3
4use crate::haystack::HaystackItem;
5
6/// The main underlying trait for [`Haystack`] types, `HaystackIter` should be implemented on new
7/// types that understand slicing and iterating over a haystack that can be sliced into instances of
8/// `Self::Slice`.
9///
10/// For unicode-based haystacks like [`&str`](str), the implementing type needs to be able to deal
11/// with the contained variable width code points.
12///
13/// This trait requires that implementers also implement
14/// [`Iterator<Item = Self::Slice::Item>`](Iterator). When [`Iterator::next`] is called, on a
15/// `HaystackIter` it should return the same value that previous calls to
16/// [`current_item`](Self::current_item) have, before progressing the index to the next item. When
17/// the last item has been returned by `next`, the iterators should return None. Any future calls
18/// should avoid incrementing the index.
19///
20/// Additionally, `HaystackIter`s should be cheap to clone and able to produce and restore an index
21/// representing the current position.
22///
23/// Although possible, there is no point implementing a `HaystackIter` that shares a `Slice` with
24/// another `HaystackIter`.
25pub trait HaystackIter<'a>: Debug + Clone
26 + Iterator<Item = <Self::Slice as HaystackSlice<'a>>::Item>
27{
28 /// The `HaystackSlice` returned by this type when slicing the underlying haystack. This type is
29 /// usually also contained within the implementer used to create an instance via
30 /// [`IntoHaystack`].
31 type Slice: HaystackSlice<'a>;
32
33 /// Returns the item currently being matched in the haystack. Repeatedly calling this method
34 /// should return the same item, until progressed with [`Iterator::next`].
35 fn current_item(&self) -> Option<Self::Item>;
36
37 /// Returns the item last matched in the haystack without making any changes.
38 fn prev_item(&self) -> Option<Self::Item>;
39
40 /// Returns the index of the current item in the original haystack. The returned value should be
41 /// valid to pass to [`Self::go_to`] without causing a panic.
42 fn current_index(&self) -> usize;
43
44 /// Returns the underlying slice, as it was when this `HaystackIter` was created - representing
45 /// the entire haystack being matched against.
46 fn whole_slice(&self) -> Self::Slice;
47
48 /// Returns the remaining contents of this haystack, as a `Slice`. For slice based haystacks,
49 /// this is can be implemented as `&self.inner[self.index..]`.
50 fn remainder_as_slice(&self) -> Self::Slice;
51
52 /// Restores the `index` of the haystack to the provided one. This should only be called with
53 /// indexes obtained by calling [`current_index`](Self::current_index) on this `HaystackIter`.
54 fn go_to(&mut self, index: usize);
55}
56
57/// A trait representing a slice of the underlying haystack for various [`Haystack`] types.
58///
59/// The implementer of this trait is usually but not always, the only implementer of
60/// [`IntoHaystack`] for a haystack type.
61///
62/// It should be noted that this trait is often implemented of a reference to the type in question,
63/// e.g. `&str` or `&[u8]` rather than `str` or `[u8]` themselves, so that the implementing type can
64/// be cloned as required.
65pub trait HaystackSlice<'a>: Debug + Clone + Sized + ToOwned {
66 /// The `HaystackItem` contained within this slice.
67 type Item: HaystackItem;
68
69 /// Slices the underlying slice with the provided (half-open) `range`, used for retrieving
70 /// values of capture groups.
71 fn slice_with(&self, range: Range<usize>) -> Self;
72}
73
74/// A trait used to interface the haystack types use when matching of capturing against a
75/// [`Regex`](crate::expr::Regex), including tracking progression and slicing captures.
76///
77/// It is rare that users will have to interact with this trait, apart from Trait bounds. All public
78/// methods will take an `impl IntoHaystack<'a, H>` as an argument.
79///
80/// `Haystack` is accompanied by another trait, [`HaystackItem`], representing items that can be
81/// matched against a [`Regex`](crate::expr::Regex).
82///
83/// `Haystack`s are stateful and therefore can't be matched against multiple times without being
84/// [`reset`](Self::reset) first, or they will continue where the first pattern finished. They store
85/// their state as a `usize`, which can be obtained via [`index`](Self::index) and restored via
86/// [`rollback`](Self::rollback). Additionally, `Haystack`s are cheap to clone, relying on shallow
87/// clones or reference counting.
88pub trait Haystack<'a>: HaystackIter<'a> {
89 fn item(&self) -> Option<Self::Item> {
90 self.current_item()
91 }
92
93 fn index(&self) -> usize {
94 self.current_index()
95 }
96
97 // Progression is only completed by elements which explicitly check the byte and succeed.
98 fn progress(&mut self) {
99 self.next();
100 }
101
102 fn inner_slice(&self) -> Self::Slice {
103 self.whole_slice()
104 }
105
106 fn slice_with(&self, range: Range<usize>) -> Self::Slice {
107 self.inner_slice().slice_with(range)
108 }
109
110 fn reset(&mut self) {
111 self.go_to(0);
112 }
113
114 fn rollback(&mut self, state: usize) -> &mut Self {
115 self.go_to(state);
116 self
117 }
118
119 fn is_start(&self) -> bool {
120 self.current_index() == 0
121 }
122
123 fn is_end(&self) -> bool {
124 self.item().is_none()
125 }
126
127 fn is_line_start(&self) -> bool {
128 self.prev_item().is_none_or(HaystackItem::is_newline)
129 }
130
131 fn is_line_end(&self) -> bool {
132 self.item().is_none_or(HaystackItem::is_newline)
133 }
134
135 fn is_crlf_start(&self) -> bool {
136 match self.prev_item() {
137 Some(n) if n.is_newline() => true,
138 Some(r) if r.is_return() => !self.item().is_some_and(HaystackItem::is_newline),
139 Some(_) => false,
140 None => true,
141 }
142 }
143
144 fn is_crlf_end(&self) -> bool {
145 // TODO: Clarify semantics surrounding "\r?(EndCRLF)"
146 match self.item() {
147 Some(n) if n.is_newline() => !self.prev_item().is_some_and(HaystackItem::is_return),
148 Some(r) if r.is_return() => true,
149 Some(_) => false,
150 None => true,
151 }
152 }
153}
154
155impl<'a, T: HaystackIter<'a>> Haystack<'a> for T {}
156
157/// This trait is exactly the same as [`Haystack`], except that it simplifies bounds by requiring
158/// that `Item = I`.
159///
160/// It is also blanket-implemented for all types that implement `Haystack<Item = I>`.
161pub trait HaystackOf<'a, I: HaystackItem>: Haystack<'a, Slice: HaystackSlice<'a, Item = I>> {}
162
163impl<'a, I, T> HaystackOf<'a, I> for T
164where
165 I: HaystackItem,
166 T: Haystack<'a, Slice<>: HaystackSlice<'a, Item = I>>
167{}
168
169/// A trait that is responsible for converting a slice into a stateful [`Haystack`], of type `H`.
170/// The primary intent of this trait is to allow users to avoid creating their own `Haystack`,
171/// instead passing a slice to methods on [`Regex`](crate::expr::Regex).
172///
173/// If creating a new `Haystack` type, this trait should be implemented manually so that all types
174/// can be inferred properly.
175pub trait IntoHaystack<'a, H: Haystack<'a>> {
176 /// Creates a new [`Haystack`] from self. The result should be initialized at index 0.
177 fn into_haystack(self) -> H;
178}
179
180impl<'a, H: Haystack<'a>> IntoHaystack<'a, H> for H {
181 fn into_haystack(self) -> H {
182 self
183 }
184}
185
186// Avoid a blanket implementation here so that users don't have to specify types.
187// impl<'a, I: HaystackItem, H: Haystack<'a, I>> IntoHaystack<'a, I, H> for H::Slice {
188// fn into_haystack(self) -> H {
189// <H as HaystackIter>::from_slice(self)
190// }
191// }
192
193/// A trait representing an owned, mutable type that can be converted into a [`Haystack`] as
194/// required. This allows for [`Regex`](crate::expr::Regex) methods that replace matches or captures
195/// from the original `Haystack`.
196///
197/// It is also used as the return type of the closures take by a couple of `Regex` replace methods.
198pub trait OwnedHaystackable<I: HaystackItem> {
199 type Hay<'a>: HaystackOf<'a, I> where Self: 'a;
200
201 /// Replaces the substring at the position indicated by `range` with the `replacement`
202 /// [`HaystackSlice`].
203 fn replace_range<'a>(
204 &mut self,
205 range: Range<usize>,
206 replacement: <Self::Hay<'a> as HaystackIter<'a>>::Slice
207 ) where Self: 'a;
208
209 /// Creates a temporary [`Haystack`] out of the underlying slice. This should usually be done by
210 /// borrowing (or cloning if reference counted) and calling [`IntoHaystack::into_haystack`].
211 fn as_haystack<'a>(&'a self) -> Self::Hay<'a>;
212
213 /// Borrows the underlying [`HaystackSlice`] without creating a haystack. Used for slicing
214 /// substrings. Note that `HaystackSlice` is inherently borrowed and probably be implemented for
215 /// a reference.
216 fn as_slice<'a>(&'a self) -> <Self::Hay<'a> as HaystackIter<'a>>::Slice;
217
218 /// Returns the length of the underlying slice.
219 fn len(&self) -> usize;
220
221 /// Returns true if the underlying slice is empty.
222 fn is_empty(&self) -> bool {
223 self.len() == 0
224 }
225}