Skip to main content

ct_regex_internal/haystack/
interface.rs

1use std::fmt::Debug;
2use std::ops::Range;
3
4use crate::haystack::HaystackItem;
5
6/// The main underlying trait for [`Haystack`] types, `HaystackIter` should be implemented on new
7/// types that understand slicing and iterating over a haystack that can be sliced into instances of
8/// `Self::Slice`.
9///
10/// For unicode-based haystacks like [`&str`](str), the implementing type needs to be able to deal
11/// with the contained variable width code points.
12///
13/// This trait requires that implementers also implement
14/// [`Iterator<Item = Self::Slice::Item>`](Iterator). When [`Iterator::next`] is called, on a
15/// `HaystackIter` it should return the same value that previous calls to
16/// [`current_item`](Self::current_item) have, before progressing the index to the next item. When
17/// the last item has been returned by `next`, the iterators should return None. Any future calls
18/// should avoid incrementing the index.
19///
20/// Additionally, `HaystackIter`s should be cheap to clone and able to produce and restore an index
21/// representing the current position.
22///
23/// Although possible, there is no point implementing a `HaystackIter` that shares a `Slice` with
24/// another `HaystackIter`.
25pub trait HaystackIter<'a>: Debug + Clone
26    + Iterator<Item = <Self::Slice as HaystackSlice<'a>>::Item>
27{
28    /// The `HaystackSlice` returned by this type when slicing the underlying haystack. This type is
29    /// usually also contained within the implementer used to create an instance via
30    /// [`IntoHaystack`].
31    type Slice: HaystackSlice<'a>;
32
33    /// Returns the item currently being matched in the haystack. Repeatedly calling this method
34    /// should return the same item, until progressed with [`Iterator::next`].
35    fn current_item(&self) -> Option<Self::Item>;
36
37    /// Returns the item last matched in the haystack without making any changes.
38    fn prev_item(&self) -> Option<Self::Item>;
39
40    /// Returns the index of the current item in the original haystack. The returned value should be
41    /// valid to pass to [`Self::go_to`] without causing a panic.
42    fn current_index(&self) -> usize;
43
44    /// Returns the underlying slice, as it was when this `HaystackIter` was created - representing
45    /// the entire haystack being matched against.
46    fn whole_slice(&self) -> Self::Slice;
47
48    /// Returns the remaining contents of this haystack, as a `Slice`. For slice based haystacks,
49    /// this is can be implemented as `&self.inner[self.index..]`.
50    fn remainder_as_slice(&self) -> Self::Slice;
51
52    /// Restores the `index` of the haystack to the provided one. This should only be called with
53    /// indexes obtained by calling [`current_index`](Self::current_index) on this `HaystackIter`.
54    fn go_to(&mut self, index: usize);
55}
56
57/// A trait representing a slice of the underlying haystack for various [`Haystack`] types.
58///
59/// The implementer of this trait is usually but not always, the only implementer of
60/// [`IntoHaystack`] for a haystack type.
61///
62/// It should be noted that this trait is often implemented of a reference to the type in question,
63/// e.g. `&str` or `&[u8]` rather than `str` or `[u8]` themselves, so that the implementing type can
64/// be cloned as required.
65pub trait HaystackSlice<'a>: Debug + Clone + Sized + ToOwned {
66    /// The `HaystackItem` contained within this slice.
67    type Item: HaystackItem;
68
69    /// Slices the underlying slice with the provided (half-open) `range`, used for retrieving
70    /// values of capture groups.
71    fn slice_with(&self, range: Range<usize>) -> Self;
72}
73
74/// A trait used to interface the haystack types use when matching of capturing against a
75/// [`Regex`](crate::expr::Regex), including tracking progression and slicing captures.
76///
77/// It is rare that users will have to interact with this trait, apart from Trait bounds. All public
78/// methods will take an `impl IntoHaystack<'a, H>` as an argument.
79///
80/// `Haystack` is accompanied by another trait, [`HaystackItem`], representing items that can be
81/// matched against a [`Regex`](crate::expr::Regex).
82///
83/// `Haystack`s are stateful and therefore can't be matched against multiple times without being
84/// [`reset`](Self::reset) first, or they will continue where the first pattern finished. They store
85/// their state as a `usize`, which can be obtained via [`index`](Self::index) and restored via
86/// [`rollback`](Self::rollback). Additionally, `Haystack`s are cheap to clone, relying on shallow
87/// clones or reference counting.
88pub trait Haystack<'a>: HaystackIter<'a> {
89    fn item(&self) -> Option<Self::Item> {
90        self.current_item()
91    }
92
93    fn index(&self) -> usize {
94        self.current_index()
95    }
96
97    // Progression is only completed by elements which explicitly check the byte and succeed.
98    fn progress(&mut self) {
99        self.next();
100    }
101
102    fn inner_slice(&self) -> Self::Slice {
103        self.whole_slice()
104    }
105
106    fn slice_with(&self, range: Range<usize>) -> Self::Slice {
107        self.inner_slice().slice_with(range)
108    }
109
110    fn reset(&mut self) {
111        self.go_to(0);
112    }
113
114    fn rollback(&mut self, state: usize) -> &mut Self {
115        self.go_to(state);
116        self
117    }
118
119    fn is_start(&self) -> bool {
120        self.current_index() == 0
121    }
122
123    fn is_end(&self) -> bool {
124        self.item().is_none()
125    }
126
127    fn is_line_start(&self) -> bool {
128        self.prev_item().is_none_or(HaystackItem::is_newline)
129    }
130
131    fn is_line_end(&self) -> bool {
132        self.item().is_none_or(HaystackItem::is_newline)
133    }
134
135    fn is_crlf_start(&self) -> bool {
136        match self.prev_item() {
137            Some(n) if n.is_newline() => true,
138            Some(r) if r.is_return() => !self.item().is_some_and(HaystackItem::is_newline),
139            Some(_) => false,
140            None => true,
141        }
142    }
143
144    fn is_crlf_end(&self) -> bool {
145        // TODO: Clarify semantics surrounding "\r?(EndCRLF)"
146        match self.item() {
147            Some(n) if n.is_newline() => !self.prev_item().is_some_and(HaystackItem::is_return),
148            Some(r) if r.is_return() => true,
149            Some(_) => false,
150            None => true,
151        }
152    }
153}
154
155impl<'a, T: HaystackIter<'a>> Haystack<'a> for T {}
156
157/// This trait is exactly the same as [`Haystack`], except that it simplifies bounds by requiring
158/// that `Item = I`.
159///
160/// It is also blanket-implemented for all types that implement `Haystack<Item = I>`.
161pub trait HaystackOf<'a, I: HaystackItem>: Haystack<'a, Slice: HaystackSlice<'a, Item = I>> {}
162
163impl<'a, I, T> HaystackOf<'a, I> for T
164where
165    I: HaystackItem,
166    T: Haystack<'a, Slice<>: HaystackSlice<'a, Item = I>>
167{}
168
169/// A trait that is responsible for converting a slice into a stateful [`Haystack`], of type `H`.
170/// The primary intent of this trait is to allow users to avoid creating their own `Haystack`,
171/// instead passing a slice to methods on [`Regex`](crate::expr::Regex).
172///
173/// If creating a new `Haystack` type, this trait should be implemented manually so that all types
174/// can be inferred properly.
175pub trait IntoHaystack<'a, H: Haystack<'a>> {
176    /// Creates a new [`Haystack`] from self. The result should be initialized at index 0.
177    fn into_haystack(self) -> H;
178}
179
180impl<'a, H: Haystack<'a>> IntoHaystack<'a, H> for H {
181    fn into_haystack(self) -> H {
182        self
183    }
184}
185
186// Avoid a blanket implementation here so that users don't have to specify types.
187// impl<'a, I: HaystackItem, H: Haystack<'a, I>> IntoHaystack<'a, I, H> for H::Slice {
188//     fn into_haystack(self) -> H {
189//         <H as HaystackIter>::from_slice(self)
190//     }
191// }
192
193/// A trait representing an owned, mutable type that can be converted into a [`Haystack`] as
194/// required. This allows for [`Regex`](crate::expr::Regex) methods that replace matches or captures
195/// from the original `Haystack`.
196///
197/// It is also used as the return type of the closures take by a couple of `Regex` replace methods.
198pub trait OwnedHaystackable<I: HaystackItem> {
199    type Hay<'a>: HaystackOf<'a, I> where Self: 'a;
200
201    /// Replaces the substring at the position indicated by `range` with the `replacement`
202    /// [`HaystackSlice`].
203    fn replace_range<'a>(
204        &mut self,
205        range: Range<usize>,
206        replacement: <Self::Hay<'a> as HaystackIter<'a>>::Slice
207    ) where Self: 'a;
208
209    /// Creates a temporary [`Haystack`] out of the underlying slice. This should usually be done by
210    /// borrowing (or cloning if reference counted) and calling [`IntoHaystack::into_haystack`].
211    fn as_haystack<'a>(&'a self) -> Self::Hay<'a>;
212
213    /// Borrows the underlying [`HaystackSlice`] without creating a haystack. Used for slicing
214    /// substrings. Note that `HaystackSlice` is inherently borrowed and probably be implemented for
215    /// a reference.
216    fn as_slice<'a>(&'a self) -> <Self::Hay<'a> as HaystackIter<'a>>::Slice;
217
218    /// Returns the length of the underlying slice.
219    fn len(&self) -> usize;
220
221    /// Returns true if the underlying slice is empty.
222    fn is_empty(&self) -> bool {
223        self.len() == 0
224    }
225}