Skip to main content

ct_regex_internal/haystack/
interface.rs

1use std::fmt::Debug;
2use std::ops::Range;
3
4use crate::haystack::HaystackItem;
5
/// The main underlying trait for [`Haystack`] types. `HaystackIter` should be implemented on new
/// types that understand slicing and iterating over a haystack that can be sliced into instances
/// of `Self::Slice`.
///
/// For unicode-based haystacks like [`&str`](str), the implementing type needs to be able to deal
/// with the variable-width code points it contains.
///
/// This trait requires that implementors also implement
/// [`Iterator<Item = Self::Slice::Item>`](Iterator). When [`Iterator::next`] is called on a
/// `HaystackIter`, it should return the same value that preceding calls to
/// [`current_item`](Self::current_item) returned, and only then progress the index to the next
/// item. Once the last item has been returned by `next`, the iterator should return `None`, and
/// any further calls should avoid incrementing the index.
///
/// Additionally, `HaystackIter`s should be cheap to clone and able to produce and restore an index
/// representing the current position.
///
/// Although possible, there is no point implementing a `HaystackIter` that shares a `Slice` with
/// another `HaystackIter`.
pub trait HaystackIter<'a>: Debug + Clone
    + Iterator<Item = <Self::Slice as HaystackSlice<'a>>::Item>
{
    /// The `HaystackSlice` returned by this type when slicing the underlying haystack. This type is
    /// usually also contained within the implementor used to create an instance via
    /// [`IntoHaystack`].
    type Slice: HaystackSlice<'a>;

    /// Returns the item currently being matched in the haystack. Repeatedly calling this method
    /// should return the same item, until progressed with [`Iterator::next`].
    fn current_item(&self) -> Option<Self::Item>;

    /// Returns the item immediately before the current position, without progressing the
    /// haystack.
    ///
    /// NOTE(review): the exact contract is not spelled out in this file. The provided
    /// [`Haystack::is_line_start`] and [`Haystack::is_crlf_start`] methods treat `None` as
    /// "nothing precedes the current position", so implementors presumably return `None` at the
    /// start of the haystack - confirm.
    fn prev_item(&self) -> Option<Self::Item>;

    /// Returns the index of the current item in the original haystack. The returned value should be
    /// valid to pass to [`Self::go_to`] without causing a panic.
    fn current_index(&self) -> usize;

    /// Returns the underlying slice, as it was when this `HaystackIter` was created - representing
    /// the entire haystack being matched against.
    fn whole_slice(&self) -> Self::Slice;

    /// Returns the remaining contents of this haystack, as a `Slice`. For slice-based haystacks,
    /// this can be implemented as `&self.inner[self.index..]`.
    fn remainder_as_slice(&self) -> Self::Slice;

    /// Restores the `index` of the haystack to the provided one. This should only be called with
    /// indexes obtained by calling [`current_index`](Self::current_index) on this `HaystackIter`.
    fn go_to(&mut self, index: usize);
}
55
/// A trait representing a slice of the underlying haystack for various [`Haystack`] types.
///
/// The implementor of this trait is usually, but not always, the only implementor of
/// [`IntoHaystack`] for a haystack type.
///
/// It should be noted that this trait is often implemented on a reference to the type in question,
/// e.g. `&str` or `&[u8]` rather than `str` or `[u8]` themselves, so that the implementing type can
/// be cloned as required.
pub trait HaystackSlice<'a>: Debug + Clone + Sized + ToOwned {
    /// The `HaystackItem` contained within this slice.
    type Item: HaystackItem;

    /// Slices the underlying slice with the provided (half-open) `range`, returning the result as
    /// another `Self`. Used for retrieving the values of capture groups.
    fn slice_with(&self, range: Range<usize>) -> Self;
}
72
73/// A trait used to interface the haystack types use when matching of capturing against a
74/// [`Regex`](crate::expr::Regex), including tracking progression and slicing captures.
75///
76/// It is rare that users will have to interact with this trait, appart from Trait bounds. All
77/// public methods will take an `impl IntoHaystack<'a, H>` as an argument.
78///
79/// `Haystack` is accompanied by another trait, [`HaystackItem`], representing items that can be
80/// matched against a [`Regex`](crate::expr::Regex).
81///
82/// `Haystack`s are stateful and therefore can't be matched against multiple times without being
83/// [`reset`](Self::reset) first, or they will continue where the first pattern finished. They store
84/// their state as a `usize`, which can be obtained via [`index`](Self::index) and restored via
85/// [`rollback`](Self::rollback). Additionally, `Haystack`s are cheap to clone, relying on shallow
86/// clones or reference counting.
87pub trait Haystack<'a>: HaystackIter<'a> {
88    fn item(&self) -> Option<Self::Item> {
89        self.current_item()
90    }
91
92    fn index(&self) -> usize {
93        self.current_index()
94    }
95
96    // Progression is only completed by elements which explicitly check the byte and succeed.
97    fn progress(&mut self) {
98        self.next();
99    }
100
101    fn inner_slice(&self) -> Self::Slice {
102        self.whole_slice()
103    }
104
105    fn slice_with(&self, range: Range<usize>) -> Self::Slice {
106        self.inner_slice().slice_with(range)
107    }
108
109    fn reset(&mut self) {
110        self.go_to(0);
111    }
112
113    fn rollback(&mut self, state: usize) -> &mut Self {
114        self.go_to(state);
115        self
116    }
117
118    fn is_start(&self) -> bool {
119        self.current_index() == 0
120    }
121
122    fn is_end(&self) -> bool {
123        self.item().is_none()
124    }
125
126    fn is_line_start(&self) -> bool {
127        self.prev_item().is_none_or(HaystackItem::is_newline)
128    }
129
130    fn is_line_end(&self) -> bool {
131        self.item().is_none_or(HaystackItem::is_newline)
132    }
133
134    fn is_crlf_start(&self) -> bool {
135        match self.prev_item() {
136            Some(n) if n.is_newline() => true,
137            Some(r) if r.is_return() => !self.item().is_some_and(HaystackItem::is_newline),
138            Some(_) => false,
139            None => true,
140        }
141    }
142
143    fn is_crlf_end(&self) -> bool {
144        // TODO: Clarify semantics surrounding "\r?(EndCRLF)"
145        match self.item() {
146            Some(n) if n.is_newline() => !self.prev_item().is_some_and(HaystackItem::is_return),
147            Some(r) if r.is_return() => true,
148            Some(_) => false,
149            None => true,
150        }
151    }
152}
153
154impl<'a, T: HaystackIter<'a>> Haystack<'a> for T {}
155
/// This trait is exactly the same as [`Haystack`], except that it simplifies bounds by requiring
/// that the haystack's items are of type `I`, i.e. that `Slice: HaystackSlice<'a, Item = I>`.
///
/// It is also blanket-implemented for all types that implement `Haystack` with such a `Slice`.
pub trait HaystackOf<'a, I: HaystackItem>: Haystack<'a, Slice: HaystackSlice<'a, Item = I>> {}
161
162impl<'a, I, T> HaystackOf<'a, I> for T
163where
164    I: HaystackItem,
165    T: Haystack<'a, Slice<>: HaystackSlice<'a, Item = I>>
166{}
167
/// A trait that is responsible for converting a slice into a stateful [`Haystack`] of type `H`.
/// The primary intent of this trait is to allow users to avoid creating their own `Haystack`,
/// instead passing a slice to methods on [`Regex`](crate::expr::Regex).
///
/// When creating a new `Haystack` type, this trait should be implemented manually so that all
/// types can be inferred properly.
pub trait IntoHaystack<'a, H: Haystack<'a>> {
    /// Consumes `self` and creates a new [`Haystack`] of type `H` from it.
    fn into_haystack(self) -> H;
}
178
179impl<'a, H: Haystack<'a>> IntoHaystack<'a, H> for H {
180    fn into_haystack(self) -> H {
181        self
182    }
183}
184
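// Deliberately avoid a blanket implementation for `H::Slice` here, so that users don't have to
// specify types. NOTE: the sketch below no longer matches the current trait signatures in this
// file (`Haystack` and `IntoHaystack` take no item parameter, and `HaystackIter` declares no
// `from_slice`).
// impl<'a, I: HaystackItem, H: Haystack<'a, I>> IntoHaystack<'a, I, H> for H::Slice {
//     fn into_haystack(self) -> H {
//         <H as HaystackIter>::from_slice(self)
//     }
// }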
191
/// A trait representing an owned, mutable type that can be converted into a [`Haystack`] as
/// required. This allows for [`Regex`](crate::expr::Regex) methods that replace matches or captures
/// from the original `Haystack`.
///
/// It is also used as the return type of the closures taken by a couple of `Regex` replace methods.
// `len` is declared below without a matching `is_empty`, hence the silenced lint.
#[allow(clippy::len_without_is_empty)]
pub trait OwnedHaystackable<I: HaystackItem> {
    /// The haystack type borrowed from `&'a Self`; its items are of type `I`.
    type Hay<'a>: HaystackOf<'a, I> where Self: 'a;

    /// Replaces the part of `self` covered by `range` with the contents of the slice `with`.
    ///
    /// NOTE(review): presumably `range` is half-open and uses the same indices as
    /// [`HaystackIter::current_index`] - confirm against the implementors.
    fn replace_range<'a>(
        &mut self,
        range: Range<usize>,
        with: <Self::Hay<'a> as HaystackIter<'a>>::Slice
    ) where Self: 'a;

    /// Borrows `self` as a haystack of type [`Self::Hay`].
    fn as_haystack<'a>(&'a self) -> Self::Hay<'a>;

    /// Borrows `self` as the slice type of [`Self::Hay`].
    fn as_slice<'a>(&'a self) -> <Self::Hay<'a> as HaystackIter<'a>>::Slice;

    /// Returns the length of `self`.
    ///
    /// NOTE(review): the unit (items vs. bytes) is not visible here - confirm.
    fn len(&self) -> usize;
}
208
209    fn as_slice<'a>(&'a self) -> <Self::Hay<'a> as HaystackIter<'a>>::Slice;
210
211    fn len(&self) -> usize;
212}