ct_regex_internal/haystack/interface.rs
1use std::fmt::Debug;
2use std::ops::Range;
3
4use crate::haystack::HaystackItem;
5
/// The main underlying trait for [`Haystack`] types. `HaystackIter` should be implemented on new
/// types that understand slicing and iterating over a haystack that can be sliced into instances
/// of `Self::Slice`.
///
/// For unicode-based haystacks like [`&str`](str), the implementing type needs to be able to deal
/// with the contained variable width code points.
///
/// This trait requires that implementors also implement
/// [`Iterator<Item = Self::Slice::Item>`](Iterator). When [`Iterator::next`] is called on a
/// `HaystackIter`, it should return the same value that previous calls to
/// [`current_item`](Self::current_item) returned, before progressing the index to the next item.
/// Once the last item has been returned by `next`, the iterator should return `None`, and any
/// future calls should avoid incrementing the index.
///
/// Additionally, `HaystackIter`s should be cheap to clone and able to produce and restore an index
/// representing the current position.
///
/// Although possible, there is no point implementing a `HaystackIter` that shares a `Slice` with
/// another `HaystackIter`.
pub trait HaystackIter<'a>: Debug + Clone
    + Iterator<Item = <Self::Slice as HaystackSlice<'a>>::Item>
{
    /// The `HaystackSlice` returned by this type when slicing the underlying haystack. This type
    /// is usually also contained within the implementor used to create an instance via
    /// [`IntoHaystack`].
    type Slice: HaystackSlice<'a>;

    /// Returns the item currently being matched in the haystack. Repeatedly calling this method
    /// should return the same item, until progressed with [`Iterator::next`].
    fn current_item(&self) -> Option<Self::Item>;

    /// Returns the item that precedes the current one, if there is one.
    ///
    /// The provided [`Haystack::is_line_start`] and [`Haystack::is_crlf_start`] checks treat a
    /// `None` result as "at the start of a line", so implementors presumably return `None` when
    /// positioned at the very beginning of the haystack (NOTE(review): confirm against the
    /// concrete implementations).
    fn prev_item(&self) -> Option<Self::Item>;

    /// Returns the index of the current item in the original haystack. The returned value should
    /// be valid to pass to [`Self::go_to`] without causing a panic.
    fn current_index(&self) -> usize;

    /// Returns the underlying slice, as it was when this `HaystackIter` was created - representing
    /// the entire haystack being matched against.
    fn whole_slice(&self) -> Self::Slice;

    /// Returns the remaining contents of this haystack, as a `Slice`. For slice based haystacks,
    /// this can be implemented as `&self.inner[self.index..]`.
    fn remainder_as_slice(&self) -> Self::Slice;

    /// Restores the `index` of the haystack to the provided one. This should only be called with
    /// indexes obtained by calling [`current_index`](Self::current_index) on this `HaystackIter`.
    fn go_to(&mut self, index: usize);
}
55
/// A trait representing a slice of the underlying haystack for various [`Haystack`] types.
///
/// The implementor of this trait is usually, but not always, the only implementor of
/// [`IntoHaystack`] for a haystack type.
///
/// It should be noted that this trait is often implemented on a reference to the type in question,
/// e.g. `&str` or `&[u8]` rather than `str` or `[u8]` themselves, so that the implementing type
/// can be cloned as required.
pub trait HaystackSlice<'a>: Debug + Clone + Sized + ToOwned {
    /// The `HaystackItem` contained within this slice.
    type Item: HaystackItem;

    /// Slices the underlying slice with the provided (half-open) `range`, used for retrieving
    /// values of capture groups.
    fn slice_with(&self, range: Range<usize>) -> Self;
}
72
73/// A trait used to interface the haystack types use when matching of capturing against a
74/// [`Regex`](crate::expr::Regex), including tracking progression and slicing captures.
75///
76/// It is rare that users will have to interact with this trait, appart from Trait bounds. All
77/// public methods will take an `impl IntoHaystack<'a, H>` as an argument.
78///
79/// `Haystack` is accompanied by another trait, [`HaystackItem`], representing items that can be
80/// matched against a [`Regex`](crate::expr::Regex).
81///
82/// `Haystack`s are stateful and therefore can't be matched against multiple times without being
83/// [`reset`](Self::reset) first, or they will continue where the first pattern finished. They store
84/// their state as a `usize`, which can be obtained via [`index`](Self::index) and restored via
85/// [`rollback`](Self::rollback). Additionally, `Haystack`s are cheap to clone, relying on shallow
86/// clones or reference counting.
87pub trait Haystack<'a>: HaystackIter<'a> {
88 fn item(&self) -> Option<Self::Item> {
89 self.current_item()
90 }
91
92 fn index(&self) -> usize {
93 self.current_index()
94 }
95
96 // Progression is only completed by elements which explicitly check the byte and succeed.
97 fn progress(&mut self) {
98 self.next();
99 }
100
101 fn inner_slice(&self) -> Self::Slice {
102 self.whole_slice()
103 }
104
105 fn slice_with(&self, range: Range<usize>) -> Self::Slice {
106 self.inner_slice().slice_with(range)
107 }
108
109 fn reset(&mut self) {
110 self.go_to(0);
111 }
112
113 fn rollback(&mut self, state: usize) -> &mut Self {
114 self.go_to(state);
115 self
116 }
117
118 fn is_start(&self) -> bool {
119 self.current_index() == 0
120 }
121
122 fn is_end(&self) -> bool {
123 self.item().is_none()
124 }
125
126 fn is_line_start(&self) -> bool {
127 self.prev_item().is_none_or(HaystackItem::is_newline)
128 }
129
130 fn is_line_end(&self) -> bool {
131 self.item().is_none_or(HaystackItem::is_newline)
132 }
133
134 fn is_crlf_start(&self) -> bool {
135 match self.prev_item() {
136 Some(n) if n.is_newline() => true,
137 Some(r) if r.is_return() => !self.item().is_some_and(HaystackItem::is_newline),
138 Some(_) => false,
139 None => true,
140 }
141 }
142
143 fn is_crlf_end(&self) -> bool {
144 // TODO: Clarify semantics surrounding "\r?(EndCRLF)"
145 match self.item() {
146 Some(n) if n.is_newline() => !self.prev_item().is_some_and(HaystackItem::is_return),
147 Some(r) if r.is_return() => true,
148 Some(_) => false,
149 None => true,
150 }
151 }
152}
153
154impl<'a, T: HaystackIter<'a>> Haystack<'a> for T {}
155
/// This trait is exactly the same as [`Haystack`], except that it simplifies bounds by requiring
/// that the haystack's `Slice` has `Item = I`.
///
/// It declares no methods of its own, and is blanket-implemented for every type that implements
/// `Haystack` with a matching `Slice::Item`.
pub trait HaystackOf<'a, I: HaystackItem>: Haystack<'a, Slice: HaystackSlice<'a, Item = I>> {}
161
162impl<'a, I, T> HaystackOf<'a, I> for T
163where
164 I: HaystackItem,
165 T: Haystack<'a, Slice<>: HaystackSlice<'a, Item = I>>
166{}
167
/// A trait that is responsible for converting a slice into a stateful [`Haystack`], of type `H`.
/// The primary intent of this trait is to allow users to avoid creating their own `Haystack`,
/// instead passing a slice to methods on [`Regex`](crate::expr::Regex).
///
/// If creating a new `Haystack` type, this trait should be implemented manually so that all types
/// can be inferred properly.
pub trait IntoHaystack<'a, H: Haystack<'a>> {
    /// Creates a new [`Haystack`] from self, consuming `self` in the process.
    fn into_haystack(self) -> H;
}
178
179impl<'a, H: Haystack<'a>> IntoHaystack<'a, H> for H {
180 fn into_haystack(self) -> H {
181 self
182 }
183}
184
185// Avoid a blanket implementation here so that users don't have to specify types.
186// impl<'a, I: HaystackItem, H: Haystack<'a, I>> IntoHaystack<'a, I, H> for H::Slice {
187// fn into_haystack(self) -> H {
188// <H as HaystackIter>::from_slice(self)
189// }
190// }
191
/// A trait representing an owned, mutable type that can be converted into a [`Haystack`] as
/// required. This allows for [`Regex`](crate::expr::Regex) methods that replace matches or captures
/// from the original `Haystack`.
///
/// It is also used as the return type of the closures taken by a couple of `Regex` replace
/// methods.
// The trait requires `len` but no `is_empty`, hence the lint suppression below.
#[allow(clippy::len_without_is_empty)]
pub trait OwnedHaystackable<I: HaystackItem> {
    /// The haystack type produced when borrowing `self` for `'a`; its items are of type `I`.
    type Hay<'a>: HaystackOf<'a, I> where Self: 'a;

    /// Mutates `self` using the given `range` and the slice `with`.
    ///
    /// NOTE(review): presumably replaces the contents of `range` with `with`, and `range` is
    /// presumably expressed in the same units as [`Haystack::index`] - confirm against the
    /// implementations.
    fn replace_range<'a>(
        &mut self,
        range: Range<usize>,
        with: <Self::Hay<'a> as HaystackIter<'a>>::Slice
    ) where Self: 'a;

    /// Borrows `self` as a [`Haystack`] of type [`Self::Hay`].
    fn as_haystack<'a>(&'a self) -> Self::Hay<'a>;

    /// Borrows `self` as the `Slice` type of [`Self::Hay`].
    fn as_slice<'a>(&'a self) -> <Self::Hay<'a> as HaystackIter<'a>>::Slice;

    /// Returns the length of this owned haystack.
    ///
    /// NOTE(review): the unit (e.g. bytes vs. items) is not specified here - confirm against the
    /// implementations.
    fn len(&self) -> usize;
}