ct_regex_internal/haystack/
iter.rs

1use std::{fmt::{self, Debug}, ops::Range};
2
3use crate::haystack::{HaystackSlice, IntoHaystack};
4
// TODO: Document the cheap-cloning requirement and the `usize` index state. Implementors must
// understand slicing and iterating, often dealing with variable-width Unicode characters.
7
8/// The main underlying trait for [`Haystack`](crate::haystack::Haystack) types, `HaystackIter`
9/// should be implemented on new types that understand slicing and iterating over a haystack that
10/// can be sliced into instances of `Self::Slice`.
11///
12/// For unicode-based haystacks like [`&str`](str), the implementing type needs to be able to deal
13/// with the contained variable width code points.
14///
15/// This trait requires that implementors also implement
16/// [`Iterator<Item = Self::Slice::Item>`](Iterator). When [`Iterator::next`] is called, on a
17/// `HaystackIter` it should return the same value that previous calls to
18/// [`current_item`](Self::current_item) have, before progressing the index to the next item. When
19/// the last item has been returned by `next`, the iterators should return None. Any future calls
20/// should avoid incrementing the index.
21///
22/// Additionally, `HaystackIter`s should be cheap to clone and able to produce and restore an index
23/// representing the current position.
24///
25/// Although possible, there is no point implementing a `HaystackIter` that shares a `Slice` with
26/// another `HaystackIter`.
27pub trait HaystackIter<'a>: Debug + Clone
28    + Iterator<Item = <Self::Slice as HaystackSlice<'a>>::Item>
29{
30    /// The `HaystackSlice` returned by this type when slicing the underlying haystack. This type is
31    /// usually also contained within the implementor used to create an instance via
32    /// [`IntoHaystack`].
33    type Slice: HaystackSlice<'a>;
34
35    /// Returns the item currently being matched in the haystack. Repeatedly calling this method
36    /// should return the same item, until progressed with [`Iterator::next`].
37    fn current_item(&self) -> Option<Self::Item>;
38
39    /// Returns the index of the current item in the original haystack. The returned value should be
40    /// valid to pass to [`Self::go_to`] without causing a panic.
41    fn current_index(&self) -> usize;
42
43    /// Returns the underlying slice, as it was when this `HaystackIter` was created - representing
44    /// the entire haystack being matched against.
45    fn whole_slice(&self) -> Self::Slice;
46
47    /// Returns the remaining contents of this haystack, as a `Slice`. For slice based haystacks,
48    /// this is can be implemented as `&self.inner[self.index..]`.
49    fn remainder_as_slice(&self) -> Self::Slice;
50
51    /// Slices the original haystack with the provided (half-open) `range`, used for retrieving
52    /// values of capture groups.
53    fn slice_with(&self, range: Range<usize>) -> Self::Slice;
54
55    /// Restores the `index` of the haystack to the provided one. This should only be called with
56    /// indexes obtained by calling [`current_index`](Self::current_index) on this `HaystackIter`.
57    fn go_to(&mut self, index: usize);
58}
59
/// A helper for getting the first `char` of a provided `&str`.
///
/// Returns `(width, first)`, where `first` is the leading character (or `None` when `value` is
/// empty) and `width` is the number of bytes that character occupies in UTF-8. The width is zero
/// exactly when there is no character to return.
pub fn get_first_char(value: &str) -> (usize, Option<char>) {
    // `chars` is the stable way to decode the leading scalar; its encoded width can then be
    // recovered directly from the `char`, without needing the iterator's byte offset.
    let first = value.chars().next();
    (first.map_or(0, char::len_utf8), first)
}
70
/// Drops the leading `usize` of a `(usize, item)` pair and returns only the item.
fn get_item<I>((_, item): (usize, I)) -> I {
    item
}
72
/// A haystack type for matching against the [`char`]s in a [`&str`](str). This type abstracts over
/// the variable width scalars contained, to allow indexing without panics.
///
/// To accommodate this, calls to [`go_to`](HaystackIter::go_to) should only be made with an index
/// previously produced by this type for the specific haystack. Failure to do so may cause a panic
/// if indexing on an invalid unicode boundary.
#[derive(Clone)]
pub struct StrStack<'a> {
    /// The entire string being matched against.
    inner: &'a str,
    /// Byte offset into `inner` of the current item.
    index: usize,
}
84
85impl<'a> IntoHaystack<'a, StrStack<'a>> for &'a str {
86    fn into_haystack(self) -> StrStack<'a> {
87        StrStack {
88            inner: self,
89            index: 0,
90        }
91    }
92}
93
94impl<'a> Iterator for StrStack<'a> {
95    type Item = char;
96
97    fn next(&mut self) -> Option<Self::Item> {
98        let (width, first) = get_first_char(self.remainder_as_slice());
99        // The width won't exceed the remaining slice, so it can't overflow then length.
100        self.index += width;
101        first
102    }
103}
104
105impl<'a> HaystackIter<'a> for StrStack<'a> {
106    type Slice = &'a str;
107
108    fn current_item(&self) -> Option<Self::Item> {
109        get_item(get_first_char(self.remainder_as_slice()))
110    }
111
112    fn current_index(&self) -> usize {
113        self.index
114    }
115
116    fn whole_slice(&self) -> Self::Slice {
117        self.inner
118    }
119
120    fn remainder_as_slice(&self) -> Self::Slice {
121        &self.inner[self.index..]
122    }
123
124    fn slice_with(&self, range: Range<usize>) -> Self::Slice {
125        &self.inner[range]
126    }
127
128    fn go_to(&mut self, index: usize) {
129        self.index = index;
130    }
131}
132
133impl<'a> Debug for StrStack<'a> {
134    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
135        let mut len = 0;
136        f.write_fmt(format_args!("\""))write!(f, "\"")?;
137
138        self.inner.char_indices().try_for_each(|(index, ch)| {
139            let mut debug = ch.escape_debug();
140            if index < self.index  {
141                len += debug.len();
142            }
143            debug.try_for_each(|debug_ch| f.write_fmt(format_args!("{0}", debug_ch))write!(f, "{debug_ch}"))
144        })?;
145
146        f.write_fmt(format_args!("\"\n "))write!(f, "\"\n ")?;
147        (0..len).try_for_each(|_| f.write_fmt(format_args!(" "))write!(f, " "))?;
148        f.write_fmt(format_args!("^"))write!(f, "^")
149    }
150}
151
/// A haystack type for matching against the [`u8`]s in a [`&[u8]`](slice). This type provides very
/// straightforward indexing and iteration over the contained slice.
#[derive(Clone)]
pub struct ByteStack<'a> {
    /// The entire byte slice being matched against.
    inner: &'a [u8],
    /// Position in `inner` of the current item.
    index: usize,
}
159
160impl<'a> IntoHaystack<'a, ByteStack<'a>> for &'a [u8] {
161    fn into_haystack(self) -> ByteStack<'a> {
162        ByteStack {
163            inner: self,
164            index: 0,
165        }
166    }
167}
168
169impl<'a> Iterator for ByteStack<'a> {
170    type Item = u8;
171
172    fn next(&mut self) -> Option<Self::Item> {
173        let byte = self.inner.get(self.index).copied();
174
175        if byte.is_some() {
176            self.index += 1;
177        }
178
179        byte
180    }
181}
182
183impl<'a> HaystackIter<'a> for ByteStack<'a> {
184    type Slice = &'a [u8];
185
186    fn current_item(&self) -> Option<Self::Item> {
187        self.inner.get(self.index).copied()
188    }
189
190    fn current_index(&self) -> usize {
191        self.index
192    }
193
194    fn whole_slice(&self) -> Self::Slice {
195        self.inner
196    }
197
198    fn remainder_as_slice(&self) -> Self::Slice {
199        // FIXME: Check for possible panics when slicing.
200        &self.inner[self.index..]
201    }
202
203    fn slice_with(&self, range: Range<usize>) -> Self::Slice {
204        &self.inner[range]
205    }
206
207    fn go_to(&mut self, index: usize) {
208        self.index = index;
209    }
210}
211
212impl<'a> Debug for ByteStack<'a> {
213    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
214        f.write_fmt(format_args!("b\""))write!(f, "b\"")?;
215
216        self.inner.iter().try_for_each(|byte| f.write_fmt(format_args!("{0:02x}", byte))write!(f, "{:02x}", byte))?;
217
218        f.write_fmt(format_args!("\"\n  "))write!(f, "\"\n  ")?;
219        (0..self.index).try_for_each(|_| f.write_fmt(format_args!("  "))write!(f, "  "))?;
220        f.write_fmt(format_args!("^"))write!(f, "^")
221    }
222}