1use std::{fmt::{self, Debug}, ops::Range};
23use crate::haystack::{HaystackSlice, IntoHaystack};
45// TODO: Document cheap cloning requirement, usize state. Understand slicing and iterating, often
6// dealing with variable width unicode characters...
/// The main underlying trait for [`Haystack`](crate::haystack::Haystack) types. `HaystackIter`
/// should be implemented on new types that understand slicing and iterating over a haystack that
/// can be sliced into instances of `Self::Slice`.
///
/// For unicode-based haystacks like [`&str`](str), the implementing type needs to be able to deal
/// with the contained variable width code points.
///
/// This trait requires that implementors also implement
/// [`Iterator<Item = Self::Slice::Item>`](Iterator). When [`Iterator::next`] is called on a
/// `HaystackIter`, it should return the same value that previous calls to
/// [`current_item`](Self::current_item) have, before progressing the index to the next item. Once
/// the last item has been returned by `next`, the iterator should return `None`, and any further
/// calls should avoid incrementing the index.
///
/// Additionally, `HaystackIter`s should be cheap to clone and able to produce and restore an index
/// representing the current position.
///
/// Although possible, there is no point implementing a `HaystackIter` that shares a `Slice` with
/// another `HaystackIter`.
pub trait HaystackIter<'a>: Debug + Clone
    + Iterator<Item = <Self::Slice as HaystackSlice<'a>>::Item>
{
    /// The `HaystackSlice` returned by this type when slicing the underlying haystack. This type is
    /// usually also contained within the implementor used to create an instance via
    /// [`IntoHaystack`].
    type Slice: HaystackSlice<'a>;

    /// Returns the item currently being matched in the haystack. Repeatedly calling this method
    /// should return the same item, until progressed with [`Iterator::next`].
    fn current_item(&self) -> Option<Self::Item>;

    /// Returns the index of the current item in the original haystack. The returned value should be
    /// valid to pass to [`Self::go_to`] without causing a panic.
    fn current_index(&self) -> usize;

    /// Returns the underlying slice, as it was when this `HaystackIter` was created - representing
    /// the entire haystack being matched against.
    fn whole_slice(&self) -> Self::Slice;

    /// Returns the remaining contents of this haystack, as a `Slice`. For slice based haystacks,
    /// this can be implemented as `&self.inner[self.index..]`.
    fn remainder_as_slice(&self) -> Self::Slice;

    /// Slices the original haystack with the provided (half-open) `range`, used for retrieving
    /// values of capture groups.
    fn slice_with(&self, range: Range<usize>) -> Self::Slice;

    /// Restores the `index` of the haystack to the provided one. This should only be called with
    /// indexes obtained by calling [`current_index`](Self::current_index) on this `HaystackIter`.
    fn go_to(&mut self, index: usize);
}
/// Splits the leading `char` off the provided `&str`. Returns the UTF-8 width of that character in
/// bytes (zero when `value` is empty) alongside the character itself, if there is one.
pub fn get_first_char(value: &str) -> (usize, Option<char>) {
    // `chars` decodes the variable width scalar at the front of `value`; its encoded length is
    // exactly the number of bytes it occupied there.
    let leading = value.chars().next();
    let width = leading.map_or(0, char::len_utf8);
    (width, leading)
}
/// Discards the index half of an `(index, item)` pair, such as those yielded by `char_indices`,
/// and hands back only the item.
fn get_item<I>(pair: (usize, I)) -> I {
    pair.1
}
/// A haystack type for matching against the [`char`]s in a [`&str`](str). This type abstracts over
/// the variable width scalars contained, to allow indexing without panics.
///
/// To accommodate this, calls to [`go_to`](Self::go_to) should only be made with an index
/// previously produced by this type for the specific haystack. Failure to do so may cause a panic
/// when slicing on an invalid unicode boundary.
#[derive(Clone)]
pub struct StrStack<'a> {
    /// The entire string being matched against.
    inner: &'a str,
    /// Byte offset into `inner` of the current item.
    index: usize,
}
8485impl<'a> IntoHaystack<'a, StrStack<'a>> for &'a str {
86fn into_haystack(self) -> StrStack<'a> {
87 StrStack {
88 inner: self,
89 index: 0,
90 }
91 }
92}
9394impl<'a> Iterator for StrStack<'a> {
95type Item = char;
9697fn next(&mut self) -> Option<Self::Item> {
98let (width, first) = get_first_char(self.remainder_as_slice());
99// The width won't exceed the remaining slice, so it can't overflow then length.
100self.index += width;
101 first
102 }
103}
104105impl<'a> HaystackIter<'a> for StrStack<'a> {
106type Slice = &'a str;
107108fn current_item(&self) -> Option<Self::Item> {
109 get_item(get_first_char(self.remainder_as_slice()))
110 }
111112fn current_index(&self) -> usize {
113self.index
114 }
115116fn whole_slice(&self) -> Self::Slice {
117self.inner
118 }
119120fn remainder_as_slice(&self) -> Self::Slice {
121&self.inner[self.index..]
122 }
123124fn slice_with(&self, range: Range<usize>) -> Self::Slice {
125&self.inner[range]
126 }
127128fn go_to(&mut self, index: usize) {
129self.index = index;
130 }
131}
132133impl<'a> Debug for StrStack<'a> {
134fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
135let mut len = 0;
136f.write_fmt(format_args!("\""))write!(f, "\"")?;
137138self.inner.char_indices().try_for_each(|(index, ch)| {
139let mut debug = ch.escape_debug();
140if index < self.index {
141 len += debug.len();
142 }
143 debug.try_for_each(|debug_ch| f.write_fmt(format_args!("{0}", debug_ch))write!(f, "{debug_ch}"))
144 })?;
145146f.write_fmt(format_args!("\"\n "))write!(f, "\"\n ")?;
147 (0..len).try_for_each(|_| f.write_fmt(format_args!(" "))write!(f, " "))?;
148f.write_fmt(format_args!("^"))write!(f, "^")149 }
150}
/// A haystack type for matching against the [`u8`]s in a [`&[u8]`](slice). This type provides very
/// straightforward indexing and iteration over the contained slice.
#[derive(Clone)]
pub struct ByteStack<'a> {
    /// The entire byte slice being matched against.
    inner: &'a [u8],
    /// Offset into `inner` of the current item.
    index: usize,
}
159160impl<'a> IntoHaystack<'a, ByteStack<'a>> for &'a [u8] {
161fn into_haystack(self) -> ByteStack<'a> {
162 ByteStack {
163 inner: self,
164 index: 0,
165 }
166 }
167}
168169impl<'a> Iterator for ByteStack<'a> {
170type Item = u8;
171172fn next(&mut self) -> Option<Self::Item> {
173let byte = self.inner.get(self.index).copied();
174175if byte.is_some() {
176self.index += 1;
177 }
178179 byte
180 }
181}
182183impl<'a> HaystackIter<'a> for ByteStack<'a> {
184type Slice = &'a [u8];
185186fn current_item(&self) -> Option<Self::Item> {
187self.inner.get(self.index).copied()
188 }
189190fn current_index(&self) -> usize {
191self.index
192 }
193194fn whole_slice(&self) -> Self::Slice {
195self.inner
196 }
197198fn remainder_as_slice(&self) -> Self::Slice {
199// FIXME: Check for possible panics when slicing.
200&self.inner[self.index..]
201 }
202203fn slice_with(&self, range: Range<usize>) -> Self::Slice {
204&self.inner[range]
205 }
206207fn go_to(&mut self, index: usize) {
208self.index = index;
209 }
210}
211212impl<'a> Debug for ByteStack<'a> {
213fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
214f.write_fmt(format_args!("b\""))write!(f, "b\"")?;
215216self.inner.iter().try_for_each(|byte| f.write_fmt(format_args!("{0:02x}", byte))write!(f, "{:02x}", byte))?;
217218f.write_fmt(format_args!("\"\n "))write!(f, "\"\n ")?;
219 (0..self.index).try_for_each(|_| f.write_fmt(format_args!(" "))write!(f, " "))?;
220f.write_fmt(format_args!("^"))write!(f, "^")221 }
222}