Skip to main content

ct_regex_internal/haystack/
item.rs

1use std::fmt::Debug;
2
// Private module holding the marker trait used to seal `HaystackItem`. The module is not `pub`,
// so `Sealed` is not reachable through this path from outside the enclosing module.
mod haystack_item {
    /// Marker supertrait of `HaystackItem`. It declares no methods; a type must implement it
    /// before it can implement `HaystackItem`.
    pub trait Sealed {}
}
6
7use haystack_item::Sealed;
8
/// A trait that represents an individual item that can be matched against a
/// [`Regex`](crate::expr::Regex). The primary (and only) two implementers are [`char`] and [`u8`].
///
/// # Sealed
///
/// This trait is sealed, preventing implementations because the `regex!` macro can't produce
/// `Regex` types that match against any `HaystackItem` other than the default. If you need to match
/// against another item type and want to use this crate, you may as well fork it so that you don't
/// have to write manual `Matcher` expressions.
pub trait HaystackItem: Debug + Default + Copy + Eq + Ord + Sealed {
    /// Creates a `Vec` of this item from the provided `&str`, used to convert string literals from
    /// parsed regular expressions into individual `HaystackItem`s that can be matched in a
    /// haystack.
    fn collect_from_str(value: &str) -> Vec<Self>;

    /// Creates a `Vec` of this item from the provided `&[u8]`, used to convert a series of bytes to
    /// match from parsed regular expressions into individual `HaystackItem`s.
    ///
    /// # Panics
    ///
    /// The [`char`] implementation panics if `value` is not valid UTF-8.
    fn collect_from_bytes(value: &[u8]) -> Vec<Self>;

    /// Checks if this item is a newline character (`'\n'` or `b'\n'`). Used for string and line
    /// anchoring.
    ///
    /// Returns `true` only for the newline item.
    fn is_newline(self) -> bool;

    /// Checks if this item is a carriage return character (`'\r'` or `b'\r'`). Used for string and
    /// line anchoring.
    ///
    /// Returns `true` only for the carriage return item.
    fn is_return(self) -> bool;
}
36
/// Splits off the leading `char` of `value` and reports how many bytes it occupies.
///
/// Returns `(width, first)`: `first` is the leading character (`None` when `value` is empty) and
/// `width` is the number of bytes that character takes up in `value` (`0` when there is none).
pub fn first_char_and_width(value: &str) -> (usize, Option<char>) {
    let leading = value.chars().next();
    // The UTF-8 length of the decoded character equals the byte offset just past it, so the
    // width can be read off the character itself.
    let width = leading.map_or(0, char::len_utf8);
    (width, leading)
}
47
/// Returns the first `char` of `value`, or `None` when `value` is empty.
pub fn first_char(value: &str) -> Option<char> {
    let mut chars = value.chars();
    chars.next()
}
51
52impl Sealed for char {}
53
54impl HaystackItem for char {
55    fn collect_from_str(value: &str) -> Vec<Self> {
56        value.chars().collect()
57    }
58
59    fn collect_from_bytes(value: &[u8]) -> Vec<Self> {
60        Self::collect_from_str(
61            str::from_utf8(value).expect("failed to convert bytes to valid unicode")
62        )
63
64    }
65
66    fn is_newline(self) -> bool {
67        self == '\n'
68    }
69
70    fn is_return(self) -> bool {
71        self == '\r'
72    }
73}
74
75impl Sealed for u8 {}
76
77impl HaystackItem for u8 {
78    fn collect_from_str(value: &str) -> Vec<Self> {
79        Self::collect_from_bytes(value.as_bytes())
80    }
81
82
83    fn collect_from_bytes(s: &[u8]) -> Vec<Self> {
84        s.to_vec()
85    }
86
87    fn is_newline(self) -> bool {
88        self == b'\n'
89    }
90
91    fn is_return(self) -> bool {
92        self == b'\r'
93    }
94}