ct_regex_internal/haystack/item.rs
1use std::fmt::Debug;
2
/// Private module that exists only to hold the [`Sealed`](haystack_item::Sealed) marker trait.
///
/// `HaystackItem` lists `Sealed` as a supertrait, so a type can only implement `HaystackItem` if
/// it also implements `Sealed`. Keeping the marker inside this non-`pub` module is what backs the
/// "sealed" guarantee described in the `HaystackItem` documentation.
mod haystack_item {
    /// Marker trait with no methods, required as a supertrait by `HaystackItem`.
    pub trait Sealed {}
}
6
7use haystack_item::Sealed;
8
/// A trait that represents an individual item that can be matched against a
/// [`Regex`](crate::expr::Regex). The primary (and only) two implementers are [`char`] and [`u8`].
///
/// # Sealed
///
/// This trait is sealed, preventing other implementations, because the `regex!` macro can't
/// produce `Regex` types that match against any `HaystackItem` other than the default. If you need
/// to match against another item type and want to use this crate, you may as well fork it so that
/// you don't have to write manual `Matcher` expressions.
pub trait HaystackItem: Debug + Default + Copy + Eq + Ord + Sealed {
    /// Creates a `Vec` of this item from the provided `&str`, used to convert string literals from
    /// parsed regular expressions into individual `HaystackItem`s that can be matched in a
    /// haystack.
    fn collect_from_str(value: &str) -> Vec<Self>;

    /// Creates a `Vec` of this item from the provided `&[u8]`, used to convert a series of bytes to
    /// match from parsed regular expressions into individual `HaystackItem`s.
    ///
    /// # Panics
    ///
    /// The [`char`] implementation panics if `value` is not valid UTF-8.
    fn collect_from_bytes(value: &[u8]) -> Vec<Self>;

    /// Checks if this item is a newline character (`'\n'` or `b'\n'`). Used for string and line
    /// anchoring.
    fn is_newline(self) -> bool;

    /// Checks if this item is a carriage return character (`'\r'` or `b'\r'`). Used for string and
    /// line anchoring.
    fn is_return(self) -> bool;
}
36
/// Splits off the leading `char` of `value` and reports how many bytes it occupies.
///
/// Returns a `(width, character)` pair, where `width` is the UTF-8 encoded length in bytes of the
/// first character and `character` is that character. An empty `value` yields `(0, None)`.
pub fn first_char_and_width(value: &str) -> (usize, Option<char>) {
    // The UTF-8 length of the first `char` is exactly the byte offset at which the following
    // character would start, so the width can be read straight off the decoded character.
    match value.chars().next() {
        Some(leading) => (leading.len_utf8(), Some(leading)),
        None => (0, None),
    }
}
47
/// Returns the first `char` of `value`, or `None` when `value` is empty.
pub fn first_char(value: &str) -> Option<char> {
    let mut remaining = value.chars();
    remaining.next()
}
51
52impl Sealed for char {}
53
54impl HaystackItem for char {
55 fn collect_from_str(value: &str) -> Vec<Self> {
56 value.chars().collect()
57 }
58
59 fn collect_from_bytes(value: &[u8]) -> Vec<Self> {
60 Self::collect_from_str(
61 str::from_utf8(value).expect("failed to convert bytes to valid unicode")
62 )
63
64 }
65
66 fn is_newline(self) -> bool {
67 self == '\n'
68 }
69
70 fn is_return(self) -> bool {
71 self == '\r'
72 }
73}
74
75impl Sealed for u8 {}
76
77impl HaystackItem for u8 {
78 fn collect_from_str(value: &str) -> Vec<Self> {
79 Self::collect_from_bytes(value.as_bytes())
80 }
81
82
83 fn collect_from_bytes(s: &[u8]) -> Vec<Self> {
84 s.to_vec()
85 }
86
87 fn is_newline(self) -> bool {
88 self == b'\n'
89 }
90
91 fn is_return(self) -> bool {
92 self == b'\r'
93 }
94}