standard_lib/collections/hash/set/
hash_set.rs

1use std::borrow::Borrow;
2use std::fmt::{self, Debug, Display, Formatter};
3use std::hash::{BuildHasher, Hash, RandomState};
4use std::iter::TrustedLen;
5use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Sub, SubAssign};
6
7use super::Iter;
8use crate::collections::contiguous::Vector;
9use crate::collections::hash::HashMap;
10#[doc(inline)]
11pub use crate::collections::hash::map::IndexNoCap;
12#[doc(inline)]
13pub use crate::collections::traits::set::{SetInterface, SetIterator};
14use crate::util::fmt::DebugRaw;
15use crate::util::result::ResultExtension;
16
17/// A set of values that prevents duplicates with the help of the [`Hash`] trait.
18///
19/// Relies on [`HashMap`] internally, see documentation there for additional details.
20///
21/// A custom load factor is not supported at this point, with the default being 4/5.
22///
23/// It is a logic error for keys in a HashSet to be manipulated in a way that changes their hash.
24/// Because of this, HashSet's API prevents mutable access to its keys.
25///
26/// # Time Complexity
27/// See [`HashMap`] with the following additions.
28///
29/// Variables are defined as follows:
30/// - `n`: The number of items in the HashSet.
31/// - `m`: The number of items in the second HashSet.
32///
33/// | Method | Complexity |
34/// |-|-|
35/// | `difference`*, `-` | `O(n)` |
36/// | `-=` | `O(m)` |
37/// | `symmetric_difference`*, `^` | `O(n+m)` |
38/// | `^=` | `O(n+m)`**, `O(m)` |
39/// | `intersection`*, `&` | `O(n)` |
40/// | `&=` | `O(n)` |
41/// | `union`*, `\|` | `O(n+m)` |
42/// | `\|=` | `O(n+m)`**, `O(m)` |
43/// | `is_subset` | `O(m)` |
44/// | `is_superset` | `O(n)` |
45///
46/// In the event of a has collision, all methods will take additional time. This additional time is
47/// kept at a minimum and hash collisions are unlikely especially with a large capacity.
48///
49/// \* When exhausted.
50///
51/// \** If the HashMap already has capacity for the additional items, these methods will take `O(m)`
52/// instead.
53pub struct HashSet<T: Hash + Eq, B: BuildHasher = RandomState> {
54    // Yay, we get to do the thing where unit type evaluates to a no-op.
55    pub(crate) inner: HashMap<T, (), B>,
56}
57
58impl<T: Hash + Eq, B: BuildHasher + Default> HashSet<T, B> {
59    /// Creates a new HashSet with capacity 0 and the default value for `B`. Memory will be
60    /// allocated when the capacity changes.
61    pub fn new() -> HashSet<T, B> {
62        HashSet {
63            inner: HashMap::new(),
64        }
65    }
66
67    /// Creates a new HashSet with the provided `cap`acity, allowing insertions without
68    /// reallocation. The default hasher will be used.
69    pub fn with_cap(cap: usize) -> HashSet<T, B> {
70        HashSet {
71            inner: HashMap::with_cap(cap),
72        }
73    }
74}
75
76impl<T: Hash + Eq, B: BuildHasher> HashSet<T, B> {
77    /// Creates a new HashSet with capacity 0 and the provided `hasher`.
78    pub fn with_hasher(hasher: B) -> HashSet<T, B> {
79        HashSet {
80            inner: HashMap::with_hasher(hasher),
81        }
82    }
83
84    /// Creates a new HashSet with the provided `cap`acity and `hasher`.
85    pub fn with_cap_and_hasher(cap: usize, hasher: B) -> HashSet<T, B> {
86        HashSet {
87            inner: HashMap::with_cap_and_hasher(cap, hasher),
88        }
89    }
90
91    /// Returns the length of the HashSet (the number of elements it contains).
92    pub const fn len(&self) -> usize {
93        self.inner.len()
94    }
95
96    /// Returns true if the HashSet contains no elements.
97    pub const fn is_empty(&self) -> bool {
98        self.inner.is_empty()
99    }
100
101    /// Returns the current capacity of the HashSet.
102    pub const fn cap(&self) -> usize {
103        self.inner.cap()
104    }
105
106    /// Inserts the provided item into the HashSet, increasing its capacity if required. If the item
107    /// was already included, no change is made and the method returns false. In other words, the
108    /// method returns true if the insertion changes the HashSet.
109    pub fn insert(&mut self, item: T) -> bool {
110        if self.inner.should_grow() {
111            self.inner.grow()
112        }
113
114        // SAFETY: We've just grown if necessary.
115        let index = unsafe { self.inner.find_index_for_key(&item).unwrap_unchecked() };
116
117        // The Bucket at index is either empty or contains an equal item.
118        match self.inner.arr[index] {
119            Some(_) => false,
120            None => {
121                // Create a new Bucket with the provided values.
122                self.inner.arr[index] = Some((item, ()));
123                self.inner.len += 1;
124                true
125            },
126        }
127    }
128
129    /// Inserts the provided item, without checking if the HashSet has enough capacity. If the item
130    /// was already included, no change is made and the method returns false.
131    ///
132    /// # Safety
133    /// It is the responsibility of the caller to ensure that the HashSet has enough capacity to add
134    /// the provided item, using methods like [`reserve`][HashSet::reserve] or
135    /// [`with_cap`](HashSet::with_cap).
136    ///
137    /// # Panics
138    /// Panics if the HashSet has a capacity of 0, as it isn't possible to find a bucket associated
139    /// with the item.
140    pub unsafe fn insert_unchecked(&mut self, item: T) -> bool {
141        let index = self.inner.find_index_for_key(&item).ok_or(IndexNoCap).throw();
142
143        // The Bucket at index is either empty or contains an equal item.
144        match &mut self.inner.arr[index] {
145            Some(_) => true,
146            None => {
147                // Create a new Bucket with the provided values.
148                self.inner.arr[index] = Some((item, ()));
149                self.inner.len += 1;
150                false
151            },
152        }
153    }
154
155    /// Returns true if the HashSet contains `item`.
156    #[inline(always)]
157    pub fn contains<Q>(&self, item: &Q) -> bool
158    where
159        T: Borrow<Q>,
160        Q: Hash + Eq + ?Sized,
161    {
162        <HashSet<T, B> as SetInterface<T, Q>>::contains(self, item)
163    }
164
165    /// Returns a reference to the contained element equal to the provided `item` or None if there
166    /// isn't one.
167    #[inline(always)]
168    pub fn get<Q>(&self, item: &Q) -> Option<&T>
169    where
170        T: Borrow<Q>,
171        Q: Hash + Eq + ?Sized,
172    {
173        <HashSet<T, B> as SetInterface<T, Q>>::get(self, item)
174    }
175
176    /// Removes `item` from the HashSet, returning it if it exists.
177    #[inline(always)]
178    pub fn remove<Q>(&mut self, item: &Q) -> Option<T>
179    where
180        T: Borrow<Q>,
181        Q: Hash + Eq + ?Sized,
182    {
183        <HashSet<T, B> as SetInterface<T, Q>>::remove(self, item)
184    }
185
186    /// Increases the capacity of the HashSet to ensure that len + `extra` elements will fit without
187    /// exceeding the load factor.
188    pub fn reserve(&mut self, extra: usize) {
189        self.inner.reserve(extra)
190    }
191
192    /// Returns an iterator over all elements in the HashSet, as references.
193    #[inline(always)]
194    pub fn iter<'a>(&'a self) -> Iter<'a, T> {
195        <HashSet<T, B> as SetIterator<T>>::iter(self)
196    }
197}
198
199impl<T: Hash + Eq + Borrow<Q>, B: BuildHasher, Q: Hash + Eq + ?Sized> SetInterface<T, Q> for HashSet<T, B> {
200    fn contains(&self, item: &Q) -> bool {
201        self.inner.contains(item)
202    }
203
204    fn get(&self, item: &Q) -> Option<&T> {
205        self.inner.get_entry(item).map(|(k, _)| k)
206    }
207
208    fn remove(&mut self, item: &Q) -> Option<T> {
209        self.inner.remove_entry(item).map(|(k, _)| k)
210    }
211}
212
213impl<T: Hash + Eq, B: BuildHasher> SetIterator<T> for HashSet<T, B> {
214    type Iter<'a> = Iter<'a, T> where Self: 'a;
215
216    fn iter<'a>(&'a self) -> Self::Iter<'a> {
217        self.into_iter()
218    }
219}
220
221impl<T: Hash + Eq + Clone, B: BuildHasher + Default> BitOr for HashSet<T, B> {
222    type Output = HashSet<T, B>;
223
224    /// Returns the union of `self` and `rhs`, as a HashSet. (`self ∪ other`)
225    fn bitor(self, rhs: Self) -> Self::Output {
226        self.into_union(rhs).collect()
227    }
228}
229
230impl<T: Hash + Eq, B: BuildHasher> BitOrAssign for HashSet<T, B> {
231    /// Adds all items from `rhs` to `self` to form a union in place.
232    fn bitor_assign(&mut self, rhs: Self) {
233        self.reserve(rhs.cap());
234        for item in rhs {
235            // SAFETY: self has capacity for all items in rhs because we just called reserve.
236            unsafe { self.insert_unchecked(item); }
237        }
238    }
239}
240
241impl<T: Hash + Eq + Clone, B: BuildHasher + Default> BitAnd for HashSet<T, B> {
242    type Output = HashSet<T, B>;
243
244    /// Returns the intersection of `self` and `rhs`, as a HashSet. (`self ∩ other`)
245    fn bitand(self, rhs: Self) -> Self::Output {
246        self.into_intersection(rhs).collect()
247    }
248}
249
250impl<T: Hash + Eq, B: BuildHasher> BitAndAssign for HashSet<T, B> {
251    /// Removes all items not in `rhs` from `self` to form an intersection in place.
252    fn bitand_assign(&mut self, rhs: Self) {
253        let mut to_remove = Vector::with_cap(self.cap());
254        for item in self.iter() {
255            if !rhs.contains(item) {
256                to_remove.push(
257                    // SAFETY: We are in a loop over self, so cap > 0.
258                    unsafe { self.inner.find_index_for_key(item).unwrap_unchecked() }
259                );
260            }
261        }
262        for index in to_remove {
263            if self.inner.arr[index].is_some() {
264                self.inner.arr[index] = None;
265                self.inner.len -= 1;
266            }
267        }
268    }
269}
270
271impl<T: Hash + Eq + Clone, B: BuildHasher + Default> BitXor for HashSet<T, B> {
272    type Output = HashSet<T, B>;
273
274    /// Returns the symmetric difference of `self` and `rhs`, as a HashSet. (`self △ other`)
275    fn bitxor(self, rhs: Self) -> Self::Output {
276        self.into_symmetric_difference(rhs).collect()
277    }
278}
279
280impl<T: Hash + Eq, B: BuildHasher> BitXorAssign for HashSet<T, B> {
281    /// Removes all items in both `rhs` and `self` from `self` to form the symmetric difference of
282    /// the two in place.
283    fn bitxor_assign(&mut self, rhs: Self) {
284        for item in rhs {
285            if self.remove(&item).is_none() {
286                self.insert(item);
287            }
288        }
289    }
290}
291
292impl<T: Hash + Eq + Clone, B: BuildHasher + Default> Sub for HashSet<T, B> {
293    type Output = HashSet<T, B>;
294
295    /// Returns the difference of `self` and `rhs`, as a HashSet. (`self \ other`)
296    fn sub(self, rhs: Self) -> Self::Output {
297        self.into_difference(rhs).collect()
298    }
299}
300
301impl<T: Hash + Eq, B: BuildHasher> SubAssign for HashSet<T, B> {
302    /// Removes all items in `rhs` from `self` to form the difference of the two in place.
303    fn sub_assign(&mut self, rhs: Self) {
304        for item in rhs {
305            self.remove(&item);
306        }
307    }
308}
309
310impl<T: Hash + Eq, B: BuildHasher + Default> Default for HashSet<T, B> {
311    fn default() -> Self {
312        Self::new()
313    }
314}
315
316impl<T: Hash + Eq, B: BuildHasher> Extend<T> for HashSet<T, B> {
317    fn extend<A: IntoIterator<Item = T>>(&mut self, iter: A) {
318        for item in iter {
319            self.insert(item);
320        }
321    }
322
323    fn extend_one(&mut self, item: T) {
324        self.insert(item);
325    }
326
327    fn extend_reserve(&mut self, additional: usize) {
328        self.reserve(additional);
329    }
330
331    unsafe fn extend_one_unchecked(&mut self, item: T)
332    where
333        Self: Sized,
334    {
335        // SAFETY: extend_reserve is implemented correctly, so all other safety requirements are the
336        // responsibility of the caller.
337        unsafe { self.insert_unchecked(item); }
338    }
339}
340
341impl<T, B, I> From<I> for HashSet<T, B>
342where
343    T: Hash + Eq,
344    B: BuildHasher + Default,
345    I: Iterator<Item = T> + ExactSizeIterator + TrustedLen,
346{
347    fn from(value: I) -> Self {
348        let iter = value.into_iter();
349        let mut set = HashSet::with_cap(iter.len());
350
351        for item in iter {
352            // SAFETY: HashSet has been created with the right capacity.
353            unsafe { set.insert_unchecked(item); }
354        }
355
356        set
357    }
358}
359
360impl<T: Hash + Eq, B: BuildHasher + Default> FromIterator<T> for HashSet<T, B> {
361    fn from_iter<I: IntoIterator<Item = T>>(value: I) -> Self {
362        let iter = value.into_iter();
363        let mut set = HashSet::with_cap(iter.size_hint().0);
364
365        for item in iter {
366            set.insert(item);
367        }
368
369        set
370    }
371}
372
373impl<T: Hash + Eq, B: BuildHasher> PartialEq for HashSet<T, B> {
374    /// Two HashSets are considered equal if they contain exactly the same elements.
375    fn eq(&self, other: &Self) -> bool {
376        self.len() == other.len()
377            && self.is_subset(other)
378            && self.is_superset(other)
379    }
380}
381
382impl<T: Hash + Eq, B: BuildHasher> Eq for HashSet<T, B> {}
383
384impl<T: Hash + Eq + Debug, B: BuildHasher + Debug> Debug for HashSet<T, B> {
385    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
386        f.debug_struct("HashSet")
387            .field_with("buckets", |f| f.debug_list().entries(
388                self.inner.arr.iter()
389                    .map(|o| DebugRaw(match o {
390                        Some((t, _)) => format!("{t:?}"),
391                        None => "_".into(),
392                    }))
393            ).finish())
394            .field("len", &self.len())
395            .field("cap", &self.cap())
396            .field("hasher", &self.inner.hasher)
397            .finish()
398    }
399}
400
401impl<T: Hash + Eq + Debug, B: BuildHasher> Display for HashSet<T, B> {
402    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
403        write!(f, "#")?;
404        f.debug_set().entries(self.iter()).finish()
405    }
406}