standard_lib/collections/hash/map/
hash_map.rs

1use std::borrow::Borrow;
2use std::cmp;
3use std::fmt;
4use std::fmt::{Debug, Display, Formatter};
5use std::hash::{BuildHasher, Hash, RandomState};
6use std::iter::TrustedLen;
7use std::mem;
8use std::ops::Index;
9
10use super::{IndexNoCap, IntoKeys, IntoValues, Iter, Keys, Values, ValuesMut};
11use crate::collections::contiguous::{Array, Vector};
12use crate::util::error::NoValueForKey;
13use crate::util::fmt::DebugRaw;
14use crate::util::result::ResultExtension;
15
16const MIN_ALLOCATED_CAP: usize = 2;
17
18const GROWTH_FACTOR: usize = 2;
19
20const LOAD_FACTOR_NUMERATOR: usize = 4;
21const LOAD_FACTOR_DENOMINATOR: usize = 5;
22
23/// A map of keys to values which relies on the keys implementing [`Hash`].
24///
25/// A custom load factor is not supported at this point, with the default being 4/5.
26///
27/// It is a logic error for keys in a HashMap to be manipulated in a way that changes their hash.
28/// Because of this, HashMap's API prevents mutable access to its keys.
29///
30/// # Time Complexity
31/// For this analysis of time complexity, variables are defined as follows:
32/// - `n`: The number of items in the HashMap.
33///
34/// | Method | Complexity |
35/// |-|-|
36/// | `len` | `O(1)` |
37/// | `insert` | `O(1)`**, `O(n)` |
38/// | `insert_unchecked` | `O(1)`* |
39/// | `get` | `O(1)`* |
40/// | `remove` | `O(1)`* |
41/// | `contains` | `O(1)`* |
42/// | `reserve` | `O(n)`***, `O(1)` |
43///
44/// \* In the event of a has collision, these functions will take additional time, while a valid
45/// / correct location is found. This additional time is kept at a minimum and hash collisions are
46/// unlikely especially with a large capacity.
47///
48/// \** If the HashMap doesn't have enough capacity for the new element, `insert` will take `O(n)`.
49/// \* applies as well.
50///
51/// \*** If the HashMap has enough capacity for the additional items already, `reserve` is `O(1)`.
52pub struct HashMap<K: Hash + Eq, V, B: BuildHasher = RandomState> {
53    pub(crate) arr: Array<Bucket<K, V>>,
54    pub(crate) len: usize,
55    pub(crate) hasher: B,
56}
57
58pub(crate) type Bucket<K, V> = Option<(K, V)>;
59
60impl<K: Hash + Eq, V, B: BuildHasher + Default> HashMap<K, V, B> {
61    /// Creates a new HashMap with capacity 0 and the default value for `B`. Memory will be
62    /// allocated when the capacity changes.
63    pub fn new() -> HashMap<K, V, B> {
64        HashMap {
65            arr: Array::new(),
66            len: 0,
67            hasher: B::default(),
68        }
69    }
70
71    /// Creates a new HashMap with the provided `cap`acity, allowing insertions without
72    /// reallocation. The default hasher will be used.
73    pub fn with_cap(cap: usize) -> HashMap<K, V, B> {
74        // TODO: Adjust this to prevent reallocation during `cap` insertions.
75        HashMap {
76            arr: Array::repeat_default(cap),
77            len: 0,
78            hasher: B::default(),
79        }
80    }
81}
82
83impl<K: Hash + Eq, V, B: BuildHasher> HashMap<K, V, B> {
84    /// Creates a new HashMap with capacity 0 and the provided `hasher`.
85    pub fn with_hasher(hasher: B) -> HashMap<K, V, B> {
86        HashMap {
87            arr: Array::new(),
88            len: 0,
89            hasher,
90        }
91    }
92
93    /// Creates a new HashMap with the provided `cap`acity and `hasher`.
94    pub fn with_cap_and_hasher(cap: usize, hasher: B) -> HashMap<K, V, B> {
95        HashMap {
96            arr: Array::repeat_default(cap),
97            len: 0,
98            hasher,
99        }
100    }
101
102    /// Returns the length of the HashMap (the number of entries it contains).
103    pub const fn len(&self) -> usize {
104        self.len
105    }
106
107    /// Returns true if the HashMap contains no entries.
108    pub const fn is_empty(&self) -> bool {
109        self.len() == 0
110    }
111
112    /// Returns the current capacity of the HashMap.
113    pub const fn cap(&self) -> usize {
114        self.arr.size
115    }
116
117    /// Inserts the provided `key`-`value` pair into the HashMap, increasing its capacity if
118    /// required. If the key was already associated with a value, the previous value is returned.
119    ///
120    /// As with the standard library, the key isn't changed if it already exists.
121    pub fn insert(&mut self, key: K, value: V) -> Option<V> {
122        if self.should_grow() {
123            self.grow()
124        }
125
126        // SAFETY: We've just grown if necessary.
127        let index = unsafe { self.find_index_for_key(&key).unwrap_unchecked() };
128
129        // The bucket at index is either empty or contains an equal key.
130        match &mut self.arr[index] {
131            Some(existing) => {
132                // Replace the value with the provided one.
133                Some(mem::replace(
134                    &mut existing.1,
135                    value
136                ))
137            },
138            None => {
139                // Create a new bucket with the provided values.
140                self.arr[index] = Some((key, value));
141                self.len += 1;
142                None
143            },
144        }
145    }
146
147    /// Inserts the provided `key`-`value` pair without checking if the HashMap has enough capacity.
148    /// If the key was already associated with a value, the previous value is returned.
149    ///
150    /// As with the standard library, the key isn't changed if it already exists.
151    ///
152    /// # Safety
153    /// It is the responsibility of the caller to ensure that the HashMap has enough capacity to add
154    /// the provided entry, using methods like [`reserve`][HashMap::reserve] or
155    /// [`with_cap`](HashMap::with_cap).
156    ///
157    /// # Panics
158    /// Panics if the HashMap has a capacity of 0, as it isn't possible to find a bucket associated
159    /// with the key.
160    pub unsafe fn insert_unchecked(&mut self, key: K, value: V) -> Option<V> {
161        let index = self.find_index_for_key(&key).ok_or(IndexNoCap).throw();
162
163        // The bucket at index is either empty or contains an equal key.
164        match &mut self.arr[index] {
165            Some(existing) => {
166                // Replace the value with the provided one.
167                Some(mem::replace(
168                    &mut existing.1,
169                    value
170                ))
171            },
172            None => {
173                // Create a new bucket with the provided values.
174                self.arr[index] = Some((key, value));
175                self.len += 1;
176                None
177            },
178        }
179    }
180
181    /// Returns the entry for the provided `key` as a key-value pair or None if there is no entry.
182    pub fn get_entry<Q>(&self, key: &Q) -> Option<(&K, &V)>
183    where
184        // We're introducing a new type parameter here, Q which represents a borrowed version of K
185        // where equality and hashing carries over the borrow.
186        K: Borrow<Q>,
187        Q: Hash + Eq + ?Sized,
188    {
189        let index = self.find_index_for_key(key)?;
190
191        // If the bucket at index is empty, the map doesn't contain the key.
192        match &self.arr[index] {
193            Some(existing) => Some((&existing.0, &existing.1)),
194            None => None,
195        }
196    }
197
198    /// Returns a reference to the value associated with the provided `key` or None if the map
199    /// contains no values for `key`.
200    pub fn get<Q>(&self, key: &Q) -> Option<&V>
201    where
202        K: Borrow<Q>,
203        Q: Hash + Eq + ?Sized,
204    {
205        let index = self.find_index_for_key(key)?;
206
207        // If the bucket at index is empty, the map doesn't contain the key.
208        match &self.arr[index] {
209            Some(existing) => Some(&existing.1),
210            None => None,
211        }
212    }
213
214    /// Returns a mutable reference to the value associated with the provided `key` or None if the
215    /// map contains no values for `key`.
216    pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut V>
217    where
218        K: Borrow<Q>,
219        Q: Hash + Eq + ?Sized,
220    {
221        let index = self.find_index_for_key(key)?;
222
223        // If the bucket at index is empty, the map doesn't contain the key.
224        match &mut self.arr[index] {
225            Some(existing) => Some(&mut existing.1),
226            None => None,
227        }
228    }
229
230    /// Removes the entry associated with `key`, returning it if it exists.
231    pub fn remove_entry<Q>(&mut self, key: &Q) -> Option<(K, V)>
232    where
233        K: Borrow<Q>,
234        Q: Hash + Eq + ?Sized,
235    {
236        let start = self.find_index_for_key(key)?;
237
238        // If the bucket at index is empty, the map doesn't contain the key.
239        let removed = match mem::take(&mut self.arr[start]) {
240            Some(entry) => {
241                self.len -= 1;
242                entry
243            },
244            None => None?,
245        };
246
247        let mut potential_collisions = Vector::new();
248
249        // UNCHECKED: find_index_for_key returned some, so the cap is not 0.
250        let mut index = (start + 1) % self.cap();
251
252        while self.arr[index].is_some() {
253            // SAFETY: We know that the value at index is some given the loop condition.
254            let entry = unsafe { mem::take(&mut self.arr[index]).unwrap_unchecked() };
255            potential_collisions.push((
256                // SAFETY: We've already propagated a None from find_index_for_key, so
257                // index_from_key will return Some.
258                unsafe { self.index_from_key(&entry.0).unwrap_unchecked() },
259                entry
260            ));
261            index = (index + 1) % self.cap();
262        }
263
264        // Sort by distance to the right from the starting index.
265        potential_collisions.sort_by_key(|(ideal, _)| (*ideal as isize - start as isize).rem_euclid(self.cap() as isize));
266
267        for (ideal, entry) in potential_collisions {
268            let mut index = ideal;
269
270            // Find the closest index on the right of the ideal one, remaining within the original
271            // block naturally.
272            while self.arr[index].is_some() {
273                index = (index + 1) % self.cap();
274            }
275
276            self.arr[index] = Some(entry);
277        }
278
279        Some(removed)
280    }
281
282    /// Removes the entry associated with `key`, returning the value if it exists.
283    pub fn remove<Q>(&mut self, key: &Q) -> Option<V>
284    where
285        K: Borrow<Q>,
286        Q: Hash + Eq + ?Sized,
287    {
288        self.remove_entry(key).map(|(_, v)| v)
289    }
290
291    /// Returns true if there is a value associated with the provided `key`.
292    pub fn contains<Q>(&self, key: &Q) -> bool
293    where
294        K: Borrow<Q>,
295        Q: Hash + Eq + ?Sized,
296    {
297        let index = self.find_index_for_key(key);
298
299        match index {
300            Some(i) => self.arr[i].is_some(),
301            None => false,
302        }
303    }
304
305    /// Increases the capacity of the HashMap to ensure that len + `extra` entries will fit without
306    /// exceeding the load factor.
307    pub fn reserve(&mut self, extra: usize) {
308        let new_cap = self.len.strict_add(extra) * LOAD_FACTOR_DENOMINATOR / LOAD_FACTOR_NUMERATOR;
309        if new_cap <= self.cap() { return; }
310
311        self.realloc_with_cap(new_cap);
312    }
313
314    /// Returns an iterator over all key-value pairs in the HashMap, as references.
315    pub fn iter(&self) -> Iter<'_, K, V> {
316        self.into_iter()
317    }
318
319    /// Consumes self and returns an iterator over all contained keys.
320    pub fn into_keys(self) -> IntoKeys<K, V> {
321        IntoKeys(self.into_iter())
322    }
323
324    /// Returns an iterator over all keys in the HashMap, as references.
325    pub fn keys<'a>(&'a self) -> Keys<'a, K, V> {
326        Keys(self.iter())
327    }
328
329    /// Consumes self and returns an iterator over all contained values.
330    pub fn into_values(self) -> IntoValues<K, V> {
331        IntoValues(self.into_iter())
332    }
333
334    /// Returns an iterator over all values in the HashMap, as mutable references.
335    pub fn values_mut<'a>(&'a mut self) -> ValuesMut<'a, K, V> {
336        ValuesMut {
337            len: self.len(),
338            inner: self.arr.iter_mut(),
339        }
340    }
341
342    /// Returns an iterator over all values in the HashMap, as references.
343    pub fn values<'a>(&'a self) -> Values<'a, K, V> {
344        Values(self.iter())
345    }
346}
347
348impl<K: Hash + Eq, V, B: BuildHasher> HashMap<K, V, B> {
349    /// Determines whether the HashMap's length exceeds the load capacity, suggesting that it should
350    /// grow before inserting new entries.
351    pub(crate) const fn should_grow(&self) -> bool {
352        self.len >= self.arr.size * LOAD_FACTOR_NUMERATOR / LOAD_FACTOR_DENOMINATOR
353    }
354
355    /// Grows the HashMap by the growth factor, ensuring that it can hold additional entries.
356    pub(crate) fn grow(&mut self) {
357        let new_cap = cmp::max(self.cap() * GROWTH_FACTOR, MIN_ALLOCATED_CAP);
358
359        self.realloc_with_cap(new_cap)
360    }
361
362    /// Reallocates the HashMap to have capacity equal to `new_cap`, if doing so wouldn't cause the
363    /// map to overload. (There isn't a logical way for the map to shrink and drop entries, so this
364    /// isn't allowed.)
365    pub(crate) fn realloc_with_cap(&mut self, new_cap: usize) {
366        // Can't handle dropping values at this point.
367        if new_cap * LOAD_FACTOR_NUMERATOR / LOAD_FACTOR_DENOMINATOR < self.len { return; }
368
369        // Replace the Array first so that we can consume the old Array.
370        let old_arr = mem::replace(&mut self.arr, Array::repeat_default(new_cap));
371
372        for entry in old_arr.into_iter().flatten() {
373            // SAFETY: If the new capacity is 0, the old_arr has no items and we can't enter
374            // this loop.
375            let index = unsafe { self.find_index_for_key(&entry.0).unwrap_unchecked() };
376
377            // Move the bucket into the new Array.
378            self.arr[index] = Some(entry);
379        }
380    }
381
382    /// Calculates the ideal index of a bucket for the provided `hashable` (or None if the HashMap
383    /// has 0 capacity). This method doesn't consider hash collisions, see
384    /// [`HashMap::find_index_for_key`] for that functionality.
385    pub(crate) fn index_from_key<H: Hash + ?Sized>(&self, hashable: &H) -> Option<usize> {
386        let key_hash = self.hasher.hash_one(hashable);
387        key_hash.checked_rem(self.cap() as u64).map(|i| i as usize)
388    }
389
390    /// Finds the first valid index for the provided `key` (or None if the HashMap has 0 capacity).
391    /// This is done by calculating the ideal index and then iterating until a bucket is found that
392    /// is empty or has an equal key.
393    pub(crate) fn find_index_for_key<Q>(&self, key: &Q) -> Option<usize>
394    where
395        // We're introducing a new type parameter here, Q which represents a borrowed version of K
396        // where equality and hashing carries over the borrow.
397        K: Borrow<Q>,
398        Q: Hash + Eq + ?Sized,
399    {
400        let mut index = self.index_from_key(&key)?;
401
402        // This is where Eq comes in: while there is a value at the current index, but the key
403        // isn't equal, increment the index (wrapping at the capacity) and check again.
404        // Can't enter recursion unless the load factor is 100%.
405        while let Some(existing) = &self.arr[index]
406            && existing.0.borrow() != key
407        {
408            // UNCHECKED: find_index_for_key returned some, so the cap is not 0.
409            index = (index + 1) % self.cap();
410        }
411
412        // After that loop, index is either empty or contains an equal key.
413        Some(index)
414    }
415}
416
417impl<K: Hash + Eq, V, B: BuildHasher + Default> Default for HashMap<K, V, B> {
418    fn default() -> Self {
419        HashMap::new()
420    }
421}
422
423impl<Q, K, V> Index<&Q> for HashMap<K, V>
424where
425    Q: Hash + Eq + ?Sized,
426    K: Hash + Eq + Borrow<Q>,
427{
428    type Output = V;
429
430    fn index(&self, key: &Q) -> &Self::Output {
431        let index = self.find_index_for_key(key).ok_or(NoValueForKey).throw();
432
433        match &self.arr[index] {
434            Some((_, v)) => v,
435            None => Err(NoValueForKey).throw(),
436        }
437    }
438}
439
440impl<K: Hash + Eq, V, B: BuildHasher + Default> Extend<(K, V)> for HashMap<K, V, B> {
441    fn extend<T: IntoIterator<Item = (K, V)>>(&mut self, iter: T) {
442        for (key, value) in iter {
443            self.insert(key, value);
444        }
445    }
446
447    fn extend_one(&mut self, item: (K, V)) {
448        self.insert(item.0, item.1);
449    }
450
451    fn extend_reserve(&mut self, additional: usize) {
452        self.reserve(additional);
453    }
454
455    unsafe fn extend_one_unchecked(&mut self, item: (K, V))
456    where
457        Self: Sized,
458    {
459        // SAFETY: extend_reserve is implemented correctly, so all other safety requirements are the
460        // responsibility of the caller.
461        unsafe { self.insert_unchecked(item.0, item.1); }
462    }
463}
464
465impl<K, V, B, I> From<I> for HashMap<K, V, B>
466where
467    K: Hash + Eq,
468    B: BuildHasher + Default,
469    I: Iterator<Item = (K, V)> + ExactSizeIterator + TrustedLen,
470{
471    fn from(value: I) -> Self {
472        let iter = value.into_iter();
473        let mut map = HashMap::with_cap(iter.len());
474
475        for (key, value) in iter {
476            // SAFETY: HashMap has been created with the right capacity.
477            unsafe { map.insert_unchecked(key, value); }
478        }
479
480        map
481    }
482}
483
484impl<K: Hash + Eq, V, B: BuildHasher + Default> FromIterator<(K, V)> for HashMap<K, V, B> {
485    fn from_iter<I: IntoIterator<Item = (K, V)>>(value: I) -> Self {
486        let iter = value.into_iter();
487        let mut map = HashMap::with_cap(iter.size_hint().0);
488        for (key, value) in iter {
489            map.insert(key, value);
490        }
491        map
492    }
493}
494
495// TODO: impl PartialEq and Eq
496
497impl<K: Hash + Eq + Debug, V: Debug, B: BuildHasher + Debug> Debug for HashMap<K, V, B> {
498    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
499        f.debug_struct("HashMap")
500            .field_with("buckets", |f| f.debug_list().entries(
501                self.arr.iter()
502                    .map(|o| DebugRaw(match o {
503                        Some((k, v)) => format!("({k:?}: {v:?})"),
504                        None => "_".into(),
505                    }))
506            ).finish())
507            .field("len", &self.len)
508            .field("cap", &self.cap())
509            .field("hasher", &self.hasher)
510            .finish()
511    }
512}
513
514impl<K: Hash + Eq + Debug, V: Debug, B: BuildHasher> Display for HashMap<K, V, B> {
515    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
516        write!(f, "#")?;
517        f.debug_map().entries(self.iter()).finish()
518    }
519}