standard_lib/collections/hash/map/hash_map.rs
1use std::borrow::Borrow;
2use std::cmp;
3use std::fmt;
4use std::fmt::{Debug, Display, Formatter};
5use std::hash::{BuildHasher, Hash, RandomState};
6use std::iter::TrustedLen;
7use std::mem;
8use std::ops::Index;
9
10use super::{IndexNoCap, IntoKeys, IntoValues, Iter, Keys, Values, ValuesMut};
11use crate::collections::contiguous::{Array, Vector};
12use crate::util::error::NoValueForKey;
13use crate::util::fmt::DebugRaw;
14use crate::util::result::ResultExtension;
15
16const MIN_ALLOCATED_CAP: usize = 2;
17
18const GROWTH_FACTOR: usize = 2;
19
20const LOAD_FACTOR_NUMERATOR: usize = 4;
21const LOAD_FACTOR_DENOMINATOR: usize = 5;
22
23/// A map of keys to values which relies on the keys implementing [`Hash`].
24///
25/// A custom load factor is not supported at this point, with the default being 4/5.
26///
27/// It is a logic error for keys in a HashMap to be manipulated in a way that changes their hash.
28/// Because of this, HashMap's API prevents mutable access to its keys.
29///
30/// # Time Complexity
31/// For this analysis of time complexity, variables are defined as follows:
32/// - `n`: The number of items in the HashMap.
33///
34/// | Method | Complexity |
35/// |-|-|
36/// | `len` | `O(1)` |
37/// | `insert` | `O(1)`**, `O(n)` |
38/// | `insert_unchecked` | `O(1)`* |
39/// | `get` | `O(1)`* |
40/// | `remove` | `O(1)`* |
41/// | `contains` | `O(1)`* |
42/// | `reserve` | `O(n)`***, `O(1)` |
43///
44/// \* In the event of a has collision, these functions will take additional time, while a valid
45/// / correct location is found. This additional time is kept at a minimum and hash collisions are
46/// unlikely especially with a large capacity.
47///
48/// \** If the HashMap doesn't have enough capacity for the new element, `insert` will take `O(n)`.
49/// \* applies as well.
50///
51/// \*** If the HashMap has enough capacity for the additional items already, `reserve` is `O(1)`.
52pub struct HashMap<K: Hash + Eq, V, B: BuildHasher = RandomState> {
53 pub(crate) arr: Array<Bucket<K, V>>,
54 pub(crate) len: usize,
55 pub(crate) hasher: B,
56}
57
58pub(crate) type Bucket<K, V> = Option<(K, V)>;
59
60impl<K: Hash + Eq, V, B: BuildHasher + Default> HashMap<K, V, B> {
61 /// Creates a new HashMap with capacity 0 and the default value for `B`. Memory will be
62 /// allocated when the capacity changes.
63 pub fn new() -> HashMap<K, V, B> {
64 HashMap {
65 arr: Array::new(),
66 len: 0,
67 hasher: B::default(),
68 }
69 }
70
71 /// Creates a new HashMap with the provided `cap`acity, allowing insertions without
72 /// reallocation. The default hasher will be used.
73 pub fn with_cap(cap: usize) -> HashMap<K, V, B> {
74 // TODO: Adjust this to prevent reallocation during `cap` insertions.
75 HashMap {
76 arr: Array::repeat_default(cap),
77 len: 0,
78 hasher: B::default(),
79 }
80 }
81}
82
83impl<K: Hash + Eq, V, B: BuildHasher> HashMap<K, V, B> {
84 /// Creates a new HashMap with capacity 0 and the provided `hasher`.
85 pub fn with_hasher(hasher: B) -> HashMap<K, V, B> {
86 HashMap {
87 arr: Array::new(),
88 len: 0,
89 hasher,
90 }
91 }
92
93 /// Creates a new HashMap with the provided `cap`acity and `hasher`.
94 pub fn with_cap_and_hasher(cap: usize, hasher: B) -> HashMap<K, V, B> {
95 HashMap {
96 arr: Array::repeat_default(cap),
97 len: 0,
98 hasher,
99 }
100 }
101
102 /// Returns the length of the HashMap (the number of entries it contains).
103 pub const fn len(&self) -> usize {
104 self.len
105 }
106
107 /// Returns true if the HashMap contains no entries.
108 pub const fn is_empty(&self) -> bool {
109 self.len() == 0
110 }
111
112 /// Returns the current capacity of the HashMap.
113 pub const fn cap(&self) -> usize {
114 self.arr.size
115 }
116
117 /// Inserts the provided `key`-`value` pair into the HashMap, increasing its capacity if
118 /// required. If the key was already associated with a value, the previous value is returned.
119 ///
120 /// As with the standard library, the key isn't changed if it already exists.
121 pub fn insert(&mut self, key: K, value: V) -> Option<V> {
122 if self.should_grow() {
123 self.grow()
124 }
125
126 // SAFETY: We've just grown if necessary.
127 let index = unsafe { self.find_index_for_key(&key).unwrap_unchecked() };
128
129 // The bucket at index is either empty or contains an equal key.
130 match &mut self.arr[index] {
131 Some(existing) => {
132 // Replace the value with the provided one.
133 Some(mem::replace(
134 &mut existing.1,
135 value
136 ))
137 },
138 None => {
139 // Create a new bucket with the provided values.
140 self.arr[index] = Some((key, value));
141 self.len += 1;
142 None
143 },
144 }
145 }
146
147 /// Inserts the provided `key`-`value` pair without checking if the HashMap has enough capacity.
148 /// If the key was already associated with a value, the previous value is returned.
149 ///
150 /// As with the standard library, the key isn't changed if it already exists.
151 ///
152 /// # Safety
153 /// It is the responsibility of the caller to ensure that the HashMap has enough capacity to add
154 /// the provided entry, using methods like [`reserve`][HashMap::reserve] or
155 /// [`with_cap`](HashMap::with_cap).
156 ///
157 /// # Panics
158 /// Panics if the HashMap has a capacity of 0, as it isn't possible to find a bucket associated
159 /// with the key.
160 pub unsafe fn insert_unchecked(&mut self, key: K, value: V) -> Option<V> {
161 let index = self.find_index_for_key(&key).ok_or(IndexNoCap).throw();
162
163 // The bucket at index is either empty or contains an equal key.
164 match &mut self.arr[index] {
165 Some(existing) => {
166 // Replace the value with the provided one.
167 Some(mem::replace(
168 &mut existing.1,
169 value
170 ))
171 },
172 None => {
173 // Create a new bucket with the provided values.
174 self.arr[index] = Some((key, value));
175 self.len += 1;
176 None
177 },
178 }
179 }
180
181 /// Returns the entry for the provided `key` as a key-value pair or None if there is no entry.
182 pub fn get_entry<Q>(&self, key: &Q) -> Option<(&K, &V)>
183 where
184 // We're introducing a new type parameter here, Q which represents a borrowed version of K
185 // where equality and hashing carries over the borrow.
186 K: Borrow<Q>,
187 Q: Hash + Eq + ?Sized,
188 {
189 let index = self.find_index_for_key(key)?;
190
191 // If the bucket at index is empty, the map doesn't contain the key.
192 match &self.arr[index] {
193 Some(existing) => Some((&existing.0, &existing.1)),
194 None => None,
195 }
196 }
197
198 /// Returns a reference to the value associated with the provided `key` or None if the map
199 /// contains no values for `key`.
200 pub fn get<Q>(&self, key: &Q) -> Option<&V>
201 where
202 K: Borrow<Q>,
203 Q: Hash + Eq + ?Sized,
204 {
205 let index = self.find_index_for_key(key)?;
206
207 // If the bucket at index is empty, the map doesn't contain the key.
208 match &self.arr[index] {
209 Some(existing) => Some(&existing.1),
210 None => None,
211 }
212 }
213
214 /// Returns a mutable reference to the value associated with the provided `key` or None if the
215 /// map contains no values for `key`.
216 pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut V>
217 where
218 K: Borrow<Q>,
219 Q: Hash + Eq + ?Sized,
220 {
221 let index = self.find_index_for_key(key)?;
222
223 // If the bucket at index is empty, the map doesn't contain the key.
224 match &mut self.arr[index] {
225 Some(existing) => Some(&mut existing.1),
226 None => None,
227 }
228 }
229
230 /// Removes the entry associated with `key`, returning it if it exists.
231 pub fn remove_entry<Q>(&mut self, key: &Q) -> Option<(K, V)>
232 where
233 K: Borrow<Q>,
234 Q: Hash + Eq + ?Sized,
235 {
236 let start = self.find_index_for_key(key)?;
237
238 // If the bucket at index is empty, the map doesn't contain the key.
239 let removed = match mem::take(&mut self.arr[start]) {
240 Some(entry) => {
241 self.len -= 1;
242 entry
243 },
244 None => None?,
245 };
246
247 let mut potential_collisions = Vector::new();
248
249 // UNCHECKED: find_index_for_key returned some, so the cap is not 0.
250 let mut index = (start + 1) % self.cap();
251
252 while self.arr[index].is_some() {
253 // SAFETY: We know that the value at index is some given the loop condition.
254 let entry = unsafe { mem::take(&mut self.arr[index]).unwrap_unchecked() };
255 potential_collisions.push((
256 // SAFETY: We've already propagated a None from find_index_for_key, so
257 // index_from_key will return Some.
258 unsafe { self.index_from_key(&entry.0).unwrap_unchecked() },
259 entry
260 ));
261 index = (index + 1) % self.cap();
262 }
263
264 // Sort by distance to the right from the starting index.
265 potential_collisions.sort_by_key(|(ideal, _)| (*ideal as isize - start as isize).rem_euclid(self.cap() as isize));
266
267 for (ideal, entry) in potential_collisions {
268 let mut index = ideal;
269
270 // Find the closest index on the right of the ideal one, remaining within the original
271 // block naturally.
272 while self.arr[index].is_some() {
273 index = (index + 1) % self.cap();
274 }
275
276 self.arr[index] = Some(entry);
277 }
278
279 Some(removed)
280 }
281
282 /// Removes the entry associated with `key`, returning the value if it exists.
283 pub fn remove<Q>(&mut self, key: &Q) -> Option<V>
284 where
285 K: Borrow<Q>,
286 Q: Hash + Eq + ?Sized,
287 {
288 self.remove_entry(key).map(|(_, v)| v)
289 }
290
291 /// Returns true if there is a value associated with the provided `key`.
292 pub fn contains<Q>(&self, key: &Q) -> bool
293 where
294 K: Borrow<Q>,
295 Q: Hash + Eq + ?Sized,
296 {
297 let index = self.find_index_for_key(key);
298
299 match index {
300 Some(i) => self.arr[i].is_some(),
301 None => false,
302 }
303 }
304
305 /// Increases the capacity of the HashMap to ensure that len + `extra` entries will fit without
306 /// exceeding the load factor.
307 pub fn reserve(&mut self, extra: usize) {
308 let new_cap = self.len.strict_add(extra) * LOAD_FACTOR_DENOMINATOR / LOAD_FACTOR_NUMERATOR;
309 if new_cap <= self.cap() { return; }
310
311 self.realloc_with_cap(new_cap);
312 }
313
314 /// Returns an iterator over all key-value pairs in the HashMap, as references.
315 pub fn iter(&self) -> Iter<'_, K, V> {
316 self.into_iter()
317 }
318
319 /// Consumes self and returns an iterator over all contained keys.
320 pub fn into_keys(self) -> IntoKeys<K, V> {
321 IntoKeys(self.into_iter())
322 }
323
324 /// Returns an iterator over all keys in the HashMap, as references.
325 pub fn keys<'a>(&'a self) -> Keys<'a, K, V> {
326 Keys(self.iter())
327 }
328
329 /// Consumes self and returns an iterator over all contained values.
330 pub fn into_values(self) -> IntoValues<K, V> {
331 IntoValues(self.into_iter())
332 }
333
334 /// Returns an iterator over all values in the HashMap, as mutable references.
335 pub fn values_mut<'a>(&'a mut self) -> ValuesMut<'a, K, V> {
336 ValuesMut {
337 len: self.len(),
338 inner: self.arr.iter_mut(),
339 }
340 }
341
342 /// Returns an iterator over all values in the HashMap, as references.
343 pub fn values<'a>(&'a self) -> Values<'a, K, V> {
344 Values(self.iter())
345 }
346}
347
348impl<K: Hash + Eq, V, B: BuildHasher> HashMap<K, V, B> {
349 /// Determines whether the HashMap's length exceeds the load capacity, suggesting that it should
350 /// grow before inserting new entries.
351 pub(crate) const fn should_grow(&self) -> bool {
352 self.len >= self.arr.size * LOAD_FACTOR_NUMERATOR / LOAD_FACTOR_DENOMINATOR
353 }
354
355 /// Grows the HashMap by the growth factor, ensuring that it can hold additional entries.
356 pub(crate) fn grow(&mut self) {
357 let new_cap = cmp::max(self.cap() * GROWTH_FACTOR, MIN_ALLOCATED_CAP);
358
359 self.realloc_with_cap(new_cap)
360 }
361
362 /// Reallocates the HashMap to have capacity equal to `new_cap`, if doing so wouldn't cause the
363 /// map to overload. (There isn't a logical way for the map to shrink and drop entries, so this
364 /// isn't allowed.)
365 pub(crate) fn realloc_with_cap(&mut self, new_cap: usize) {
366 // Can't handle dropping values at this point.
367 if new_cap * LOAD_FACTOR_NUMERATOR / LOAD_FACTOR_DENOMINATOR < self.len { return; }
368
369 // Replace the Array first so that we can consume the old Array.
370 let old_arr = mem::replace(&mut self.arr, Array::repeat_default(new_cap));
371
372 for entry in old_arr.into_iter().flatten() {
373 // SAFETY: If the new capacity is 0, the old_arr has no items and we can't enter
374 // this loop.
375 let index = unsafe { self.find_index_for_key(&entry.0).unwrap_unchecked() };
376
377 // Move the bucket into the new Array.
378 self.arr[index] = Some(entry);
379 }
380 }
381
382 /// Calculates the ideal index of a bucket for the provided `hashable` (or None if the HashMap
383 /// has 0 capacity). This method doesn't consider hash collisions, see
384 /// [`HashMap::find_index_for_key`] for that functionality.
385 pub(crate) fn index_from_key<H: Hash + ?Sized>(&self, hashable: &H) -> Option<usize> {
386 let key_hash = self.hasher.hash_one(hashable);
387 key_hash.checked_rem(self.cap() as u64).map(|i| i as usize)
388 }
389
390 /// Finds the first valid index for the provided `key` (or None if the HashMap has 0 capacity).
391 /// This is done by calculating the ideal index and then iterating until a bucket is found that
392 /// is empty or has an equal key.
393 pub(crate) fn find_index_for_key<Q>(&self, key: &Q) -> Option<usize>
394 where
395 // We're introducing a new type parameter here, Q which represents a borrowed version of K
396 // where equality and hashing carries over the borrow.
397 K: Borrow<Q>,
398 Q: Hash + Eq + ?Sized,
399 {
400 let mut index = self.index_from_key(&key)?;
401
402 // This is where Eq comes in: while there is a value at the current index, but the key
403 // isn't equal, increment the index (wrapping at the capacity) and check again.
404 // Can't enter recursion unless the load factor is 100%.
405 while let Some(existing) = &self.arr[index]
406 && existing.0.borrow() != key
407 {
408 // UNCHECKED: find_index_for_key returned some, so the cap is not 0.
409 index = (index + 1) % self.cap();
410 }
411
412 // After that loop, index is either empty or contains an equal key.
413 Some(index)
414 }
415}
416
417impl<K: Hash + Eq, V, B: BuildHasher + Default> Default for HashMap<K, V, B> {
418 fn default() -> Self {
419 HashMap::new()
420 }
421}
422
423impl<Q, K, V> Index<&Q> for HashMap<K, V>
424where
425 Q: Hash + Eq + ?Sized,
426 K: Hash + Eq + Borrow<Q>,
427{
428 type Output = V;
429
430 fn index(&self, key: &Q) -> &Self::Output {
431 let index = self.find_index_for_key(key).ok_or(NoValueForKey).throw();
432
433 match &self.arr[index] {
434 Some((_, v)) => v,
435 None => Err(NoValueForKey).throw(),
436 }
437 }
438}
439
440impl<K: Hash + Eq, V, B: BuildHasher + Default> Extend<(K, V)> for HashMap<K, V, B> {
441 fn extend<T: IntoIterator<Item = (K, V)>>(&mut self, iter: T) {
442 for (key, value) in iter {
443 self.insert(key, value);
444 }
445 }
446
447 fn extend_one(&mut self, item: (K, V)) {
448 self.insert(item.0, item.1);
449 }
450
451 fn extend_reserve(&mut self, additional: usize) {
452 self.reserve(additional);
453 }
454
455 unsafe fn extend_one_unchecked(&mut self, item: (K, V))
456 where
457 Self: Sized,
458 {
459 // SAFETY: extend_reserve is implemented correctly, so all other safety requirements are the
460 // responsibility of the caller.
461 unsafe { self.insert_unchecked(item.0, item.1); }
462 }
463}
464
465impl<K, V, B, I> From<I> for HashMap<K, V, B>
466where
467 K: Hash + Eq,
468 B: BuildHasher + Default,
469 I: Iterator<Item = (K, V)> + ExactSizeIterator + TrustedLen,
470{
471 fn from(value: I) -> Self {
472 let iter = value.into_iter();
473 let mut map = HashMap::with_cap(iter.len());
474
475 for (key, value) in iter {
476 // SAFETY: HashMap has been created with the right capacity.
477 unsafe { map.insert_unchecked(key, value); }
478 }
479
480 map
481 }
482}
483
484impl<K: Hash + Eq, V, B: BuildHasher + Default> FromIterator<(K, V)> for HashMap<K, V, B> {
485 fn from_iter<I: IntoIterator<Item = (K, V)>>(value: I) -> Self {
486 let iter = value.into_iter();
487 let mut map = HashMap::with_cap(iter.size_hint().0);
488 for (key, value) in iter {
489 map.insert(key, value);
490 }
491 map
492 }
493}
494
495// TODO: impl PartialEq and Eq
496
497impl<K: Hash + Eq + Debug, V: Debug, B: BuildHasher + Debug> Debug for HashMap<K, V, B> {
498 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
499 f.debug_struct("HashMap")
500 .field_with("buckets", |f| f.debug_list().entries(
501 self.arr.iter()
502 .map(|o| DebugRaw(match o {
503 Some((k, v)) => format!("({k:?}: {v:?})"),
504 None => "_".into(),
505 }))
506 ).finish())
507 .field("len", &self.len)
508 .field("cap", &self.cap())
509 .field("hasher", &self.hasher)
510 .finish()
511 }
512}
513
514impl<K: Hash + Eq + Debug, V: Debug, B: BuildHasher> Display for HashMap<K, V, B> {
515 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
516 write!(f, "#")?;
517 f.debug_map().entries(self.iter()).finish()
518 }
519}