commonlibsse_ng\re\b\BSTHashMap/
mod.rs

1//! # BSTHashMap
2//!
3//! C++: https://github.com/SARDONYX-forks/CommonLibVR/blob/feature/add-ng-release-ci/include/RE/B/BSTHashMap.h
4//!
5//! ## Expected Memory Layout (Simplified)
6//!
7//! This section explains the internal structure of `BSTHashMap`, focusing on how
8//! entries are stored and how chains are formed when collisions occur.
9//!
10//! ---
11//!
12//! ### Basic Structure (Before Insertion)
13//!
14//! Empty is represented by a byte slice of 0 bytes. This is because it is zero initialized at the same time as the memory allocation.
15//! However, it is impossible to determine whether it is already initialized or `false`.
16//! Therefore, when pushing, the pointer to the next entry is filled with a sentinel value(0xdeadbeef address).
17//!
18//! In this way, it is possible to distinguish between empty and occupied.
19//! In other words, by looking at next, it is possible to tell if data is contained.
20//!
21//! ```text
22//! // Entry<T> array
23//! // Accessed via get_entry_for(...)
24//!
25//! ┌────────────┬────────────┬────────────┬────────────┐
26//! │ Entry[0]   │ Entry[1]   │ Entry[2]   │ ...        │  ← capacity N
27//! ├────────────┼────────────┼────────────┼────────────┤
28//! │ empty      │ empty      │ empty      │ ...        │
29//! └────────────┴────────────┴────────────┴────────────┘
30//! ```
31//!
32//! ---
33//!
34//! ### Basic Insertion Case (Slot Is Empty)
35//!
36//! ```text
37//! // Emplace directly into Entry[i] corresponding to unwrap_key(a_value)!
38//!
39//! ┌────────────┬────────────┬────────────┬────────────┐
40//! │ Entry[0]   │ Entry[1]   │ Entry[2]   │ ...        │
41//! ├────────────┼────────────┼────────────┼────────────┤
42//! │ empty      │            │ value_data: a_value     │
43//! │            │            │ next: _sentinel         │
44//! └────────────┴────────────┴────────────┴────────────┘
45//!                                    ↑
46//!                     get_entry_for(key)
47//! ```
48//!
49//! ---
50//!
51//! ### Collision Case (Chaining with `next` Pointer)
52//!
53//! ```text
54//! // Entry[2] is occupied → emplace into a new free Entry (e.g., Entry[4])
55//! // Update Entry[2].next to point to Entry[4]
56//!
57//! ┌────────────┬────────────┬────────────┬────────────┬────────────┐
58//! │ Entry[0]   │ Entry[1]   │ Entry[2]   │ Entry[3]   │ Entry[4]   │
59//! ├────────────┼────────────┼────────────┼────────────┼────────────┤
60//! │ empty      │            │ value_data: A           │ unused     │ value_data: B
61//! │            │            │ next ___________________/            │ next: _sentinel
62//! └────────────┴────────────┴────────────┴────────────┴────────────┘
63//!                             ↑
64//!                                entry->next = &Entry[4]
65//! ```
66//!
67//! ---
68//!
69//! ### Eviction Case (Entry Needs Relocation)
70//!
71//! ```text
72//! // A is in Entry[2] but belongs in Entry[0]
73//! // Move A to Entry[4] → Emplace B into Entry[2]
74//!
75//! ┌────────────┬────────────┬────────────┬────────────┬────────────┐
76//! │ Entry[0]   │ Entry[1]   │ Entry[2]   │ Entry[3]   │ Entry[4]   │
77//! ├────────────┼────────────┼────────────┼────────────┼────────────┤
78//! │            │            │ value_data: B           │ unused     │ value_data: A
79//! │            │            │ next: _sentinel         │            │ next: _sentinel
80//! │            │            │                         │            │
81//! │            │            │ ← emplace here          │ ← evicted  │
82//! └────────────┴────────────┴────────────┴────────────┴────────────┘
83//! ```
84// # Implementation notes
85// The value is initialized to 0 when allocated, but this is not always valid as Key and Value (e.g. &CStr, NonZeroUsize, etc.), and if it is accessed as Key and Value by mistake, it is undefined behavior.
86
87#![allow(clippy::type_repetition_in_bounds)]
88mod allocator;
89mod entry;
90mod hasher;
91mod scatter_table;
92
93pub use self::allocator::{Allocator, BSTScatterTableHeapAllocator, BSTStaticHashMapBaseAllocator};
94pub use self::hasher::{BSTScatterKeyExtractor, KeyStrategy};
95
96use self::entry::SentinelPtr;
97use self::entry::{EntryType, Iter};
98use self::scatter_table::BSTScatterTable;
99use core::fmt;
100use core::hash::Hash;
101
102/// dummy type
103#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
104#[repr(transparent)]
105pub struct UnkKey(i32);
106
107/// dummy value
108#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
109#[repr(transparent)]
110pub struct UnkValue(i32);
111
112#[repr(transparent)]
113pub struct BSTHashMap<K, V>(
114    BSTScatterTable<BSTScatterKeyExtractor<K, V>, BSTScatterTableHeapAllocator>,
115)
116where
117    K: Hash + Eq;
118const _: () = assert!(core::mem::size_of::<BSTHashMap<(), ()>>() == 0x30);
119
120impl<K, V> fmt::Debug for BSTHashMap<K, V>
121where
122    K: Hash + Eq,
123    K: fmt::Debug,
124    V: fmt::Debug,
125{
126    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
127        let mut map = f.debug_map();
128        for (k, v) in self.iter() {
129            map.entry(k, v);
130        }
131        map.finish()
132    }
133}
134
135impl<K, V> Default for BSTHashMap<K, V>
136where
137    K: Hash + Eq,
138{
139    fn default() -> Self {
140        Self(Default::default())
141    }
142}
143
144impl<K, V> BSTHashMap<K, V>
145where
146    K: Hash + Eq,
147{
148    pub fn new() -> Self {
149        Self(BSTScatterTable::default())
150    }
151
152    pub fn get(&self, key: &K) -> Option<&V> {
153        let binding = self.0.get_entry_start();
154        let value = binding.iter().find(|value| unsafe {
155            value.as_non_sentinel_ref().is_some_and(|entry| entry.value_data.0 == *key)
156        })?;
157
158        let pair = unsafe { &value.as_non_sentinel_ref()?.value_data };
159        Some(&pair.1)
160    }
161
162    pub fn get_mut(&mut self, key: &K) -> Option<&mut V> {
163        let mut binding = self.0.get_entry_start();
164        let value = binding.iter_mut().find(|value| unsafe {
165            value.as_non_sentinel_ref().is_some_and(|entry| entry.value_data.0 == *key)
166        })?;
167
168        let pair = unsafe { &mut value.as_non_sentinel_mut()?.value_data };
169        Some(&mut pair.1)
170    }
171
172    // TODO: Return prev Option<(K, V)>
173    pub fn insert(&mut self, key: K, value: V) -> (Iter<(K, V)>, bool) {
174        let (pair, res) = self.0.insert((key, value));
175        (pair, res)
176    }
177
178    pub fn clear(&mut self) {
179        self.0.clear();
180    }
181
182    pub fn iter_mut(&mut self) -> impl Iterator<Item = &mut (K, V)> {
183        self.0.get_entry_start().into_iter().flat_map(|entries| {
184            (0..self.0.capacity).filter_map(move |i| unsafe {
185                Some(&mut entries.add(i as usize).as_non_sentinel_mut()?.value_data)
186            })
187        })
188    }
189}
190
191////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
192
193// Iterator
194pub struct BSTHashMapIter<'a, K, V>
195where
196    K: Hash + Eq,
197{
198    entries: Option<SentinelPtr<EntryType<(K, V)>>>,
199    capacity: u32,
200    index: usize,
201    current: Option<&'a EntryType<(K, V)>>,
202}
203
204impl<K, V> BSTHashMap<K, V>
205where
206    K: Hash + Eq,
207{
208    pub fn iter(&self) -> BSTHashMapIter<'_, K, V> {
209        let entries = self.0.get_entry_start();
210        BSTHashMapIter { entries, capacity: self.0.capacity, index: 0, current: None }
211    }
212}
213
214impl<'a, K, V> Iterator for BSTHashMapIter<'a, K, V>
215where
216    K: Hash + Eq,
217{
218    type Item = (&'a K, &'a V);
219
220    fn next(&mut self) -> Option<Self::Item> {
221        unsafe {
222            loop {
223                // Process the next chain entry, if any
224                if let Some(curr) = self.current {
225                    match &curr.next {
226                        Some(next) => {
227                            let Some(next_ref) = next.as_non_sentinel_ref() else {
228                                self.current = None;
229                                continue;
230                            };
231                            self.current = Some(next_ref);
232                            let (k, v) = &next_ref.value_data;
233                            return Some((k, v));
234                        }
235                        None => self.current = None,
236                    }
237                }
238
239                // Next entry after the chain is finished.
240                if self.index >= (self.capacity as usize) {
241                    return None;
242                }
243
244                let entry = self.entries?.add(self.index).as_non_sentinel_ref()?;
245                self.index += 1;
246
247                if !entry.is_occupied() {
248                    continue;
249                }
250
251                let (k, v) = &entry.value_data;
252                return Some((k, v));
253            }
254        }
255    }
256}
257
258////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
259// Debug
260
261impl<K, V> BSTHashMap<K, V>
262where
263    K: Hash + Eq + fmt::Debug,
264    V: fmt::Debug,
265{
266    /// Show the memory layout of the `BSTHashMap`.
267    ///
268    /// # Errors
269    /// Failed to write string.
270    ///
271    /// # Examples
272    /// ```
273    /// use commonlibsse_ng::re::BSTHashMap::BSTHashMap;
274    ///
275    /// let mut map = BSTHashMap::new();
276    /// map.insert(42, "hello");
277    /// map.insert(17, "world");
278    /// map.insert(99, "extra");
279    /// map.insert(13, "foo");
280    ///
281    /// print!("{}", map.show_memory_layout().unwrap());
282    ///
283    /// // Output =>
284    ///
285    /// // Memory Layout Visualization:
286    /// // Capacity: 8
287    /// // Free slots: 4
288    /// // ------------------------------------------
289    /// // [000] 17 => "world"
290    /// // [001] 13 => "foo"
291    /// // [002] 42 => "hello" -> 17 => "world"
292    /// // [003] EMPTY (Non-occupied)
293    /// // [004] EMPTY (Non-occupied)
294    /// // [005] 99 => "extra" -> 13 => "foo"
295    /// // [006] EMPTY (Non-occupied)
296    /// // [007] EMPTY (Non-occupied)
297    /// // ------------------------------------------
298    /// ```
299    #[deprecated(
300        note = "Using this function in a game will result in a SIGV. Cause currently unknown."
301    )]
302    pub fn show_memory_layout(&self) -> std::io::Result<String> {
303        use std::io::Write as _;
304
305        let mut w = Vec::new();
306
307        let Some(entries) = self.0.get_entry_start() else {
308            writeln!(&mut w, "No entries allocated.")?;
309            return Ok(String::from_utf8_lossy(&w).to_string());
310        };
311
312        writeln!(&mut w, "Memory Layout Visualization:")?;
313        writeln!(&mut w, "Capacity: {}", self.0.capacity)?;
314        writeln!(&mut w, "Free slots: {}", self.0.free)?;
315        writeln!(&mut w, "------------------------------------------")?;
316
317        for i in 0..self.0.capacity {
318            let entry = unsafe { entries.add(i as usize) };
319
320            if !Self::check_valid_ptr(entry) {
321                continue;
322            }
323
324            let Some(entry_ref) = (unsafe { entry.as_non_sentinel_ref() }) else {
325                writeln!(&mut w, "[{:03}] EMPTY", i)?;
326                continue;
327            };
328
329            if !entry_ref.is_occupied() {
330                writeln!(&mut w, "[{:03}] EMPTY (Non-occupied)", i)?;
331                continue;
332            }
333
334            let (k, v) = &entry_ref.value_data;
335            let mut chain = vec![format!("{:?} => {:?}", k, v)];
336
337            // follow chain if exists
338            let mut next = entry_ref.next;
339            while let Some(next_ptr) = next {
340                if !Self::check_valid_ptr(next_ptr) {
341                    continue;
342                }
343
344                let Some(next_ref) = (unsafe { next_ptr.as_non_sentinel_ref() }) else {
345                    break;
346                };
347                if !next_ref.is_occupied() {
348                    break;
349                }
350
351                let (nk, nv) = &next_ref.value_data;
352                chain.push(format!("{:?} => {:?}", nk, nv));
353                next = next_ref.next;
354            }
355
356            writeln!(&mut w, "[{:03}] {}", i, chain.join(" -> "))?;
357        }
358
359        writeln!(&mut w, "------------------------------------------")?;
360
361        Ok(String::from_utf8_lossy(&w).to_string())
362    }
363
364    /// The reason is unknown, but sometimes `usize::MAX` is included in the game for some reason.
365    ///
366    /// This is a check to avoid a dereference crash in that case.
367    fn check_valid_ptr(entry: SentinelPtr<EntryType<(K, V)>>) -> bool {
368        let is_ok = entry.as_ptr().addr() != 0xFFFFFFFFFFFFFFFF;
369
370        #[cfg(feature = "tracing")]
371        if !is_ok {
372            tracing::error!("Couldn't access memory region: (ptr: {entry:?})");
373        }
374
375        is_ok
376    }
377}
378
379#[cfg(test)]
380mod tests {
381    use super::*;
382
383    #[test]
384    fn test_hashmap() {
385        macro_rules! insert_and_expect {
386            ($map:expr, ($key:expr, $val:expr) => $should_insert:expr => $expected_val:expr) => {{
387                let (mut iter, inserted) = $map.insert($key, $val);
388                assert_eq!(inserted, $should_insert, "inserted mismatch for key {}", $key);
389
390                let actual = iter.next().map(|entry| entry.value_data);
391                let expected = Some(($key, $expected_val));
392                assert_eq!(actual, expected, "value mismatch for key {}", $key);
393            }};
394        }
395
396        let mut map = BSTHashMap::new();
397
398        insert_and_expect!(map, (0, c"Hey") => true => c"Hey");
399        insert_and_expect!(map, (0, c"DuplicatedKey") => false => c"Hey");
400        insert_and_expect!(map, (1, c"Hello") => true => c"Hello");
401        insert_and_expect!(map, (2, c"World") => true => c"World");
402
403        for (k, v) in map.iter() {
404            dbg!(k, v);
405        }
406    }
407
408    #[test]
409    fn show_memory() {
410        let mut map = BSTHashMap::new();
411        map.insert(42, "hello");
412        map.insert(17, "world");
413        map.insert(99, "extra");
414        map.insert(13, "foo");
415
416        #[allow(deprecated)]
417        {
418            print!("{}", map.show_memory_layout().unwrap());
419        }
420    }
421}