Line data Source code
1 : #ifndef HASHBROWNS_HASH_MAP_H
2 : #define HASHBROWNS_HASH_MAP_H
3 :
4 : #include "core/data_structure.h"
5 : #include "core/memory_manager.h"
6 : #include <cstddef>
7 : #include <cstdint>
8 : #include <string>
9 : #include <utility>
10 : #include <functional>
11 : #include <cmath>
12 : #include <new>
13 : #include <stdexcept>
14 :
15 : namespace hashbrowns {
16 :
17 : enum class HashStrategy { OPEN_ADDRESSING, SEPARATE_CHAINING };
18 :
19 : class HashMap : public DataStructure {
20 : public:
21 29 : explicit HashMap(HashStrategy strategy = HashStrategy::OPEN_ADDRESSING,
22 : std::size_t initial_capacity = 16)
23 29 : : strategy_(strategy) {
24 29 : if (initial_capacity == 0) initial_capacity = 16;
25 29 : if (strategy_ == HashStrategy::OPEN_ADDRESSING) {
26 24 : oa_init(capacity_round_up(initial_capacity));
27 : } else {
28 5 : sc_init(capacity_round_up(initial_capacity));
29 : }
30 29 : }
31 :
32 49 : ~HashMap() override { clear(); free_storage(); }
33 :
34 : // DataStructure interface
35 972 : void insert(int key, const std::string& value) override {
36 972 : if (strategy_ == HashStrategy::OPEN_ADDRESSING) oa_insert(key, value); else sc_insert(key, value);
37 972 : }
38 :
39 920 : bool search(int key, std::string& value) const override {
40 920 : return (strategy_ == HashStrategy::OPEN_ADDRESSING) ? oa_search(key, value) : sc_search(key, value);
41 : }
42 :
43 612 : bool remove(int key) override {
44 612 : return (strategy_ == HashStrategy::OPEN_ADDRESSING) ? oa_remove(key) : sc_remove(key);
45 : }
46 :
47 6 : size_t size() const override { return size_; }
48 4 : bool empty() const override { return size_ == 0; }
49 :
50 29 : void clear() override {
51 29 : if (strategy_ == HashStrategy::OPEN_ADDRESSING) oa_clear(); else sc_clear();
52 29 : size_ = 0;
53 29 : metrics_reset();
54 29 : }
55 :
56 6 : size_t memory_usage() const override {
57 6 : if (strategy_ == HashStrategy::OPEN_ADDRESSING) {
58 5 : return capacity_ * sizeof(OAEntry) + sizeof(*this);
59 : } else {
60 1 : return capacity_ * sizeof(SCNode*) + sc_node_count_ * sizeof(SCNode) + sizeof(*this);
61 : }
62 : }
63 :
64 6 : std::string type_name() const override { return "HashMap"; }
65 6 : std::string insert_complexity() const override { return strategy_ == HashStrategy::OPEN_ADDRESSING ? "O(1) avg" : "O(1) avg"; }
66 6 : std::string search_complexity() const override { return strategy_ == HashStrategy::OPEN_ADDRESSING ? "O(1) avg" : "O(1) avg"; }
67 6 : std::string remove_complexity() const override { return strategy_ == HashStrategy::OPEN_ADDRESSING ? "O(1) avg" : "O(1) avg"; }
68 :
69 3 : HashStrategy strategy() const { return strategy_; }
70 1 : void set_strategy(HashStrategy s) {
71 1 : if (s == strategy_) return;
72 : // Rebuild into new strategy
73 : // Extract entries
74 : // Simple approach: iterate and re-insert into new structure
75 : // For brevity, only support rebuilding when map is empty
76 1 : if (size_ != 0) {
77 0 : throw std::runtime_error("Changing HashMap strategy requires empty map");
78 : }
79 1 : free_storage();
80 1 : strategy_ = s;
81 1 : if (strategy_ == HashStrategy::OPEN_ADDRESSING) oa_init(16); else sc_init(16);
82 : }
83 :
84 5 : void set_max_load_factor(double lf) {
85 5 : if (lf <= 0.0) lf = 0.5;
86 5 : if (strategy_ == HashStrategy::OPEN_ADDRESSING) {
87 4 : oa_max_load_factor_ = lf;
88 4 : load_threshold_ = static_cast<std::size_t>(capacity_ * oa_max_load_factor_);
89 : } else {
90 1 : sc_max_load_factor_ = lf;
91 : }
92 5 : }
93 3 : double max_load_factor() const {
94 3 : return (strategy_ == HashStrategy::OPEN_ADDRESSING) ? oa_max_load_factor_ : sc_max_load_factor_;
95 : }
96 :
97 : private:
98 : // Hash helpers
99 29 : static std::size_t capacity_round_up(std::size_t n) {
100 : // round to power of two
101 29 : std::size_t cap = 1;
102 147 : while (cap < n) cap <<= 1;
103 29 : return cap;
104 : }
105 :
106 3658 : static std::size_t mix(std::uint64_t x) {
107 : // simple 64-bit mix (splitmix64)
108 3658 : x += 0x9e3779b97f4a7c15ULL;
109 3658 : x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9ULL;
110 3658 : x = (x ^ (x >> 27)) * 0x94d049bb133111ebULL;
111 3658 : x = x ^ (x >> 31);
112 3658 : return static_cast<std::size_t>(x);
113 : }
114 :
115 : // OPEN ADDRESSING
116 : enum class OAState : uint8_t { EMPTY, OCCUPIED, TOMBSTONE };
117 : struct OAEntry {
118 : int key;
119 : std::string value;
120 : OAState state;
121 3666 : OAEntry() : key(0), value(), state(OAState::EMPTY) {}
122 : };
123 :
124 62 : void oa_init(std::size_t cap) {
125 62 : capacity_ = cap;
126 62 : load_threshold_ = static_cast<std::size_t>(capacity_ * oa_max_load_factor_);
127 62 : oa_entries_ = std::allocator_traits<TrackedAllocator<OAEntry>>::allocate(oa_alloc_, capacity_);
128 3422 : for (std::size_t i = 0; i < capacity_; ++i) {
129 3360 : std::allocator_traits<TrackedAllocator<OAEntry>>::construct(oa_alloc_, &oa_entries_[i]);
130 : }
131 62 : }
132 :
133 23 : void oa_clear() {
134 23 : if (!oa_entries_) return;
135 1975 : for (std::size_t i = 0; i < capacity_; ++i) {
136 1952 : if (oa_entries_[i].state == OAState::OCCUPIED) {
137 306 : oa_entries_[i].value.~basic_string();
138 : // re-construct default to EMPTY without tracking again
139 306 : new (&oa_entries_[i]) OAEntry();
140 1646 : } else if (oa_entries_[i].state == OAState::TOMBSTONE) {
141 559 : oa_entries_[i].state = OAState::EMPTY;
142 : }
143 : }
144 : }
145 :
146 24 : void oa_free() {
147 24 : if (!oa_entries_) return;
148 1984 : for (std::size_t i = 0; i < capacity_; ++i) {
149 1960 : std::allocator_traits<TrackedAllocator<OAEntry>>::destroy(oa_alloc_, &oa_entries_[i]);
150 : }
151 24 : std::allocator_traits<TrackedAllocator<OAEntry>>::deallocate(oa_alloc_, oa_entries_, capacity_);
152 24 : oa_entries_ = nullptr;
153 : }
154 :
155 38 : void oa_grow() {
156 38 : std::size_t new_cap = capacity_ ? capacity_ * 2 : 16;
157 38 : OAEntry* old = oa_entries_;
158 38 : std::size_t old_cap = capacity_;
159 38 : bool prev_enabled = metrics_enabled_;
160 38 : metrics_enabled_ = false; // do not count internal rehashing
161 38 : oa_init(new_cap);
162 38 : std::size_t old_size = size_;
163 38 : size_ = 0;
164 1438 : for (std::size_t i = 0; i < old_cap; ++i) {
165 1400 : if (old[i].state == OAState::OCCUPIED) {
166 963 : oa_insert(old[i].key, old[i].value);
167 : }
168 : }
169 : // destroy old entries and deallocate
170 1438 : for (std::size_t i = 0; i < old_cap; ++i) {
171 1400 : std::allocator_traits<TrackedAllocator<OAEntry>>::destroy(oa_alloc_, &old[i]);
172 : }
173 38 : std::allocator_traits<TrackedAllocator<OAEntry>>::deallocate(oa_alloc_, old, old_cap);
174 38 : size_ = old_size; // oa_insert already updated size_
175 38 : metrics_enabled_ = prev_enabled;
176 38 : }
177 :
178 1829 : void oa_insert(int key, const std::string& value) {
179 1829 : if (size_ + tombstones_ >= load_threshold_) {
180 38 : oa_grow();
181 38 : tombstones_ = 0;
182 : }
183 1829 : std::size_t mask = capacity_ - 1;
184 1829 : std::size_t idx = mix(static_cast<std::uint64_t>(key)) & mask;
185 1829 : std::size_t first_tomb = capacity_;
186 1829 : std::size_t probes = 0;
187 : while (true) {
188 3204 : ++probes;
189 3204 : OAEntry& e = oa_entries_[idx];
190 3204 : if (e.state == OAState::EMPTY) {
191 1828 : std::size_t target = (first_tomb < capacity_) ? first_tomb : idx;
192 1828 : OAEntry& slot = oa_entries_[target];
193 1828 : if (slot.state == OAState::TOMBSTONE) { tombstones_--; }
194 1828 : slot.key = key;
195 1828 : new (&slot.value) std::string(value);
196 1828 : slot.state = OAState::OCCUPIED;
197 1828 : ++size_;
198 1828 : if (metrics_enabled_) { probe_insert_sum_ += probes; ++insert_ops_; }
199 1828 : return;
200 1376 : } else if (e.state == OAState::TOMBSTONE) {
201 0 : if (first_tomb == capacity_) first_tomb = idx;
202 1376 : } else if (e.key == key) {
203 1 : e.value = value; // update
204 1 : if (metrics_enabled_) { probe_insert_sum_ += probes; ++insert_ops_; }
205 1 : return;
206 : }
207 1375 : idx = (idx + 1) & mask; // linear probing
208 1375 : }
209 : }
210 :
211 714 : bool oa_search(int key, std::string& value) const {
212 714 : std::size_t mask = capacity_ - 1;
213 714 : std::size_t idx = mix(static_cast<std::uint64_t>(key)) & mask;
214 714 : std::size_t probes = 0;
215 : while (true) {
216 1163 : ++probes;
217 1163 : const OAEntry& e = oa_entries_[idx];
218 1163 : if (e.state == OAState::EMPTY) { if (metrics_enabled_) { probe_search_sum_ += probes; ++search_ops_; } return false; }
219 1111 : if (e.state == OAState::OCCUPIED && e.key == key) { value = e.value; if (metrics_enabled_) { probe_search_sum_ += probes; ++search_ops_; } return true; }
220 449 : idx = (idx + 1) & mask;
221 449 : }
222 : }
223 :
224 560 : bool oa_remove(int key) {
225 560 : std::size_t mask = capacity_ - 1;
226 560 : std::size_t idx = mix(static_cast<std::uint64_t>(key)) & mask;
227 560 : std::size_t probes = 0;
228 : while (true) {
229 836 : ++probes;
230 836 : OAEntry& e = oa_entries_[idx];
231 836 : if (e.state == OAState::EMPTY) { if (metrics_enabled_) { probe_remove_sum_ += probes; ++remove_ops_; } return false; }
232 835 : if (e.state == OAState::OCCUPIED && e.key == key) {
233 559 : e.value.~basic_string();
234 559 : e.state = OAState::TOMBSTONE;
235 559 : --size_;
236 559 : ++tombstones_;
237 559 : if (metrics_enabled_) { probe_remove_sum_ += probes; ++remove_ops_; }
238 559 : return true;
239 : }
240 276 : idx = (idx + 1) & mask;
241 276 : }
242 : }
243 :
244 : // SEPARATE CHAINING
245 : struct SCNode { int key; std::string value; SCNode* next; };
246 :
247 11 : void sc_init(std::size_t cap) {
248 11 : capacity_ = cap;
249 11 : buckets_ = std::allocator_traits<TrackedAllocator<SCNode*>>::allocate(sc_bucket_alloc_, capacity_);
250 571 : for (std::size_t i = 0; i < capacity_; ++i) {
251 560 : buckets_[i] = nullptr;
252 : }
253 11 : }
254 :
255 6 : void sc_clear() {
256 6 : if (!buckets_) return;
257 318 : for (std::size_t i = 0; i < capacity_; ++i) {
258 312 : SCNode* n = buckets_[i];
259 366 : while (n) {
260 54 : SCNode* next = n->next;
261 54 : destroy_node(n);
262 54 : n = next;
263 : }
264 312 : buckets_[i] = nullptr;
265 : }
266 6 : sc_node_count_ = 0;
267 : }
268 :
269 6 : void sc_free() {
270 6 : if (!buckets_) return;
271 6 : std::allocator_traits<TrackedAllocator<SCNode*>>::deallocate(sc_bucket_alloc_, buckets_, capacity_);
272 6 : buckets_ = nullptr;
273 : }
274 :
275 297 : void sc_grow_if_needed() {
276 297 : if (size_ <= capacity_ * sc_max_load_factor_) return;
277 5 : std::size_t new_cap = capacity_ ? capacity_ * 2 : 16;
278 : // rehash
279 5 : auto old_buckets = buckets_;
280 5 : std::size_t old_cap = capacity_;
281 5 : bool prev_enabled = metrics_enabled_;
282 5 : metrics_enabled_ = false; // disable during rehash
283 5 : sc_init(new_cap);
284 5 : std::size_t old_size = size_;
285 5 : size_ = 0;
286 253 : for (std::size_t i = 0; i < old_cap; ++i) {
287 248 : SCNode* n = old_buckets[i];
288 439 : while (n) {
289 191 : sc_insert(n->key, n->value);
290 191 : SCNode* next = n->next;
291 191 : destroy_node(n);
292 191 : n = next;
293 : }
294 : }
295 5 : std::allocator_traits<TrackedAllocator<SCNode*>>::deallocate(sc_bucket_alloc_, old_buckets, old_cap);
296 5 : size_ = old_size; // already updated by sc_insert
297 5 : metrics_enabled_ = prev_enabled;
298 : }
299 :
300 297 : void sc_insert(int key, const std::string& value) {
301 297 : sc_grow_if_needed();
302 297 : std::size_t idx = mix(static_cast<std::uint64_t>(key)) & (capacity_ - 1);
303 : // check existing
304 297 : std::size_t probes = 0;
305 395 : for (SCNode* n = buckets_[idx]; n; n = n->next) {
306 99 : ++probes;
307 99 : if (n->key == key) { n->value = value; if (metrics_enabled_) { probe_insert_sum_ += probes; ++insert_ops_; } return; }
308 : }
309 296 : SCNode* node = create_node(key, value);
310 296 : node->next = buckets_[idx];
311 296 : buckets_[idx] = node;
312 296 : ++size_;
313 296 : ++sc_node_count_;
314 296 : ++probes; // count placement as a probe
315 296 : if (metrics_enabled_) { probe_insert_sum_ += probes; ++insert_ops_; }
316 : }
317 :
318 206 : bool sc_search(int key, std::string& value) const {
319 206 : std::size_t idx = mix(static_cast<std::uint64_t>(key)) & (capacity_ - 1);
320 206 : std::size_t probes = 0;
321 240 : for (SCNode* n = buckets_[idx]; n; n = n->next) {
322 188 : ++probes;
323 188 : if (n->key == key) { value = n->value; if (metrics_enabled_) { probe_search_sum_ += probes; ++search_ops_; } return true; }
324 : }
325 52 : if (metrics_enabled_) { probe_search_sum_ += probes; ++search_ops_; }
326 52 : return false;
327 : }
328 :
329 52 : bool sc_remove(int key) {
330 52 : std::size_t idx = mix(static_cast<std::uint64_t>(key)) & (capacity_ - 1);
331 52 : SCNode* prev = nullptr;
332 52 : SCNode* cur = buckets_[idx];
333 52 : std::size_t probes = 0;
334 58 : while (cur) {
335 57 : ++probes;
336 57 : if (cur->key == key) {
337 51 : if (prev) prev->next = cur->next; else buckets_[idx] = cur->next;
338 51 : destroy_node(cur);
339 51 : --size_;
340 51 : --sc_node_count_;
341 51 : if (metrics_enabled_) { probe_remove_sum_ += probes; ++remove_ops_; }
342 51 : return true;
343 : }
344 6 : prev = cur; cur = cur->next;
345 : }
346 1 : if (metrics_enabled_) { probe_remove_sum_ += probes; ++remove_ops_; }
347 1 : return false;
348 : }
349 :
350 : // Node allocation via MemoryPool
351 296 : SCNode* create_node(int key, const std::string& value) {
352 296 : SCNode* raw = sc_pool_.allocate();
353 296 : new (raw) SCNode{key, value, nullptr};
354 296 : return raw;
355 : }
356 :
357 296 : void destroy_node(SCNode* node) {
358 296 : if (!node) return;
359 296 : node->~SCNode();
360 296 : sc_pool_.deallocate(node);
361 : }
362 :
363 30 : void free_storage() {
364 30 : if (strategy_ == HashStrategy::OPEN_ADDRESSING) oa_free(); else sc_free();
365 30 : }
366 :
367 : public: // metrics API
368 56 : void metrics_reset() {
369 56 : probe_insert_sum_ = probe_search_sum_ = probe_remove_sum_ = 0;
370 56 : insert_ops_ = search_ops_ = remove_ops_ = 0;
371 56 : }
372 9 : double avg_insert_probes() const { return insert_ops_ ? static_cast<double>(probe_insert_sum_) / insert_ops_ : 0.0; }
373 9 : double avg_search_probes() const { return search_ops_ ? static_cast<double>(probe_search_sum_) / search_ops_ : 0.0; }
374 9 : double avg_remove_probes() const { return remove_ops_ ? static_cast<double>(probe_remove_sum_) / remove_ops_ : 0.0; }
375 :
376 : private:
377 : // State shared by both
378 : HashStrategy strategy_;
379 : std::size_t size_ = 0;
380 : std::size_t capacity_ = 0;
381 :
382 : // Open addressing state
383 : TrackedAllocator<OAEntry> oa_alloc_{};
384 : OAEntry* oa_entries_ = nullptr;
385 : std::size_t load_threshold_ = 0;
386 : std::size_t tombstones_ = 0;
387 : double oa_max_load_factor_ = 0.7;
388 :
389 : // Separate chaining state
390 : TrackedAllocator<SCNode*> sc_bucket_alloc_{};
391 : SCNode** buckets_ = nullptr;
392 : MemoryPool<SCNode> sc_pool_{};
393 : std::size_t sc_node_count_ = 0;
394 : double sc_max_load_factor_ = 0.75;
395 :
396 : // Metrics
397 : bool metrics_enabled_ = true;
398 : mutable std::size_t probe_insert_sum_ = 0;
399 : mutable std::size_t probe_search_sum_ = 0;
400 : mutable std::size_t probe_remove_sum_ = 0;
401 : mutable std::size_t insert_ops_ = 0;
402 : mutable std::size_t search_ops_ = 0;
403 : mutable std::size_t remove_ops_ = 0;
404 : };
405 :
406 : } // namespace hashbrowns
407 :
408 : #endif // HASHBROWNS_HASH_MAP_H
|