哈希表（散列表）二次探测

#pragma once
#include <cstddef>
#include <iostream>
#include <memory>
#include <string>
#include <utility>

// Retained only so that files including this header keep compiling unchanged;
// nothing below relies on it.
using namespace std;

// Occupancy state of one slot in the open-addressing table.
enum State
{
    EMPTY,  // never held an entry; a lookup may stop probing here
    DELETE, // held an entry that was removed (tombstone); probing continues past it
    EXIST,  // currently holds a live entry
};

// One key/value pair stored in the table.
template<class K, class V>
struct HashTableNode
{
    K _key;
    V _value;
};

// Default hash functor: returns the key itself converted to size_t.
template<class K>
struct __HashFunc
{
    size_t operator()(const K& key) const
    {
        return key;
    }
};

// Specialization for string keys: sums the characters of the string.
template<>
struct __HashFunc<string>
{
    size_t operator()(const string& str) const
    {
        size_t key = 0;
        for (size_t i = 0; i < str.size(); i++)
        {
            key += str[i];
        }
        return key;
    }
};

// Key/value hash table using open addressing with quadratic probing.
//
// K must be copy-assignable, default-constructible and comparable with ==;
// V must be copy-assignable and default-constructible. HashFunc must be
// default-constructible and callable as size_t(const K&).
template<class K, class V, class HashFunc = __HashFunc<K>>
class HashTable
{
    typedef HashTableNode<K, V> Node;

public:
    // Creates an empty table with `capacity` slots. A capacity of 0 is
    // raised to 1 so that the modulo in _HashFunc is always well defined.
    HashTable(size_t capacity = 10)
        : _tables(nullptr)
        , _states(nullptr)
        , _size(0)
        , _capacity(capacity == 0 ? 1 : capacity)
    {
        // Value-initialize the nodes so Print() never reads indeterminate
        // keys/values, and hold them in a guard so they are released if the
        // second allocation throws.
        std::unique_ptr<Node[]> nodes(new Node[_capacity]());
        _states = new State[_capacity];
        _tables = nodes.release();
        // memset is unsuitable here: it fills byte by byte, not enum by enum.
        for (size_t i = 0; i < _capacity; i++)
        {
            _states[i] = EMPTY;
        }
    }

    // Deep copy. Delegating to the sizing constructor means the destructor
    // runs (and frees both arrays) if copying an element throws.
    HashTable(const HashTable& other)
        : HashTable(other._capacity)
    {
        for (size_t i = 0; i < _capacity; i++)
        {
            _tables[i] = other._tables[i];
            _states[i] = other._states[i];
        }
        _size = other._size;
    }

    // Copy-and-swap assignment; `other` is already a private copy.
    HashTable& operator=(HashTable other)
    {
        Swap(other);
        return *this;
    }

    ~HashTable()
    {
        delete[] _tables;
        delete[] _states;
    }

    // Inserts key/value.
    // Returns true if the pair was stored, false if the key is already
    // present (the stored value is left untouched in that case).
    bool Insert(const K& key, const V& value)
    {
        if (Find(key) != nullptr)
        {
            return false;
        }
        _CheckCapacity();
        _InsertNew(key, value);
        return true;
    }

    // Returns a pointer to the live node holding `key`, or nullptr if the
    // key is not in the table. The pointer is invalidated by any insertion
    // that grows the table.
    Node* Find(const K& key)
    {
        size_t index = _HashFunc(key);
        // The quadratic offsets repeat with period _capacity, so _capacity
        // probes visit every slot this key can ever occupy. Bounding the
        // loop keeps it finite even when no reachable slot is EMPTY.
        for (size_t i = 1; i <= _capacity; ++i)
        {
            if (_states[index] == EMPTY)
            {
                return nullptr;
            }
            if (_states[index] == EXIST && _tables[index]._key == key)
            {
                return &_tables[index];
            }
            index = _GetNextIndex(index, i) % _capacity;
        }
        return nullptr;
    }

    // Removes `key` by turning its slot into a tombstone.
    // Returns true if the key was present, false otherwise.
    bool Remove(const K& key)
    {
        Node* node = Find(key);
        if (node == nullptr)
        {
            return false;
        }
        _states[node - _tables] = DELETE;
        --_size;
        return true;
    }

    // Maps a key to its home slot: hash value modulo the capacity.
    size_t _HashFunc(const K& key) const
    {
        HashFunc hf;
        return hf(key) % _capacity;
    }

    // Next quadratic-probe position, derived from the previous position so
    // the (possibly expensive) hash is not recomputed:
    // i*i - (i-1)*(i-1) == 2*i - 1. The caller reduces it modulo _capacity.
    size_t _GetNextIndex(size_t prev, size_t i) const
    {
        return prev + 2 * i - 1;
    }

    // Dumps every slot (index, state, key, value) to standard output.
    void Print() const
    {
        for (size_t i = 0; i < _capacity; i++)
        {
            const char* label = "EMPTY:";
            if (_states[i] == EXIST)
            {
                label = "EXIST:";
            }
            else if (_states[i] == DELETE)
            {
                label = "DELETE:";
            }
            std::cout << i << label << _tables[i]._key << "-------"
                      << _tables[i]._value << std::endl;
        }
    }

    // Exchanges the complete contents of two tables.
    void Swap(HashTable<K, V, HashFunc>& ht) noexcept
    {
        std::swap(_size, ht._size);
        std::swap(_states, ht._states);
        std::swap(_tables, ht._tables);
        std::swap(_capacity, ht._capacity);
    }

protected:
    // Grows the table once the load factor reaches 0.7. The comparison is
    // scaled by 10 because _size / _capacity would truncate to 0 in integer
    // arithmetic.
    void _CheckCapacity()
    {
        if (10 * _size / _capacity >= 7)
        {
            _Grow();
        }
    }

    // Rebuilds the table with twice the capacity, re-inserting only live
    // entries (tombstones are dropped).
    void _Grow()
    {
        HashTable<K, V, HashFunc> tmp(2 * _capacity);
        for (size_t i = 0; i < _capacity; i++)
        {
            if (_states[i] == EXIST)
            {
                tmp._InsertNew(_tables[i]._key, _tables[i]._value);
            }
        }
        Swap(tmp);
    }

    // Stores a key that is known to be absent. Quadratic probing cannot
    // reach every slot for arbitrary capacities, so when no reachable slot
    // is free the table is grown and the search repeated.
    void _InsertNew(const K& key, const V& value)
    {
        size_t slot = 0;
        while (!_FindFreeSlot(key, slot))
        {
            _Grow();
        }
        _tables[slot]._key = key;
        _tables[slot]._value = value;
        _states[slot] = EXIST;
        ++_size;
    }

    // Finds the first non-EXIST slot on the probe sequence of `key`.
    // Returns false if all reachable slots hold live entries.
    bool _FindFreeSlot(const K& key, size_t& slot) const
    {
        size_t index = _HashFunc(key);
        for (size_t i = 1; i <= _capacity; ++i)
        {
            if (_states[index] != EXIST)
            {
                slot = index;
                return true;
            }
            index = _GetNextIndex(index, i) % _capacity;
        }
        return false;
    }

protected:
    Node* _tables;    // slot storage, _capacity elements
    State* _states;   // per-slot occupancy state, parallel to _tables
    size_t _size;     // number of EXIST slots
    size_t _capacity; // number of slots, always >= 1
};

相关阅读