// -*- coding: utf-8; mode: c++ -*- // Time-stamp: // Hashsets #ifndef HASHSET_H #define HASHSET_H #include #include #include #include "hashcode.hpp" using namespace std; // Declaration of class hashset template class hashset; // Declaration of output operator template ostream& operator<<(ostream& out, const hashset& hs); // Class for hashsets of values of type T with hashcodes given by class H // The class H should have a method unsigned int hash(const T&) that // gives the hashcode of a value of type T. // The default class hashcode provides such a method for classes T // having a method unsigned int hashcode() and for basic types like // int, float and string. template > class hashset { // Friend output operator friend ostream& operator<< (ostream& out, const hashset& hs); public: // Constructor : empty hashset hashset(); // Destructor ~hashset(); // Return size of the hashset, that is, the number of values it contains int size() const { return hssize; } // Test whether the hashset is empty bool empty() const { return hssize == 0; } // Insert a value into hashset void insert(const T&); // Remove a value from hashset // Return the number of values that have been removed : 0 or 1 int erase(const T&); // Search for a value in hashset bool find(const T&) const; // Internal class for iterators on hashsets // Each iterator on a hasset is represensed by an index of a bucket, that is // an integer in [0 .. mod-1] and iterator it on the list buckets[index]. // The position on the first value is the pair (index,it) where // - index is the least integer such that buckets[index] is non empty // - it is an iterator on the first value in the list buckets[index] // The position after the last value is the pair (mod-1,it) // - it is an iterator after the last value in the list buckets[mod-1] class iterator { // Class hashset can access private members of iterators friend class hashset; public: // Constructors iterator(hashset* h, int n) : hs(h), index(n) { next(); } iterator(hashset* h, int n, typename list::iterator i) : hs(h), index(n), it(i) {} // Return reference to the current value T& operator*() { return *it; } // Go to the next value iterator& operator++() { // If the end of the current list is reached // Search for the next non-empty one if (++it == hs->buckets[index].end()) { ++index; next(); } return *this; } // Equality operator bool operator==(const iterator& rhs) { return index == rhs.index && it == rhs.it; } // Non-equality operator bool operator!=(const iterator& rhs) { return index != rhs.index || it != rhs.it; } private: hashset* hs; // Pointer to the hashset int index; // Index of the current list in buckets typename list::iterator it; // Iterator on buckets[index] // Search for the next non-empty list after index // Iterator it is set to // - begin() of this list if such a non-empty list exists // - end() of buckets[mod-1] otherwise void next() { while (index < hs->mod && hs->buckets[index].empty()) ++index; if (index < hs->mod) it = hs->buckets[index].begin(); else { --index; // index = mod-1 it = hs->buckets[hs->mod-1].end(); } } }; // Iterators // Iterator to the first value typename hashset::iterator begin() { return typename hashset::iterator(this, 0); } // Iterator after the last value typename hashset::iterator end() { return typename hashset::iterator(this, mod); } // Remove a range of values from hashset void erase(typename hashset::iterator first, typename hashset::iterator last); // Swap with another hashset void swap(hashset& rhs) { // Namespace std is necessary since this method masks std::swap std::swap(mod, rhs.mod); std::swap(hssize, rhs.hssize); std::swap(buckets, rhs.buckets); } private: static const int mods[]; // Array of possible modulos int mod; // Modulo used by the hashset static const float load = 0.75; // Load factor (as in HashSet of Java) int hssize; // Number of values in hashset list* buckets; // Array of buckets // Increase buckets size and rehash all values void rehash(); // Search for a value in a list // This function is needed since there is no find method in list // Return // - An interator to the value if it is found in the list // - last otherwise template static InputIterator find(InputIterator first, InputIterator last, const T& value); }; // Array of modulos used by the hashsets (should be increased) // List of primes numbers just below powers of 2 template const int hashset::mods[] = { 7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749, 65521, 131071, 262139, 524287, 1048573, 2097143}; // Constructor template hashset::hashset() : // Initialize mod, hssize and allocate buckets mod(mods[0]), hssize(0), buckets(new list[mod]) {} // Destructor template hashset::~hashset() { // Free buckets array delete [] buckets; } // Insert a value into hashset template void hashset::insert(const T& value) { // Important to use a reference here to really modify the list // and not a copy of it :-) list& l = buckets[H::hash(value) % mod]; // Check first if the value already occurs in the list if (find(l.begin(), l.end(), value) == l.end()) { l.push_back(value); hssize++; } // Increase buckets size if necessary if (hssize > mod * load) rehash(); } // Remove a value from hashset template int hashset::erase(const T& value) { // Important to use a reference here to really modify the list // and not a copy of it :-) list& l = buckets[H::hash(value) % mod]; // Iterator to the value typename list::iterator it = find(l.begin(), l.end(), value); // If the value occurs in the list, remove it if (it != l.end()) { l.erase(it); return 1; // 1 value removed } return 0; // 0 value removed } // Remove a range of values from hashset template void hashset::erase(typename hashset::iterator first, typename hashset::iterator last) { if (first.index < last.index) { // Remove to the end of first list buckets[first.index].erase(first.it, buckets[first.index].end()); // Clear all lists in between for (int i = first.index+1; i < last.index; i++) buckets[i].clear(); // Remove from the begining of the last list buckets[last.index].erase(buckets[last.index].begin(), last.it); } else if (first.index == last.index) { // Remove in the unique list buckets[first.index].erase(first.it, last.it); } } // Search for a value in hashset template bool hashset::find(const T& value) const { const list& l = buckets[H::hash(value) % mod]; return find(l.begin(), l.end(), value) != l.end(); } // Output operator template ostream& operator<<(ostream& out, const hashset& hs) { for (int i = 0; i < hs.mod; i++) { cout << i << " :"; const list& l = hs.buckets[i]; for (typename list::const_iterator it = l.begin(); it != l.end(); ++it) out << ' ' << *it; out << endl; } return out; } // Increase buckets size and rehash all values template void hashset::rehash() { // Search for mod int i = 0; while (mod != mods[i]) i++; // Set newmod to the next one in mods int newmod = mods[i+1]; // Create new buckets array list* newbuckets = new list[newmod]; // Rehash all values for (typename hashset::iterator it = begin(); it != end(); ++it) { list& l = newbuckets[H::hash(*it) % newmod]; l.push_back(*it); } // Set buckets and mod std::swap(buckets, newbuckets); mod = newmod; // Free old buckets array delete [] newbuckets; } // Search for a value in a list template template InputIterator hashset::find(InputIterator first, InputIterator last, const T& value) { for ( ; first != last; ++first) if (*first == value) return first; return last; } #endif