#include "bloom_filter.h"

#include <cmath>
#include <cstddef>
#include <limits>
#include <random>
#include <stdexcept>
#include <vector>

// NOTE(review): the class declaration lives in bloom_filter.h, which is not
// visible here. The signature of hashFunction below assumes it is declared as
//   std::vector<size_t> hashFunction(const std::vector<double> &featureVector);
// -- confirm against the header and adjust the element types if they differ.

/// @brief Builds the hasher and immediately draws its random hyperplanes.
/// @param numPlanes Total number of random hyperplanes to generate.
/// @param dimension Number of components in every hyperplane normal (and the
///                  number of feature-vector components that get hashed).
/// @param k         Number of hash values produced per feature vector.
BloomFilter::BloomFilter(size_t numPlanes, size_t dimension, size_t k)
    : numPlanes(numPlanes), dimension(dimension), k(k) {
    generateRandomHyperplanes();
}

/// @brief Fills randomHyperplanes with numPlanes unit-length normal vectors.
///
/// Each component is drawn from a standard normal distribution (mean 0,
/// standard deviation 1) and the vector is then scaled to Euclidean length 1.
/// The generator is seeded from std::random_device, so the planes differ
/// between calls and between runs.
void BloomFilter::generateRandomHyperplanes() {
    std::random_device rd;
    std::mt19937 gen(rd());
    std::normal_distribution<double> dist(0.0, 1.0);

    randomHyperplanes.resize(numPlanes);
    for (auto &plane : randomHyperplanes) {
        plane.resize(dimension);

        double squaredNorm = 0.0;
        for (auto &component : plane) {
            component = dist(gen);
            squaredNorm += component * component;
        }

        // Skip the scaling for a zero-length vector (e.g. dimension == 0) so
        // the plane never ends up filled with NaN from a 0/0 division.
        const double norm = std::sqrt(squaredNorm);
        if (norm > 0.0) {
            for (auto &component : plane) {
                component /= norm;
            }
        }
    }
}

/// @brief Computes k hash values for a feature vector from the sign of its
///        dot product with each random hyperplane.
///
/// The planes are split into k consecutive groups of numPlanes / k planes.
/// Within a group, bit b of the hash is set when the dot product with the
/// group's b-th plane is strictly positive. When numPlanes is not a multiple
/// of k, the trailing numPlanes % k planes are not used. When numPlanes < k,
/// every group is empty and all hash values are 0.
///
/// @param featureVector Input vector; only its first `dimension` components
///                      are read, extra components are ignored.
/// @return A vector of k hash values (empty when k == 0).
/// @throws std::invalid_argument if featureVector has fewer than `dimension`
///         components and at least one plane would have to be evaluated.
std::vector<size_t> BloomFilter::hashFunction(const std::vector<double> &featureVector) {
    std::vector<size_t> hashValues(k, 0);
    if (k == 0) {
        return hashValues;  // nothing to compute; also avoids dividing by zero
    }

    // Split the planes evenly among the k hashes.
    const size_t bitsPerHash = numPlanes / k;
    if (bitsPerHash == 0) {
        return hashValues;  // every group is empty, so every hash stays 0
    }

    if (featureVector.size() < dimension) {
        throw std::invalid_argument(
            "BloomFilter::hashFunction: feature vector has fewer components than the filter dimension");
    }

    // A hash value holds at most one bit per binary digit of size_t. Shifting
    // by that width or more is undefined behaviour, so planes beyond the
    // width of a group are left out of the hash.
    const size_t maxBits = static_cast<size_t>(std::numeric_limits<size_t>::digits);
    const size_t usedBits = bitsPerHash < maxBits ? bitsPerHash : maxBits;

    for (size_t group = 0; group < k; ++group) {
        // start + usedBits <= (group + 1) * bitsPerHash <= numPlanes, so
        // every plane index below is in range.
        const size_t start = group * bitsPerHash;
        size_t hashValue = 0;

        for (size_t bit = 0; bit < usedBits; ++bit) {
            const auto &plane = randomHyperplanes[start + bit];

            double dotProduct = 0.0;
            for (size_t j = 0; j < dimension; ++j) {
                dotProduct += featureVector[j] * plane[j];
            }

            if (dotProduct > 0) {
                hashValue |= (size_t{1} << bit);  // set the bit within this group's range
            }
        }

        hashValues[group] = hashValue;
    }

    return hashValues;
}