Spaces:
Sleeping
Sleeping
File size: 1,782 Bytes
daea7f9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
#include "bloom_filter.h"

#include <limits>
#include <stdexcept>
/**
 * Stores the filter configuration and immediately generates the random
 * hyperplanes used for hashing.
 *
 * @param numPlanes number of hyperplanes to generate
 * @param dimension number of components in each hyperplane
 * @param k         number of hash values hashFunction produces per input
 */
BloomFilter::BloomFilter(size_t numPlanes, size_t dimension, size_t k)
    : numPlanes(numPlanes),
      dimension(dimension),
      k(k)
{
    // The members are set by the initialiser list above, so the generator
    // can size the plane table from them.
    this->generateRandomHyperplanes();
}
/**
 * Fills randomHyperplanes with numPlanes vectors of `dimension` components.
 *
 * Every component is drawn from a normal distribution with mean 0 and
 * standard deviation 1, and each vector is then divided by its Euclidean
 * length. The generator is seeded from std::random_device.
 */
void BloomFilter::generateRandomHyperplanes()
{
    randomHyperplanes.resize(numPlanes, std::vector<double>(dimension));

    std::random_device seedSource;
    std::mt19937 engine(seedSource());
    std::normal_distribution<double> gaussian(0.0, 1.0);

    for (auto &plane : randomHyperplanes)
    {
        // Draw the raw components while accumulating the squared length.
        double sumOfSquares = 0.0;
        for (size_t c = 0; c < dimension; ++c)
        {
            const double sample = gaussian(engine);
            plane[c] = sample;
            sumOfSquares += sample * sample;
        }

        // Scale the plane to unit length.
        const double length = std::sqrt(sumOfSquares);
        for (size_t c = 0; c < dimension; ++c)
        {
            plane[c] /= length;
        }
    }
}
/**
 * Computes k sign-based hash values for a feature vector.
 *
 * The hyperplanes are split into k consecutive groups of numPlanes / k planes
 * each; when numPlanes is not a multiple of k the leftover planes at the end
 * are not used. Within a group, the plane at offset b controls bit b of that
 * group's hash: the bit is set when the dot product of featureVector with the
 * plane is strictly positive.
 *
 * @param featureVector input vector; must contain at least `dimension`
 *                      values, of which only the first `dimension` are read.
 * @return k hash values, one per group (an empty vector when k is 0).
 * @throws std::invalid_argument if featureVector has fewer than `dimension`
 *         elements.
 */
std::vector<size_t> BloomFilter::hashFunction(const std::vector<double> &featureVector)
{
    std::vector<size_t> hashValues(k, 0);
    if (k == 0)
    {
        // Nothing to compute, and the division below would be by zero.
        return hashValues;
    }
    if (featureVector.size() < dimension)
    {
        throw std::invalid_argument(
            "BloomFilter::hashFunction: featureVector is shorter than the filter dimension");
    }

    // Number of bits one hash value can hold.
    const size_t hashBits = std::numeric_limits<size_t>::digits;
    const size_t bitsPerHash = numPlanes / k; // split the planes evenly across the hashes

    for (size_t group = 0; group < k; ++group)
    {
        size_t hashValue = 0;
        const size_t start = group * bitsPerHash;
        // k * bitsPerHash <= numPlanes, so a group never runs past the last plane.
        const size_t end = start + bitsPerHash;
        for (size_t i = start; i < end; ++i)
        {
            double dotProduct = 0.0;
            for (size_t j = 0; j < dimension; ++j)
            {
                dotProduct += featureVector[j] * randomHyperplanes[i][j];
            }
            if (dotProduct > 0)
            {
                // Set the bit for this plane within the group's range. The
                // position is wrapped so the shift is always defined; while a
                // group fits in one hash value each position is used once and
                // XOR gives the same result as OR. Larger groups are folded
                // with XOR instead of saturating the hash with ones.
                hashValue ^= size_t{1} << ((i - start) % hashBits);
            }
        }
        hashValues[group] = hashValue;
    }
    return hashValues;
}