File size: 1,782 Bytes
daea7f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#include "bloom_filter.h"

#include <limits>
#include <stdexcept>

/// @brief Constructs the filter and immediately generates its random hyperplanes.
///
/// @param numPlanes Number of hyperplanes to generate.
/// @param dimension Number of components in every hyperplane.
/// @param k         Number of hash values produced per feature vector.
BloomFilter::BloomFilter(size_t numPlanes, size_t dimension, size_t k)
    : numPlanes(numPlanes),
      dimension(dimension),
      k(k)
{
    // The configuration members are set by the initializer list above, so the
    // generator reads their final values.
    this->generateRandomHyperplanes();
}

/// @brief Fills `randomHyperplanes` with `numPlanes` random vectors scaled to unit length.
///
/// Every plane receives `dimension` samples from a normal distribution with
/// mean 0 and standard deviation 1, and is then divided by its Euclidean
/// length. The Mersenne Twister engine is seeded once from
/// `std::random_device`.
void BloomFilter::generateRandomHyperplanes()
{
    randomHyperplanes.resize(numPlanes, std::vector<double>(dimension));

    std::random_device seedSource;
    std::mt19937 engine(seedSource());
    std::normal_distribution<double> gaussian(0.0, 1.0);

    for (size_t row = 0; row < numPlanes; ++row)
    {
        auto &plane = randomHyperplanes[row];

        // Draw the components and accumulate the squared length in one pass.
        double squaredLength = 0.0;
        for (size_t col = 0; col < dimension; ++col)
        {
            const double sample = gaussian(engine);
            plane[col] = sample;
            squaredLength += sample * sample;
        }

        // Scale the plane so that its Euclidean length becomes 1.
        const double length = std::sqrt(squaredLength);
        for (size_t col = 0; col < dimension; ++col)
        {
            plane[col] /= length;
        }
    }
}

/// @brief Computes `k` sign-pattern hashes of a feature vector.
///
/// The first `k * (numPlanes / k)` hyperplanes are split into `k` consecutive
/// groups of `numPlanes / k` planes each; any remaining planes are unused.
/// Within a group, bit `b` of the hash is set when the dot product of the
/// feature vector with the group's `b`-th plane is strictly positive. When
/// `numPlanes < k` every group is empty and every hash is 0.
///
/// @param featureVector Input vector; only its first `dimension` components
///                      are read.
/// @return A vector of `k` hash values (empty when `k` is 0).
/// @throws std::invalid_argument if `featureVector` has fewer than
///         `dimension` components.
std::vector<size_t> BloomFilter::hashFunction(const std::vector<double> &featureVector)
{
    // Nothing to compute without hash groups; returning here also avoids the
    // division by zero below.
    if (k == 0)
    {
        return {};
    }

    // Reading past the end of the input would be undefined behaviour.
    if (featureVector.size() < dimension)
    {
        throw std::invalid_argument(
            "BloomFilter::hashFunction: featureVector has fewer components than the filter dimension");
    }

    // Width of one hash word; bit positions wrap around it so the shift below
    // is always well defined, even for groups larger than the word.
    constexpr size_t wordBits = std::numeric_limits<size_t>::digits;

    // Split the planes evenly among the hashes.
    const size_t bitsPerHash = numPlanes / k;

    std::vector<size_t> hashValues(k, 0);
    for (size_t group = 0; group < k; ++group)
    {
        size_t hashValue = 0;
        const size_t start = group * bitsPerHash;
        const size_t end = start + bitsPerHash; // never exceeds numPlanes

        for (size_t i = start; i < end; ++i)
        {
            const auto &plane = randomHyperplanes[i];

            double dotProduct = 0.0;
            for (size_t j = 0; j < dimension; ++j)
            {
                dotProduct += featureVector[j] * plane[j];
            }

            if (dotProduct > 0)
            {
                // Set the bit for this plane's position within its group.
                hashValue |= size_t{1} << ((i - start) % wordBits);
            }
        }
        hashValues[group] = hashValue;
    }

    return hashValues;
}