Spaces:
Running
Running
levanel commited on
Commit ·
e87a50a
0
Parent(s):
vecmini1
Browse files- .gitignore +2 -0
- CMakeLists.txt +53 -0
- Dockerfile +45 -0
- app.py +114 -0
- include/IndexFlat.h +16 -0
- include/IndexIVF.h +23 -0
- include/IndexIVFPQ.h +28 -0
- include/IndexPQ.h +18 -0
- include/clustering.h +11 -0
- src/IndexFlat.cpp +86 -0
- src/IndexIVF.cpp +148 -0
- src/IndexIVFPQ.cpp +119 -0
- src/IndexPQ.cpp +93 -0
- src/bindings.cpp +118 -0
- src/clustering.cpp +40 -0
- src/rand.json +0 -0
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.so
|
| 2 |
+
build/
|
CMakeLists.txt
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
cmake_minimum_required(VERSION 3.22)
project(vecmini VERSION 1.0 LANGUAGES CXX)

# The static engine library is linked into a shared Python extension module,
# so every object file has to be position independent.
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# Use modern C++17 for cleaner syntax
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# AVX2/FMA and optimization flags. The sources use AVX2 + FMA intrinsics
# unconditionally, so these flags are required, not optional.
add_compile_options(-mavx2 -mfma -O3 -march=x86-64-v3)

# Generates the compile_commands.json file for tools like clangd
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# ==========================================
# DEPENDENCIES (OpenMP & math backends for the engine)
# ==========================================
find_package(OpenMP REQUIRED)
find_package(BLAS REQUIRED)
find_package(LAPACK REQUIRED)

# ==========================================
# PHASE 1: Build the core vector database library
# ==========================================
add_library(vectordb STATIC
    src/IndexFlat.cpp
    src/clustering.cpp
    src/IndexIVF.cpp
    src/IndexPQ.cpp
    src/IndexIVFPQ.cpp
)

# Target-scoped include path; PUBLIC so the Python bridge inherits it.
target_include_directories(vectordb PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)

# Attach the threading/math backends to the engine itself. Linking
# OpenMP::OpenMP_CXX only to the Python module would leave the engine sources
# compiled without the OpenMP flag, so any `#pragma omp` in them would be
# silently ignored. PUBLIC propagates the link requirements to consumers of
# the static library.
target_link_libraries(vectordb PUBLIC
    OpenMP::OpenMP_CXX
    ${BLAS_LIBRARIES}
    ${LAPACK_LIBRARIES}
)

# ==========================================
# PHASE 2: The Python Bridge (Pybind11)
# ==========================================
find_package(Python3 COMPONENTS Interpreter Development REQUIRED)
find_package(pybind11 CONFIG REQUIRED)

pybind11_add_module(vecmini src/bindings.cpp)

# The engine brings its own backends along transitively.
target_link_libraries(vecmini PRIVATE vectordb)
|
Dockerfile
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Stage 1: build the vecmini extension module
FROM python:3.10-slim AS builder

RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    cmake \
    libopenblas-dev \
    liblapack-dev \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app
RUN pip install --no-cache-dir pybind11

# Copy only what the engine needs
COPY CMakeLists.txt ./
COPY src/ ./src
COPY include/ ./include

# pybind11 installed through pip is not on CMake's default search path, so
# point find_package(pybind11 CONFIG) at it explicitly.
RUN mkdir build && cd build && \
    cmake -DCMAKE_BUILD_TYPE=Release \
          -Dpybind11_DIR="$(python -m pybind11 --cmakedir)" .. && \
    make vecmini

# Stage 2: final runtime environment
FROM python:3.10-slim

RUN useradd -m -u 1000 user
WORKDIR /home/user/app

# Runtime math dependencies. The module is built with GCC, whose OpenMP
# runtime is libgomp (not LLVM's libomp).
RUN apt-get update && apt-get install -y --no-install-recommends \
    libopenblas0 \
    libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Everything app.py imports must be present in the runtime image.
RUN pip install --no-cache-dir gradio numpy torch transformers pypdf

# Copy the compiled .so module next to app.py so `import vecmini` resolves
COPY --from=builder --chown=user /app/build/vecmini*.so /home/user/app/

# Copy the frontend code (app.py)
COPY --chown=user . /home/user/app

# Run as the unprivileged user created above.
USER user

# Gradio binds to 127.0.0.1 by default; listen on all interfaces so the
# container's published port is reachable.
ENV GRADIO_SERVER_NAME=0.0.0.0
ENV PORT=7860
EXPOSE 7860

CMD ["python", "app.py"]
|
app.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import torch
|
| 3 |
+
import numpy as np
|
| 4 |
+
import vecmini
|
| 5 |
+
import pypdf
|
| 6 |
+
from transformers import AutoTokenizer, AutoModel
|
| 7 |
+
|
| 8 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision only pays off on GPU. On CPU float16 kernels are slow and,
# depending on the torch version, missing for some ops, so use float32 there.
torch_dtype = torch.float16 if device == "cuda" else torch.float32

print("Loading Sentence Encoder (Bi-Encoder Only)...")
embed_id = "sentence-transformers/all-MiniLM-L6-v2"
embed_tokenizer = AutoTokenizer.from_pretrained(embed_id)
embed_model = AutoModel.from_pretrained(embed_id).to(device).to(torch_dtype)
embed_model.eval()  # inference only

# Module-level state shared by the Gradio callbacks.
global_chunks = []   # text chunks of the currently indexed PDF
db = None            # vecmini index built over the chunk embeddings
global_nlist = 1     # bucket count used when `db` was built
|
| 19 |
+
|
| 20 |
+
def mean_pooling(model_output, attention_mask):
    """Average the token embeddings of each sequence, ignoring padding.

    model_output[0] holds the per-token embeddings; attention_mask marks the
    positions that take part in the average. The token count is clamped to
    1e-9 so a fully masked row yields zeros instead of a division by zero.
    """
    per_token = model_output[0]
    weights = attention_mask.unsqueeze(-1).expand(per_token.size()).float()
    summed = torch.sum(per_token * weights, 1)
    token_counts = torch.clamp(weights.sum(1), min=1e-9)
    return summed / token_counts
|
| 24 |
+
|
| 25 |
+
def encode_texts(texts, batch_size=64):
    """Embed texts with the sentence encoder and L2-normalize the result.

    Args:
        texts: a list of strings (a single string is treated as one text).
        batch_size: number of texts per forward pass. Batching keeps peak
            memory bounded when a large PDF produces many chunks.

    Returns:
        A float32 numpy array with one unit-length row per input text. For an
        empty input the array has zero rows.
    """
    if isinstance(texts, str):
        texts = [texts]

    batches = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        encoded_input = embed_tokenizer(batch, padding=True, truncation=True, return_tensors="pt").to(device)
        with torch.no_grad():
            model_output = embed_model(**encoded_input)
        embeddings = mean_pooling(model_output, encoded_input["attention_mask"])
        embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
        batches.append(embeddings.cpu().numpy().astype(np.float32))

    if not batches:
        # Nothing to encode: return an empty matrix of the right width.
        return np.zeros((0, embed_model.config.hidden_size), dtype=np.float32)
    # The native index reads the buffer as a raw float array, so make sure it
    # is a single contiguous block.
    return np.ascontiguousarray(np.concatenate(batches, axis=0))
|
| 31 |
+
|
| 32 |
+
def process_pdf(file_obj):
    """Extract text from an uploaded PDF, chunk it, and build a vecmini index.

    Args:
        file_obj: the value delivered by the Gradio file component; either an
            object exposing the path as `.name` or the path itself.

    Returns:
        A status message for the UI (success or a description of the failure).

    The module-level `global_chunks`, `db` and `global_nlist` are replaced
    together, and only after the new index has been built, so a failure
    part-way through never leaves the chunks and the index out of sync.
    """
    global global_chunks, db, global_nlist

    if file_obj is None:
        return "Error: No file uploaded."

    pdf_path = getattr(file_obj, "name", file_obj)

    try:
        reader = pypdf.PdfReader(pdf_path)
        text = ""
        for page in reader.pages:
            extracted = page.extract_text()
            if extracted:
                text += extracted + " "
    except Exception as e:
        return f"Failed to read PDF: {str(e)}"

    if not text.strip():
        return "Error: Could not extract any readable text from this PDF."

    # Fixed-size chunks of 200 whitespace-separated words.
    chunk_size = 200
    words = text.split()
    chunks = [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]

    embeddings = encode_texts(chunks)
    d = embeddings.shape[1]
    nb = embeddings.shape[0]

    # Roughly four vectors per bucket, with at least one bucket.
    nlist = max(1, int(nb / 4))

    index = vecmini.IndexIVF(d, nlist)
    index.train(nb, embeddings)
    index.add(nb, embeddings, np.arange(nb, dtype=np.uint64))

    # Publish the new state in one step now that everything succeeded.
    global_chunks, db, global_nlist = chunks, index, nlist

    return f"Success! Extracted {nb} chunks from the PDF and built vecmini index."
|
| 66 |
+
|
| 67 |
+
def retrieve_chunks(query, top_k):
    """Search the current index and format the matching chunks as Markdown.

    Args:
        query: free-text search query.
        top_k: requested number of results (capped at the number of chunks).

    Returns:
        A Markdown string with one entry per hit, or a hint message when no
        index exists yet or the query is empty.
    """
    if db is None or not global_chunks:
        return "Please upload and process a PDF first."
    if not query.strip():
        return "Please enter a search query."

    query_emb = encode_texts([query])

    fetch_k = min(int(top_k), len(global_chunks))
    nprobe = max(1, int(global_nlist / 2))

    distances, labels = db.search(1, query_emb, k=fetch_k, nprobe=nprobe, bitmask=None)

    # The index pads missing results with label -1 / distance -1. A bare
    # `idx < len(...)` check lets -1 through, and Python would then return the
    # LAST chunk for it. Keep each distance paired with its own label so
    # dropping a padded slot cannot shift the distances onto the wrong hits.
    hits = [
        (int(idx), float(dist))
        for dist, idx in zip(distances[0], labels[0])
        if 0 <= int(idx) < len(global_chunks)
    ]

    output_text = f"### Top {len(hits)} Results for: *'{query}'*\n\n"

    for i, (idx, dist) in enumerate(hits):
        chunk_text = global_chunks[idx]
        output_text += f"**Result {i+1}** | Vector Distance: `{dist:.4f}` | Chunk ID: `{idx}`\n"
        output_text += f"> {chunk_text}\n\n---\n\n"

    return output_text
|
| 91 |
+
|
| 92 |
+
# Gradio front end: the left column builds the index, the right column queries it.
# NOTE(review): the original indentation was lost in this copy; `results_out`
# is assumed to sit below the two-column row - confirm against the repository.
with gr.Blocks(title="Vecmini Visualizer") as demo:
    gr.Markdown("# Vecmini: PDF Raw Retrieval Tester")
    gr.Markdown("Upload a PDF, build the index, and see exactly what `vecmini` returns for your queries.")

    with gr.Row():
        with gr.Column():
            pdf_input = gr.File(label="Upload PDF Document", file_types=[".pdf"])
            process_btn = gr.Button("Build Vecmini Index", variant="primary")
            status_out = gr.Textbox(label="Index Status", interactive=False)

        with gr.Column():
            query_input = gr.Textbox(label="Search Query")
            k_slider = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Number of chunks to retrieve (K)")
            search_btn = gr.Button("Search Vecmini")

    results_out = gr.Markdown(label="Retrieved Chunks")

    process_btn.click(fn=process_pdf, inputs=pdf_input, outputs=status_out)

    # The search button and pressing Enter in the query box run the same search.
    for trigger in (search_btn.click, query_input.submit):
        trigger(fn=retrieve_chunks, inputs=[query_input, k_slider], outputs=results_out)

if __name__ == "__main__":
    demo.launch()
|
include/IndexFlat.h
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
#include <vector>
|
| 3 |
+
|
| 4 |
+
// Brute-force index: stores raw vectors and compares a query against every
// one of them using squared L2 distance.
class IndexFlatL2 {
    int d;                  // dimension of every vector
    int ntotal = 0;         // number of vectors stored so far
    std::vector<float> xb;  // stored vectors, back to back (d floats each)

public:
    IndexFlatL2(int d) : d(d) {}

    // Appends `n` vectors (n * d floats) read from `x`.
    void add(int n, const float *x);

    // Finds the `k` nearest stored vectors for each of the `n` queries in `x`.
    // Results go to `distances` and `labels` (k entries per query, nearest
    // first); unused slots are filled with -1.
    void search(int n, const float *x, int k, float *distances, int *labels);
};
|
include/IndexIVF.h
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
#include <vector>
|
| 3 |
+
#include "IndexFlat.h"
|
| 4 |
+
#include <cstdint>
|
| 5 |
+
|
| 6 |
+
class IndexIVF {
|
| 7 |
+
private:
|
| 8 |
+
int d;
|
| 9 |
+
int nbucket;
|
| 10 |
+
int ntotal = 0;
|
| 11 |
+
bool trained = false;
|
| 12 |
+
|
| 13 |
+
IndexFlatL2 router;
|
| 14 |
+
std::vector<std::vector<float>> memory;
|
| 15 |
+
std::vector<std::vector<uint64_t>> memory_ids;
|
| 16 |
+
|
| 17 |
+
public: // The interface (Your benchmark script is allowed to use these)
|
| 18 |
+
IndexIVF(int d, int nbucket);
|
| 19 |
+
void train(int n, const float *x);
|
| 20 |
+
void add(int n, const float *x, const uint64_t*xids);
|
| 21 |
+
void search(int n, const float* x, int k, int nprobe, const uint8_t *bitmask, float *distances, int *labels, const uint8_t *L1_summary = nullptr);
|
| 22 |
+
void inject_centroids(const float* external_centroids);
|
| 23 |
+
};
|
include/IndexIVFPQ.h
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
#include<vector>
|
| 3 |
+
#include<cstdint>
|
| 4 |
+
#include "IndexPQ.h"
|
| 5 |
+
#include "IndexFlat.h"
|
| 6 |
+
#include <cstddef>
|
| 7 |
+
class IndexIVFPQ{
|
| 8 |
+
private:
|
| 9 |
+
int d;
|
| 10 |
+
int m;//bitquant
|
| 11 |
+
int nbucket; //no of centroid
|
| 12 |
+
int ntotal; //no of vector index
|
| 13 |
+
bool trained=false;
|
| 14 |
+
size_t nprobe;//how many voronoi i should look at
|
| 15 |
+
IndexFlatL2 router;
|
| 16 |
+
IndexPQ pq;
|
| 17 |
+
std::vector<float>coarse_centroids;
|
| 18 |
+
std::vector<std::vector<uint8_t>>codes;
|
| 19 |
+
std::vector<std::vector<int64_t>>ids;
|
| 20 |
+
|
| 21 |
+
public:
|
| 22 |
+
IndexIVFPQ(int d, int nbucket, int m);
|
| 23 |
+
void train(int n, const float *x, bool subsampling, int seed);
|
| 24 |
+
void add(int n, const float *x, const uint64_t* xids);
|
| 25 |
+
void search(int n, const float *query, int k, int nprobe, float* distances, int64_t* labels);
|
| 26 |
+
};
|
| 27 |
+
|
| 28 |
+
|
include/IndexPQ.h
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
#include <vector>
|
| 3 |
+
#include <cstdint>
|
| 4 |
+
// Product quantizer: splits a d-dimensional vector into `m` slices and
// encodes each slice as the index of its nearest sub-centroid.
class IndexPQ {
private:
    int d;      // full vector dimension
    int m;      // number of slices / sub-quantizers
    int k_sub;  // centroids per sub-quantizer
    int d_sub;  // dimension of one slice (d / m)

    std::vector<float> centroids;  // m codebooks of k_sub * d_sub floats each
    bool trained = false;

public:
    IndexPQ(int d, int m);

    // Learns one codebook per slice from `n` vectors in `x`.
    void train(int n, const float *x, bool subsampling, int seed);

    // Writes the `m` code bytes of vector `x` to `out`.
    void encode(const float *x, uint8_t *out);

    // Precomputes query-to-centroid distances for every slice into `outable`.
    void compute_distance_table(const float *query, float *outable);
};
|
include/clustering.h
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
#include<vector>
|
| 3 |
+
|
| 4 |
+
// K-means clustering entry point (defined in src/clustering.cpp).
// As used by the callers in this project:
//   d         - vector dimension
//   n         - number of input vectors
//   k         - number of clusters
//   x         - input vectors, n * d floats
//   centroids - output buffer the caller sizes to k * d floats
//   seed      - presumably the RNG seed for initialization - confirm in clustering.cpp
void kmean_clustering(int d, int n, int k, const float *x, float *centroids, int seed);
|
src/IndexFlat.cpp
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "IndexFlat.h"
|
| 2 |
+
#include <queue>
|
| 3 |
+
#include <vector>
|
| 4 |
+
#include<immintrin.h>
|
| 5 |
+
|
| 6 |
+
// Appends `n` vectors (n * d consecutive floats starting at `x`) to the
// store and bumps the vector count.
void IndexFlatL2::add(int n, const float *x) {
    const float *first = x;
    const float *last = x + (n * d);
    xb.insert(xb.end(), first, last);
    ntotal += n;
}
|
| 10 |
+
|
| 11 |
+
void IndexFlatL2::search(int n, const float *x, int k, float *distances, int *labels){
|
| 12 |
+
for(int i = 0; i<n; i++){//iterate over the entire query
|
| 13 |
+
//old stuff
|
| 14 |
+
//float min_distance = 1e9;
|
| 15 |
+
//int bestid = -1;
|
| 16 |
+
std::priority_queue<std::pair<float, int>> pq;
|
| 17 |
+
|
| 18 |
+
for(int j= 0; j<ntotal; j++){//compare query against every vec in db
|
| 19 |
+
float curr_distance = 0;
|
| 20 |
+
int m = 0;
|
| 21 |
+
|
| 22 |
+
__m256 sumvec = _mm256_setzero_ps();
|
| 23 |
+
|
| 24 |
+
const float* current_db_vec = &xb[j * d];
|
| 25 |
+
const float* current_q_vec = &x[i * d];
|
| 26 |
+
|
| 27 |
+
for(; m + 7 < d; m += 8){
|
| 28 |
+
__m256 dbvec = _mm256_loadu_ps(¤t_db_vec[m]);
|
| 29 |
+
__m256 qvec = _mm256_loadu_ps(¤t_q_vec[m]);
|
| 30 |
+
|
| 31 |
+
__m256 diff = _mm256_sub_ps(dbvec, qvec);
|
| 32 |
+
|
| 33 |
+
sumvec = _mm256_fmadd_ps(diff, diff, sumvec);
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
__m128 upper = _mm256_extractf128_ps(sumvec,1);
|
| 38 |
+
__m128 lower = _mm256_castps256_ps128(sumvec);
|
| 39 |
+
|
| 40 |
+
__m128 sumbound = _mm_add_ps(upper, lower);
|
| 41 |
+
__m128 shifted = _mm_movehl_ps(sumbound,sumbound);
|
| 42 |
+
__m128 current = _mm_add_ps(sumbound, shifted);
|
| 43 |
+
__m128 shuffled = _mm_shuffle_ps(current, current, 1);
|
| 44 |
+
__m128 finalsum = _mm_add_ps(current, shuffled);
|
| 45 |
+
curr_distance = _mm_cvtss_f32(finalsum);
|
| 46 |
+
|
| 47 |
+
for(; m < d; m++){
|
| 48 |
+
float dist = current_db_vec[m] - current_q_vec[m];
|
| 49 |
+
curr_distance += (dist * dist);
|
| 50 |
+
}
|
| 51 |
+
/*
|
| 52 |
+
if (curr_distance<min_distance){
|
| 53 |
+
min_distance=curr_distance;
|
| 54 |
+
bestid = j;
|
| 55 |
+
} */
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
if(pq.size()<k){
|
| 60 |
+
pq.push({curr_distance,j});
|
| 61 |
+
}else{
|
| 62 |
+
if(curr_distance<pq.top().first){
|
| 63 |
+
pq.pop();
|
| 64 |
+
pq.push({curr_distance,j});
|
| 65 |
+
}
|
| 66 |
+
}
|
| 67 |
+
}
|
| 68 |
+
/*
|
| 69 |
+
distances[i] = min_distance;
|
| 70 |
+
labels[i] = bestid; */
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
//standard for loop cannot handle garbage values.
|
| 74 |
+
//for that we need 2 seperate for loop, one that handles the queue content properly
|
| 75 |
+
int count=pq.size();
|
| 76 |
+
for(int c = count-1; c>=0; c--){
|
| 77 |
+
distances[i*k+c] = pq.top().first;
|
| 78 |
+
labels[i*k+c] = pq.top().second;
|
| 79 |
+
pq.pop();
|
| 80 |
+
}
|
| 81 |
+
for(int step=count; step<k; step++){
|
| 82 |
+
distances[i*k+step] = -1.0;
|
| 83 |
+
labels[i*k+step] = -1;
|
| 84 |
+
}
|
| 85 |
+
}
|
| 86 |
+
}
|
src/IndexIVF.cpp
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "IndexIVF.h"
|
| 2 |
+
#include "clustering.h"
|
| 3 |
+
#include <queue>
|
| 4 |
+
#include <iostream>
|
| 5 |
+
#include<immintrin.h>
|
| 6 |
+
// Creates an untrained index with `nbucket` empty buckets for vectors of
// dimension `d`.
IndexIVF::IndexIVF(int d, int nbucket)
    : d(d), nbucket(nbucket), router(d) {
    memory_ids.resize(nbucket);
    memory.resize(nbucket);
}
|
| 10 |
+
|
| 11 |
+
// Learns `nbucket` centroids from the `n` vectors in `x` with k-means (fixed
// seed 1) and loads them into the router. Later calls are ignored.
void IndexIVF::train(int n, const float *x) {
    if (!trained) {
        std::vector<float> learned(nbucket * d);
        kmean_clustering(d, n, nbucket, x, learned.data(), 1);
        router.add(nbucket, learned.data());
        trained = true;
    }
}
|
| 20 |
+
|
| 21 |
+
void IndexIVF::add(int n, const float *x, const uint64_t*xids){
|
| 22 |
+
if(!trained) return;
|
| 23 |
+
std::vector<int> assign(n);
|
| 24 |
+
std::vector<float> distances(n);
|
| 25 |
+
router.search(n,x,1,distances.data(), assign.data());
|
| 26 |
+
for(int i =0; i<n; i++){
|
| 27 |
+
int bucketid= assign[i];
|
| 28 |
+
memory[bucketid].insert(memory[bucketid].end(),x+(i*d), x+(i*d)+d);
|
| 29 |
+
//for metadata
|
| 30 |
+
memory_ids[bucketid].push_back(xids[i]);
|
| 31 |
+
}
|
| 32 |
+
ntotal+=n;
|
| 33 |
+
}
|
| 34 |
+
void IndexIVF::search(int n, const float* x, int k, int nprobe, const uint8_t *bitmask, float *distances, int *labels, const uint8_t *vecmini_L1_summary){
|
| 35 |
+
std::vector<int>assign(n*nprobe);
|
| 36 |
+
std::vector<float> centroids_distance(n*nprobe);
|
| 37 |
+
|
| 38 |
+
router.search(n,x,nprobe,centroids_distance.data(), assign.data());
|
| 39 |
+
for(int i = 0; i<n; i++){
|
| 40 |
+
//std::unordered_set <uint64_t> set;
|
| 41 |
+
// std::priority_queue<std::pair<float, int>, std::vector<std::pair<float, int>>, std::greater<std::pair<float, int>>> pq;
|
| 42 |
+
std::priority_queue<std::pair<float, int>> pq;
|
| 43 |
+
const float *specquer = x+(i*d);
|
| 44 |
+
for(int p= 0; p<nprobe; p++){
|
| 45 |
+
int bucketid = assign[i*nprobe+p];
|
| 46 |
+
int vectorinmemo = memory[bucketid].size()/d;
|
| 47 |
+
if(vectorinmemo==0)continue;
|
| 48 |
+
const float *bucketdata= memory[bucketid].data();
|
| 49 |
+
|
| 50 |
+
for(int j = 0; j<vectorinmemo; j++){
|
| 51 |
+
int prefetch_stride = 1;
|
| 52 |
+
|
| 53 |
+
if(j + prefetch_stride < vectorinmemo){
|
| 54 |
+
_mm_prefetch((const char*)&bucketdata[(j + prefetch_stride) * d], _MM_HINT_T0);
|
| 55 |
+
|
| 56 |
+
if (bitmask != nullptr) {
|
| 57 |
+
uint64_t future_id = memory_ids[bucketid][j + prefetch_stride];
|
| 58 |
+
|
| 59 |
+
// If you ever use L1 summary again, prefetch it here:
|
| 60 |
+
// if (vecmini_L1_summary != nullptr) _mm_prefetch((const char*)&vecmini_L1_summary[future_id / 8], _MM_HINT_T0);
|
| 61 |
+
|
| 62 |
+
// Prefetch the massive uint8_t mask byte
|
| 63 |
+
_mm_prefetch((const char*)&bitmask[future_id], _MM_HINT_T0);
|
| 64 |
+
}
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
uint64_t global_id = memory_ids[bucketid][j];
|
| 68 |
+
|
| 69 |
+
if (bitmask != nullptr && bitmask[global_id]==0 ) {
|
| 70 |
+
continue;
|
| 71 |
+
}
|
| 72 |
+
|
| 73 |
+
//removed this for simd
|
| 74 |
+
|
| 75 |
+
//benchmark for standard cosine calc->
|
| 76 |
+
//nullptr: 6.32857
|
| 77 |
+
//bitmask: 4.60353
|
| 78 |
+
|
| 79 |
+
//after simd
|
| 80 |
+
//nullptr: 1.3298
|
| 81 |
+
//bitmask: 0.918149
|
| 82 |
+
|
| 83 |
+
//added simd
|
| 84 |
+
float dist = 0;
|
| 85 |
+
int m = 0;
|
| 86 |
+
|
| 87 |
+
__m256 sumvec = _mm256_setzero_ps();
|
| 88 |
+
|
| 89 |
+
/*for(int m = 0; m<d; m++){
|
| 90 |
+
currcosine+=(bucketdata[j*d+m]*specquer[m]);
|
| 91 |
+
}*/
|
| 92 |
+
for(; m+7<d; m+=8){
|
| 93 |
+
//load 8float from the db chunk
|
| 94 |
+
__m256 dbvec= _mm256_loadu_ps(&bucketdata[j*d+m]);
|
| 95 |
+
__m256 qvec= _mm256_loadu_ps(&specquer[m]);
|
| 96 |
+
__m256 diff = _mm256_sub_ps(dbvec, qvec); //-> only add for un normalized vectors
|
| 97 |
+
sumvec = _mm256_fmadd_ps(diff, diff, sumvec);
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
__m128 upper = _mm256_extractf128_ps(sumvec, 1);
|
| 101 |
+
__m128 lower = _mm256_extractf128_ps(sumvec, 0);
|
| 102 |
+
|
| 103 |
+
__m128 sumbound = _mm_add_ps(upper, lower);
|
| 104 |
+
__m128 shifted = _mm_movehl_ps(sumbound, sumbound);
|
| 105 |
+
__m128 current = _mm_add_ps(sumbound, shifted);
|
| 106 |
+
__m128 shuffled = _mm_shuffle_ps(current, current, 1);
|
| 107 |
+
__m128 finalsum = _mm_add_ps(current, shuffled);
|
| 108 |
+
dist = _mm_cvtss_f32(finalsum);
|
| 109 |
+
|
| 110 |
+
/*
|
| 111 |
+
float sumarr[8];
|
| 112 |
+
_mm256_storeu_ps(sumarr,sumvec);
|
| 113 |
+
currcosine= sumarr[0]+sumarr[1]+
|
| 114 |
+
sumarr[2]+sumarr[3]+
|
| 115 |
+
sumarr[4]+sumarr[5]+
|
| 116 |
+
sumarr[6]+sumarr[7];
|
| 117 |
+
//cleanup
|
| 118 |
+
*/
|
| 119 |
+
if(pq.size()<k){
|
| 120 |
+
pq.push({dist, global_id});
|
| 121 |
+
}else{
|
| 122 |
+
if(dist<pq.top().first){
|
| 123 |
+
pq.pop();
|
| 124 |
+
pq.push({dist, global_id});
|
| 125 |
+
}
|
| 126 |
+
}
|
| 127 |
+
}
|
| 128 |
+
}
|
| 129 |
+
float *speldist = distances+(i*k);
|
| 130 |
+
int *spelbs = labels+(i*k);
|
| 131 |
+
int count = pq.size();
|
| 132 |
+
for(int c = count-1; c>=0; c--){
|
| 133 |
+
speldist[c]= pq.top().first;
|
| 134 |
+
spelbs[c]= pq.top().second;
|
| 135 |
+
pq.pop();
|
| 136 |
+
}
|
| 137 |
+
for(int step = count; step<k; step++){
|
| 138 |
+
speldist[step]=-1.0;
|
| 139 |
+
spelbs[step]= -1;
|
| 140 |
+
}
|
| 141 |
+
}
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
// Loads `nbucket` precomputed centroids into the router and marks the index
// as trained. Has no effect once the index is trained.
void IndexIVF::inject_centroids(const float* external_centroids) {
    if (!trained) {
        router.add(nbucket, external_centroids);
        trained = true;
    }
}
|
src/IndexIVFPQ.cpp
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "IndexIVFPQ.h"
|
| 2 |
+
#include "IndexIVF.h"
|
| 3 |
+
#include "clustering.h"
|
| 4 |
+
#include <queue>
|
| 5 |
+
#include <iostream>
|
| 6 |
+
#include <immintrin.h>
|
| 7 |
+
#include <random>
|
| 8 |
+
#include <cstring>
|
| 9 |
+
// Creates an untrained IVF-PQ index with `nbucket` empty buckets.
// `ntotal` and `nprobe` are now given defined starting values; they were
// previously left uninitialized.
IndexIVFPQ::IndexIVFPQ(int d, int nbucket, int m)
    : d(d), m(m), nbucket(nbucket), ntotal(0), nprobe(1), router(d), pq(d, m) {
    codes.resize(nbucket);
    ids.resize(nbucket);
}
|
| 13 |
+
|
| 14 |
+
// Trains the index in two stages:
//   1. k-means over the input (or a random sample of at most 150000 vectors
//      when `subsampling` is set) gives the coarse centroids;
//   2. every input vector is reduced to its residual against the nearest
//      coarse centroid, and the product quantizer is trained on those residuals.
// Later calls are ignored. All offsets are computed in size_t so that
// n * d no longer overflows int on large training sets.
void IndexIVFPQ::train(int n, const float *x, bool subsampling, int seed){
    if (trained) return;

    const std::size_t dim = static_cast<std::size_t>(d);
    coarse_centroids.resize(static_cast<std::size_t>(nbucket) * dim);

    const int maxtrain = 150000;
    if (n > maxtrain && subsampling) {
        // Draw maxtrain rows (with replacement) as the clustering input.
        std::mt19937 gen(seed);
        std::uniform_int_distribution<int> dis(0, n - 1);
        std::vector<float> sample_buffer(static_cast<std::size_t>(maxtrain) * dim);
        for (int i = 0; i < maxtrain; i++) {
            const std::size_t randval = static_cast<std::size_t>(dis(gen));
            std::memcpy(sample_buffer.data() + static_cast<std::size_t>(i) * dim,
                        x + randval * dim,
                        dim * sizeof(float));
        }
        kmean_clustering(d, maxtrain, nbucket, sample_buffer.data(), coarse_centroids.data(), seed);
    } else {
        kmean_clustering(d, n, nbucket, x, coarse_centroids.data(), seed);
    }

    router.add(nbucket, coarse_centroids.data());

    // Assign every training vector to its nearest coarse centroid.
    std::vector<float> distances(n);
    std::vector<int> labels(n);
    router.search(n, x, 1, distances.data(), labels.data());

    std::vector<float> residuals(static_cast<std::size_t>(n) * dim);
    for (int i = 0; i < n; i++) {
        const float *vec = x + static_cast<std::size_t>(i) * dim;
        const float *centroid = coarse_centroids.data() + static_cast<std::size_t>(labels[i]) * dim;
        float *out = residuals.data() + static_cast<std::size_t>(i) * dim;
        for (int j = 0; j < d; j++) {
            out[j] = vec[j] - centroid[j];
        }
    }

    pq.train(n, residuals.data(), subsampling, seed);
    trained = true;
}
|
| 46 |
+
void IndexIVFPQ::add(int n, const float *x, const uint64_t* xids){
|
| 47 |
+
if (!trained) return;
|
| 48 |
+
std::vector<float>residuals(n*d);
|
| 49 |
+
std::vector<float> distances(n);
|
| 50 |
+
std::vector<int> labels(n);
|
| 51 |
+
router.search(n,x,1,distances.data(), labels.data());
|
| 52 |
+
std::cout << "expected centroids size: " << nbucket * d << std::endl;
|
| 53 |
+
std::cout << "actual centroids size: " << coarse_centroids.size() << std::endl;
|
| 54 |
+
std::cout << "codes vector size: " << codes.size() << std::endl;
|
| 55 |
+
for(int i = 0;i<n; i++){
|
| 56 |
+
int drawerid = labels[i];
|
| 57 |
+
for(int j = 0; j<d; j++){
|
| 58 |
+
residuals[(i*d)+j] = x[(i*d)+j]-coarse_centroids[(drawerid*d)+j];
|
| 59 |
+
}
|
| 60 |
+
std::vector<uint8_t> zipvect(m);
|
| 61 |
+
pq.encode(residuals.data()+(i*d), zipvect.data());
|
| 62 |
+
codes[drawerid].insert(codes[drawerid].end(), zipvect.begin(), zipvect.end());
|
| 63 |
+
ids[drawerid].push_back(xids[i]);
|
| 64 |
+
}
|
| 65 |
+
}
|
| 66 |
+
void IndexIVFPQ::search(int n, const float *query, int k, int nprobe, float* distances, int64_t* labels){
|
| 67 |
+
std::vector<int> assign(n*nprobe);
|
| 68 |
+
std::vector<float> coarse_distances(n*nprobe);
|
| 69 |
+
router.search(n,query, nprobe, coarse_distances.data(),assign.data());
|
| 70 |
+
for(int i = 0; i<n; i++){
|
| 71 |
+
std::priority_queue<std::pair<float, int>> max_heap;
|
| 72 |
+
std::vector<float> query_residual(d);
|
| 73 |
+
for(int p=0; p<nprobe; p++){
|
| 74 |
+
int drawerid = assign[(i*nprobe)+p];
|
| 75 |
+
/*for(int j = 0; j<d; j++){
|
| 76 |
+
query_residual[j] = query[(i*d)+j] - coarse_centroids[(drawerid*d)+j];
|
| 77 |
+
}
|
| 78 |
+
*/
|
| 79 |
+
|
| 80 |
+
for(int j=0; j<d; j+=8){
|
| 81 |
+
__m256 ccvec= _mm256_loadu_ps(&coarse_centroids[(drawerid*d)+j]);
|
| 82 |
+
__m256 qrvec= _mm256_loadu_ps(&query[(i*d)+j]);
|
| 83 |
+
__m256 diffvec = _mm256_sub_ps(qrvec,ccvec);
|
| 84 |
+
_mm256_storeu_ps(&query_residual[j], diffvec);
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
std::vector<float> distance_table(m*256);
|
| 89 |
+
pq.compute_distance_table(query_residual.data(), distance_table.data());
|
| 90 |
+
for(int v = 0; v<codes[drawerid].size()/m; v++){
|
| 91 |
+
float totaldistance =0.0;
|
| 92 |
+
for(int m_idx = 0; m_idx<m; m_idx++){
|
| 93 |
+
int centroid_id = codes[drawerid][(v*m)+m_idx];
|
| 94 |
+
totaldistance+=distance_table[centroid_id+(m_idx*256)];
|
| 95 |
+
}
|
| 96 |
+
if(max_heap.size()<k){
|
| 97 |
+
max_heap.push({totaldistance, ids[drawerid][v]});
|
| 98 |
+
}else{
|
| 99 |
+
if(totaldistance<max_heap.top().first){
|
| 100 |
+
max_heap.pop();
|
| 101 |
+
max_heap.push({totaldistance, ids[drawerid][v]});
|
| 102 |
+
}
|
| 103 |
+
}
|
| 104 |
+
}
|
| 105 |
+
}
|
| 106 |
+
float *subdist = distances+(i*k);
|
| 107 |
+
int64_t *sublbs = labels+(i*k);
|
| 108 |
+
int count = max_heap.size();
|
| 109 |
+
for(int c = count-1; c>=0; c--){
|
| 110 |
+
subdist[c] = max_heap.top().first;
|
| 111 |
+
sublbs[c] = max_heap.top().second;
|
| 112 |
+
max_heap.pop();
|
| 113 |
+
}
|
| 114 |
+
for(int fod = count; fod<k; fod++){
|
| 115 |
+
subdist[fod]=-1.0;
|
| 116 |
+
sublbs[fod]=-1;
|
| 117 |
+
}
|
| 118 |
+
}
|
| 119 |
+
}
|
src/IndexPQ.cpp
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include <IndexPQ.h>
|
| 2 |
+
#include <vector>
|
| 3 |
+
#include "clustering.h"
|
| 4 |
+
#include<immintrin.h>
|
| 5 |
+
#include <random>
|
| 6 |
+
#include <cstring>
|
| 7 |
+
// Sets up a product quantizer with `m` slices of d / m dimensions each and
// 256 centroids per slice, and allocates the (still untrained) codebooks.
IndexPQ::IndexPQ(int d, int m)
    : d(d), m(m), k_sub(256), d_sub(d / m) {
    centroids.resize(m * d_sub * k_sub);
}
|
| 12 |
+
void IndexPQ::train(int n, const float *x, bool subsampling, int seed){
|
| 13 |
+
if(trained) return;
|
| 14 |
+
std::vector<float> train_data(n * d_sub);
|
| 15 |
+
|
| 16 |
+
for(int i = 0; i < m; i++){
|
| 17 |
+
for(int row = 0; row < n; row++){
|
| 18 |
+
const float* source_id = x + (row * d) + (i * d_sub);
|
| 19 |
+
float* dest_id = train_data.data() + (row * d_sub);
|
| 20 |
+
for(int j = 0; j < d_sub; j++){
|
| 21 |
+
dest_id[j] = source_id[j];
|
| 22 |
+
}
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
int maxtrain = 150000;
|
| 26 |
+
if(n > maxtrain && subsampling){
|
| 27 |
+
std::mt19937 gen(seed + i);
|
| 28 |
+
std::uniform_int_distribution<int> dis(0, n - 1);
|
| 29 |
+
|
| 30 |
+
std::vector<float> sample_buffer(maxtrain * d_sub);
|
| 31 |
+
for(int p = 0; p < maxtrain; p++){
|
| 32 |
+
int randval = dis(gen);
|
| 33 |
+
std::memcpy(&sample_buffer[p * d_sub],
|
| 34 |
+
&train_data[randval * d_sub],
|
| 35 |
+
d_sub * sizeof(float));
|
| 36 |
+
}
|
| 37 |
+
kmean_clustering(d_sub, maxtrain, k_sub, sample_buffer.data(), centroids.data() + (i * d_sub * k_sub),seed+i);
|
| 38 |
+
} else {
|
| 39 |
+
kmean_clustering(d_sub, n, k_sub, train_data.data() , centroids.data() + (i * d_sub * k_sub), seed+i);
|
| 40 |
+
}
|
| 41 |
+
}
|
| 42 |
+
trained = true;
|
| 43 |
+
}
|
| 44 |
+
void IndexPQ::encode(const float *x, uint8_t* out){
|
| 45 |
+
if(!trained)return;
|
| 46 |
+
for(int i =0; i<m; i++){
|
| 47 |
+
const float *query_chunk = x + (i*d_sub);
|
| 48 |
+
float mindistance = 1e9;
|
| 49 |
+
int bestid = 0;
|
| 50 |
+
for(int id=0; id<k_sub; id++){
|
| 51 |
+
const float* centroid_chunk = centroids.data()+(i*k_sub*d_sub)+(id*d_sub);
|
| 52 |
+
float dist = 0;
|
| 53 |
+
for(int j =0; j<d_sub; j++){
|
| 54 |
+
float diff = query_chunk[j]- centroid_chunk[j];
|
| 55 |
+
dist += diff*diff;
|
| 56 |
+
}
|
| 57 |
+
if(dist<mindistance){
|
| 58 |
+
mindistance = dist;
|
| 59 |
+
bestid = id;
|
| 60 |
+
}
|
| 61 |
+
}
|
| 62 |
+
out[i] = bestid;
|
| 63 |
+
}
|
| 64 |
+
}
|
| 65 |
+
//precalc all distance for query and centroid
|
| 66 |
+
void IndexPQ::compute_distance_table(const float *query, float *outable){
|
| 67 |
+
for(int i =0; i<m; i++){
|
| 68 |
+
const float* query_slice = query+(i*d_sub);
|
| 69 |
+
for(int j = 0; j<k_sub; j++){
|
| 70 |
+
float dist = 0;
|
| 71 |
+
const float *offset= centroids.data()+(i*k_sub*d_sub) + (j*d_sub);
|
| 72 |
+
/*for(int k = 0;k<d_sub; k++){
|
| 73 |
+
float diff = offset[k]-query_slice[k];
|
| 74 |
+
dist+=diff*diff;
|
| 75 |
+
}*/
|
| 76 |
+
__m256 sumvec = _mm256_setzero_ps();
|
| 77 |
+
for(int k =0; k<d_sub; k+=8){
|
| 78 |
+
__m256 offvec= _mm256_loadu_ps(&offset[k]);
|
| 79 |
+
__m256 querslice= _mm256_loadu_ps(&query_slice[k]);
|
| 80 |
+
__m256 diffvec = _mm256_sub_ps(offvec,querslice);
|
| 81 |
+
sumvec = _mm256_fmadd_ps(diffvec, diffvec, sumvec);
|
| 82 |
+
}
|
| 83 |
+
float unpacked[8];
|
| 84 |
+
_mm256_storeu_ps(unpacked, sumvec);
|
| 85 |
+
dist=unpacked[0]+unpacked[1]+
|
| 86 |
+
unpacked[2]+unpacked[3]+
|
| 87 |
+
unpacked[4]+unpacked[5]+
|
| 88 |
+
unpacked[6]+unpacked[7];
|
| 89 |
+
outable[(i*k_sub)+j] = dist;
|
| 90 |
+
}
|
| 91 |
+
}
|
| 92 |
+
}
|
| 93 |
+
|
src/bindings.cpp
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include <pybind11/pybind11.h>
|
| 2 |
+
#include <pybind11/numpy.h>
|
| 3 |
+
#include "IndexIVF.h"
|
| 4 |
+
#include "IndexIVFPQ.h"
|
| 5 |
+
#include "iostream"
|
| 6 |
+
#include <pybind11/stl.h>
|
| 7 |
+
#include <vector>
|
| 8 |
+
namespace py = pybind11;
|
| 9 |
+
|
| 10 |
+
// "vecmini" is the name of the module you will type in python-> 'import vecmini'
|
| 11 |
+
PYBIND11_MODULE(vecmini, m) {
    m.doc() = "Vecmini: A mini custom IVF Vector Database with Metadata Filtering";

    // The C++ indexes read n rows through raw pointers, so every binding first
    // makes sure the array handed over from Python really holds that many
    // rows. For 2-D (or higher) arrays the leading dimension is the row count;
    // for flat arrays the element count is used as an upper bound.
    const auto require_rows = [](const py::array &arr, int n, const char *message) {
        const py::ssize_t available = arr.ndim() >= 2 ? arr.shape(0) : arr.size();
        if (n < 0 || available < n) {
            throw py::value_error(message);
        }
    };

    // ------------------------------------------------------------------
    // IndexIVF: inverted-file index with optional bitmask filtering.
    // ------------------------------------------------------------------
    py::class_<IndexIVF>(m, "IndexIVF")
        .def(py::init<int, int>(), py::arg("d"), py::arg("nbucket"))

        // train(n, x): x must already be a C-contiguous float32 array
        // (noconvert), it is passed to C++ without copying.
        .def("train", [require_rows](IndexIVF &self, int n,
                                     py::array_t<float, py::array::c_style> x) {
            require_rows(x, n, "n must be non-negative and no larger than the number of rows in x");
            py::buffer_info buf = x.request();
            self.train(n, static_cast<const float *>(buf.ptr));
        }, py::arg("n"), py::arg("x").noconvert())

        // add(n, x, xids): inputs are converted to float32 / uint64 if needed.
        .def("add", [require_rows](IndexIVF &self, int n,
                                   py::array_t<float, py::array::c_style | py::array::forcecast> x,
                                   py::array_t<uint64_t, py::array::c_style | py::array::forcecast> xids) {
            require_rows(x, n, "n must be non-negative and no larger than the number of rows in x");
            require_rows(xids, n, "n must be non-negative and no larger than the number of entries in xids");

            py::buffer_info buf_x = x.request();
            py::buffer_info buf_xids = xids.request();

            self.add(n, static_cast<const float *>(buf_x.ptr),
                     static_cast<const uint64_t *>(buf_xids.ptr));
        }, py::arg("n"), py::arg("x"), py::arg("xids"))

        // search(n, x, k, nprobe, bitmask=None) -> (distances, labels)
        // bitmask is either None (no filtering) or an array convertible to
        // uint8; its expected length is defined by IndexIVF::search and is not
        // checked here.
        .def("search", [require_rows](IndexIVF &self, int n,
                                      py::array_t<float, py::array::c_style | py::array::forcecast> x,
                                      int k, int nprobe, py::object bitmask) {
            require_rows(x, n, "n must be non-negative and no larger than the number of rows in x");
            py::buffer_info buf_x = x.request();

            // Result arrays handed back to Python, shape (n, k).
            py::array_t<float> distances({n, k});
            py::array_t<int> labels({n, k});

            // bitmask_arr keeps the (possibly converted) mask alive for the
            // duration of the C++ call.
            const uint8_t *bitmask_ptr = nullptr;
            py::array_t<uint8_t> bitmask_arr;
            if (!bitmask.is_none()) {
                bitmask_arr = bitmask.cast<py::array_t<uint8_t, py::array::c_style | py::array::forcecast>>();
                bitmask_ptr = static_cast<const uint8_t *>(bitmask_arr.request().ptr);
            }

            self.search(n, static_cast<const float *>(buf_x.ptr), k, nprobe, bitmask_ptr,
                        distances.mutable_data(), labels.mutable_data());

            return py::make_tuple(distances, labels);
        }, py::arg("n"), py::arg("x"), py::arg("k"), py::arg("nprobe"),
           py::arg("bitmask") = py::none());

    // ------------------------------------------------------------------
    // IndexIVFPQ: inverted-file index with product-quantized codes.
    // ------------------------------------------------------------------
    py::class_<IndexIVFPQ>(m, "IndexIVFPQ")
        .def(py::init<int, int, int>(),
             py::arg("d"),
             py::arg("nbucket"),
             py::arg("m"))

        // train(n, x, subsampling, seed): seed is an integer; declaring it as
        // bool would squash every value to 0 or 1.
        .def("train", [require_rows](IndexIVFPQ &self, int n,
                                     py::array_t<float, py::array::c_style> x,
                                     bool subsampling, int seed) {
            require_rows(x, n, "n must be non-negative and no larger than the number of rows in x");
            py::buffer_info buf = x.request();
            self.train(n, static_cast<const float *>(buf.ptr), subsampling, seed);
        }, py::arg("n"), py::arg("x").noconvert(), py::arg("subsampling"), py::arg("seed"))

        .def("add", [require_rows](IndexIVFPQ &self, int n,
                                   py::array_t<float, py::array::c_style> x,
                                   py::array_t<uint64_t, py::array::c_style> xids) {
            require_rows(x, n, "n must be non-negative and no larger than the number of rows in x");
            require_rows(xids, n, "n must be non-negative and no larger than the number of entries in xids");

            py::buffer_info bufx = x.request();
            py::buffer_info bufxids = xids.request();

            self.add(n, static_cast<const float *>(bufx.ptr),
                     static_cast<const uint64_t *>(bufxids.ptr));
        }, py::arg("n"), py::arg("x").noconvert(), py::arg("xids").noconvert())

        // search(n, query, k, nprobe) -> (distances, labels)
        .def("search", [require_rows](IndexIVFPQ &self, int n,
                                      py::array_t<float, py::array::c_style> query,
                                      int k, int nprobe) {
            require_rows(query, n, "n must be non-negative and no larger than the number of rows in query");
            py::buffer_info buf_query = query.request();

            py::array_t<float> distances({n, k});
            py::array_t<int64_t> labels({n, k});

            self.search(n, static_cast<const float *>(buf_query.ptr), k, nprobe,
                        distances.mutable_data(), labels.mutable_data());

            return py::make_tuple(distances, labels);
        }, py::arg("n"), py::arg("query").noconvert(), py::arg("k"), py::arg("nprobe"));

    // ------------------------------------------------------------------
    // IndexFlatL2: exhaustive L2 search.
    // ------------------------------------------------------------------
    py::class_<IndexFlatL2>(m, "IndexFlatL2")
        .def(py::init<int>(),
             py::arg("d"))

        .def("add", [require_rows](IndexFlatL2 &self, int n,
                                   py::array_t<float, py::array::c_style> x) {
            require_rows(x, n, "n must be non-negative and no larger than the number of rows in x");
            py::buffer_info bufx = x.request();

            self.add(n, static_cast<const float *>(bufx.ptr));
        }, py::arg("n"), py::arg("x").noconvert())

        // search(n, x, k) -> (distances, labels)
        .def("search", [require_rows](IndexFlatL2 &self, int n,
                                      py::array_t<float, py::array::c_style> x,
                                      int k) {
            require_rows(x, n, "n must be non-negative and no larger than the number of rows in x");
            py::buffer_info bufx = x.request();

            py::array_t<float> distances({n, k});
            py::array_t<int> labels({n, k});

            self.search(n, static_cast<const float *>(bufx.ptr), k,
                        distances.mutable_data(), labels.mutable_data());

            return py::make_tuple(distances, labels);
        }, py::arg("n"), py::arg("x").noconvert(), py::arg("k"));
}
|
src/clustering.cpp
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include<vector>
|
| 2 |
+
#include "clustering.h"
|
| 3 |
+
#include "IndexFlat.h"
|
| 4 |
+
#include <random>
|
| 5 |
+
#include <cstring>
|
| 6 |
+
#include <cmath>
|
| 7 |
+
void kmean_clustering(int d, int n, int k, const float *x, float *centroids, int seed){
|
| 8 |
+
std::mt19937 gen(seed);
|
| 9 |
+
std::uniform_int_distribution<int> distr(0, n - 1);
|
| 10 |
+
|
| 11 |
+
for (int i = 0; i < k; i++) {
|
| 12 |
+
int rand_idx = distr(gen);
|
| 13 |
+
std::memcpy(centroids + (i * d), x + (rand_idx * d), d * sizeof(float));
|
| 14 |
+
}
|
| 15 |
+
|
| 16 |
+
int niter = 15;
|
| 17 |
+
std::vector<int> assign(n);
|
| 18 |
+
std::vector<float> distances(n);
|
| 19 |
+
for(int iter = 0; iter<niter; iter++){
|
| 20 |
+
IndexFlatL2 index(d);
|
| 21 |
+
index.add(k,centroids);
|
| 22 |
+
index.search(n,x,1,distances.data(), assign.data());
|
| 23 |
+
std::vector<float> newcentroid(k*d,0.0);
|
| 24 |
+
std::vector<int> counts(k,0);
|
| 25 |
+
for(int i = 0; i<n; i++){
|
| 26 |
+
int c = assign[i];
|
| 27 |
+
counts[c]+=1;
|
| 28 |
+
for(int m =0; m<d; m++){
|
| 29 |
+
newcentroid[c*d+m] += x[i*d+m];
|
| 30 |
+
}
|
| 31 |
+
}
|
| 32 |
+
for(int c = 0; c<k; c++){
|
| 33 |
+
if (counts[c]>0){
|
| 34 |
+
for(int m = 0; m<d; m++){
|
| 35 |
+
centroids[c*d+m] = newcentroid[c*d+m]/counts[c];
|
| 36 |
+
}
|
| 37 |
+
}
|
| 38 |
+
}
|
| 39 |
+
}
|
| 40 |
+
}
|
src/rand.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|