File size: 1,846 Bytes
1db7196
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
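# One-off step, kept for reference: merge the per-shard FAISS indexes into the
# single index file that the check further down loads.
# import faiss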

# merged_index = faiss.read_index("/home/mshahidul/readctrl/data/vector_db/qwen_em/shard_0_en.faiss")
# for i in range(1, 2):
#     next_index = faiss.read_index(f"/home/mshahidul/readctrl/data/vector_db/qwen_em/shard_{i}_en.faiss")
#     merged_index.merge_from(next_index)

# faiss.write_index(merged_index, "/home/mshahidul/readctrl/data/vector_db/qwen_em/full_wikipedia_index.faiss")

import os
# Restrict this process to a single GPU. Both variables are set here, before
# `torch` is imported further down, so they are already in the environment
# when the CUDA runtime starts. PCI_BUS_ID ordering makes device "2" refer to
# the third GPU in PCI bus order instead of CUDA's default ordering.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

import faiss
import numpy as np
import torch
from sentence_transformers import SentenceTransformer

# Sanity check for the merged FAISS index: load it, embed one query with the
# same model that produced the indexed vectors, run a k-NN search and report
# whether any real neighbor came back.

# 1. Configuration
model_id = "Qwen/Qwen3-Embedding-4B"
index_path = "/home/mshahidul/readctrl/data/vector_db/qwen_em/full_wikipedia_index.faiss"
k = 5  # Number of nearest neighbors to find


def format_results(distances, indices):
    """Return one printable line per result slot of the first query.

    Args:
        distances: 2-D sequence of distances as returned by ``index.search``;
            only row 0 is read.
        indices: 2-D sequence of neighbor IDs as returned by ``index.search``;
            only row 0 is read.

    Returns:
        A list of strings, one per slot in row 0. Slots whose ID is ``-1``
        are reported as missing instead of being printed with a distance,
        because ``-1`` is a sentinel and the paired distance is meaningless.
    """
    lines = []
    for rank, (neighbor_id, distance) in enumerate(
        zip(indices[0], distances[0]), start=1
    ):
        if neighbor_id == -1:
            lines.append(f"Result {rank}: no neighbor returned (ID -1)")
        else:
            lines.append(
                f"Result {rank}: Index ID {neighbor_id}, Distance: {distance:.4f}"
            )
    return lines


def found_any_neighbor(indices):
    """Return True if the first query row contains at least one ID other than -1."""
    return any(neighbor_id != -1 for neighbor_id in indices[0])


if __name__ == "__main__":
    # 2. Load the Index
    print("Loading Index...")
    index = faiss.read_index(index_path)
    print("Index loaded successfully.")
    print(f"Total vectors in index: {index.ntotal}")
    print(f"Vector dimension: {index.d}")

    # 3. Load Model for Querying
    print("Loading model for query embedding...")
    model = SentenceTransformer(
        model_id,
        trust_remote_code=True,
        device="cuda",
        model_kwargs={"torch_dtype": torch.bfloat16},
    )

    # 4. Perform a Search
    query = "What is the capital of France?"
    # We must encode the query using the same model.
    # NOTE(review): the Qwen3-Embedding model card describes a dedicated query
    # prompt (prompt_name="query"). Whether it should be used here depends on
    # how the indexed vectors were produced -- confirm against the indexing job.
    # FAISS expects float32 input, hence the cast.
    query_vector = model.encode([query], convert_to_numpy=True).astype("float32")

    # Fail with a readable message if the model and the index disagree on
    # dimensionality, instead of letting the search call fail internally.
    if query_vector.shape[1] != index.d:
        raise ValueError(
            f"Query embedding has dimension {query_vector.shape[1]} but the "
            f"index expects {index.d}; was the index built with {model_id}?"
        )

    distances, indices = index.search(query_vector, k)

    # 5. Review Results
    print("\n--- Search Results ---")
    print(f"Query: {query}")
    for line in format_results(distances, indices):
        print(line)

    if found_any_neighbor(indices):
        print("\nSuccess: The index returned valid neighbors!")
    else:
        print("\nError: The search returned -1. This usually means the index is empty or improperly trained.")