File size: 4,342 Bytes
847d463
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
"""Debug FAISS crash - check index integrity and dimensions"""
import sys
import os
import traceback

# Make modules that live next to this script importable.
script_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, script_dir)

# Banner: rule, title, rule -- one per line.
print("=" * 60, "FAISS Debug Script", "=" * 60, sep="\n")

# Step 1: confirm that faiss can be imported and report where it came from.
print("\n[1] Checking faiss-cpu version...")
try:
    import faiss
    # Fall back to a placeholder when the module exposes no __version__.
    faiss_version = getattr(faiss, '__version__', 'unknown')
    print(f"    faiss version: {faiss_version}")
    print(f"    faiss path: {faiss.__file__}")
except Exception as import_error:
    # Nothing below can run without faiss, so stop here.
    print(f"    ERROR: {import_error}")
    sys.exit(1)

# Step 2: report the numpy version in use.
print("\n[2] Checking numpy...")
import numpy as np
print(f"    numpy version: {np.__version__}")

# Step 3: read the FAQ index file with faiss itself, bypassing LangChain.
def _describe_index(loaded):
    """Print the type, dimension, vector count and trained flag of an index."""
    print("    Index loaded!")
    print(f"    Index type: {type(loaded)}")
    print(f"    Dimension: {loaded.d}")
    print(f"    Total vectors: {loaded.ntotal}")
    print(f"    Is trained: {loaded.is_trained}")


print("\n[3] Loading FAQ FAISS index directly...")
# The path is relative, so it is resolved against the current working directory.
faq_index_path = os.path.join(
    "data", "vector_stores", "faq_store", "index.faiss"
)
try:
    index = faiss.read_index(faq_index_path)
    _describe_index(index)
except Exception as load_error:
    # Every later step needs `index`, so a load failure ends the script.
    print(f"    ERROR loading index: {load_error}")
    traceback.print_exc()
    sys.exit(1)

# Step 4: query the index with a random vector to exercise search on its own,
# independently of any embedding model.
print("\n[4] Trying raw FAISS search...")
try:
    # One row, as wide as the index dimension, cast to float32 for the search.
    query_vec = np.random.rand(1, index.d)
    query_vec = query_vec.astype('float32')
    print(f"    Query shape: {query_vec.shape}, dtype: {query_vec.dtype}")

    # Ask for the 3 nearest neighbours.
    distances, indices = index.search(query_vec, 3)
    print("    Search succeeded!")
    print(f"    Distances: {distances}")
    print(f"    Indices: {indices}")
except Exception as search_error:
    # A failing raw search makes the remaining steps pointless; stop.
    print(f"    ERROR during raw search: {search_error}")
    traceback.print_exc()
    sys.exit(1)

# Step 5: Try with actual embeddings
#
# Loads the sentence-transformers model, encodes one sample query and compares
# the embedding width with the index dimension. A failure here is reported but
# does not stop the script.
print("\n[5] Loading embedding model...")
# Bind these up front: if anything below raises, step 6 can test them instead
# of tripping over an undefined `embedding`. `model` is deliberately not
# pre-bound; it only exists when the model actually loaded.
embedding = None
dimensions_match = False
try:
    from sentence_transformers import SentenceTransformer
    model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
    embedding = model.encode(["damaged product"], convert_to_numpy=True)
    print(f"    Embedding shape: {embedding.shape}, dtype: {embedding.dtype}")
    print(f"    Index dimension: {index.d}")

    dimensions_match = embedding.shape[1] == index.d
    if not dimensions_match:
        print(f"    *** DIMENSION MISMATCH! Embedding={embedding.shape[1]}, Index={index.d} ***")
    else:
        print("    Dimensions match!")
except Exception as e:
    # Discard any partially produced result so step 6 sees a clean
    # "step 5 failed" state.
    embedding = None
    dimensions_match = False
    print(f"    ERROR: {e}")
    traceback.print_exc()

# Step 6: Try FAISS search with real embedding
#
# Only meaningful when step 5 produced an embedding whose width equals the
# index dimension; otherwise the step is skipped with an explicit reason
# rather than reporting a secondary error.
print("\n[6] FAISS search with real embedding...")
if embedding is None:
    print("    SKIPPED: no embedding available because step 5 failed.")
elif not dimensions_match:
    print("    SKIPPED: embedding dimension does not match the index dimension (see step 5).")
else:
    try:
        query_vec = embedding.astype('float32')
        distances, indices = index.search(query_vec, 3)
        print("    Search succeeded!")
        print(f"    Distances: {distances}")
        print(f"    Indices: {indices}")
    except Exception as e:
        print(f"    ERROR: {e}")
        traceback.print_exc()

# Step 7: Try loading through LangChain
#
# Wraps the sentence-transformers model from step 5 in a minimal LangChain
# `Embeddings` implementation and loads the saved FAQ store with it. Any
# failure is printed with a traceback and ends the script with exit code 1.
print("\n[7] Loading via LangChain FAISS...")
try:
    from langchain_community.vectorstores import FAISS as LangChainFAISS
    try:
        # Preferred location of the Embeddings base class.
        from langchain_core.embeddings import Embeddings
    except ImportError:
        # Legacy location, kept as a fallback for older installations.
        from langchain.embeddings.base import Embeddings

    # Step 5 binds `model` only when the model loaded, and step 5 does not
    # abort the script on failure. Look the name up without raising NameError
    # so the problem is reported for what it is.
    sentence_model = globals().get("model")
    if sentence_model is None:
        raise RuntimeError(
            "embedding model from step 5 is not available; "
            "cannot build the LangChain embeddings wrapper"
        )

    class SimpleEmbeddings(Embeddings):
        """Adapter exposing the sentence-transformers model to LangChain."""

        def __init__(self):
            self.model = sentence_model

        def embed_documents(self, texts):
            """Encode `texts` and return the result converted with `.tolist()`."""
            return self.model.encode(texts, convert_to_numpy=True).tolist()

        def embed_query(self, text):
            """Encode a single `text` and return its vector via `.tolist()`."""
            return self.model.encode([text], convert_to_numpy=True)[0].tolist()

    embeddings = SimpleEmbeddings()
    # NOTE(review): allow_dangerous_deserialization=True opts in to loading
    # serialized (pickled, per the LangChain docs) store data. Only use this
    # with a store directory you created yourself or otherwise trust.
    store = LangChainFAISS.load_local(
        os.path.join("data", "vector_stores", "faq_store"),
        embeddings,
        allow_dangerous_deserialization=True
    )
    print("    LangChain FAISS store loaded!")
    print(f"    Store type: {type(store)}")
except Exception as e:
    print(f"    ERROR: {e}")
    traceback.print_exc()
    sys.exit(1)

# Step 8: run a scored similarity search through the LangChain store.
print("\n[8] LangChain similarity_search_with_score...")
try:
    results = store.similarity_search_with_score("damaged product", k=3)
    hit_count = len(results)
    print(f"    SUCCESS! Got {hit_count} results")
    # Each result is a (document, score) pair; show the score and the first
    # 80 characters of the document text.
    for doc, score in results:
        preview = doc.page_content[:80]
        print(f"    Score={score:.4f}: {preview}...")
except Exception as search_error:
    # Report the failure but still fall through to the closing banner.
    print(f"    ERROR: {search_error}")
    traceback.print_exc()

# Closing banner: blank line + rule, message, rule.
print("\n" + "=" * 60, "Debug complete!", "=" * 60, sep="\n")