midokhaled927 committed on
Commit
fbc8423
·
verified ·
1 Parent(s): 9ceead5

Create services/search.py

Browse files
Files changed (1) hide show
  1. services/search.py +172 -0
services/search.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import json
3
+ import sqlite3
4
+ from pathlib import Path
5
+ from typing import List, Dict, Tuple
6
+ import faiss
7
+ from loguru import logger
8
+
9
class VectorSearchService:
    """Similarity search over identity embeddings using a FAISS index.

    Embeddings are read from the ``identities`` table of the SQLite database
    at ``db_path`` (columns used here: ``id``, ``embedding`` as JSON text,
    ``name`` and ``metadata`` as JSON text). Vectors are held in a
    ``faiss.IndexFlatIP`` index, so scores are raw inner products.

    NOTE(review): an inner product equals cosine similarity only when the
    vectors are L2-normalised; the default ``threshold`` of 0.6 suggests the
    embeddings are expected to be normalised upstream -- confirm.
    """

    def __init__(self, db_path="database/identities.db", vector_dim=512):
        """Create the service and eagerly build the index from the database.

        Args:
            db_path: Path to the SQLite database holding the identities.
            vector_dim: Dimensionality of every embedding vector.
        """
        self.db_path = db_path
        self.vector_dim = vector_dim
        self.index = None
        # Two-way mapping between identity ids and FAISS row positions.
        self.id_to_index = {}
        self.index_to_id = {}

        # Build the FAISS index (and load whatever the database contains).
        self.init_index()

    def init_index(self):
        """Create a fresh, empty FAISS index and populate it from the database.

        On failure the error is logged and ``self.index`` is left as ``None``;
        the other methods treat that state as "index unavailable".
        """
        # A new index has no rows, so any previous mapping is stale.
        self.id_to_index = {}
        self.index_to_id = {}
        try:
            # IndexFlatIP scores candidates by inner product.
            self.index = faiss.IndexFlatIP(self.vector_dim)
            logger.info("✅ FAISS index initialized successfully")
            self.load_existing_embeddings()
        except Exception as e:
            logger.error(f"❌ Failed to initialize FAISS: {e}")
            self.index = None

    def load_existing_embeddings(self):
        """Replace the index contents with the embeddings stored in the database.

        The call is idempotent: the index is reset before the vectors are
        added, so calling it repeatedly never duplicates rows. Rows whose
        embedding cannot be decoded or has the wrong dimension are skipped
        with a warning. If the database cannot be read at all, a warning is
        logged and the current index is left untouched.
        """
        if self.index is None:
            logger.warning("⚠️ No existing embeddings loaded: FAISS index not available")
            return

        try:
            conn = sqlite3.connect(self.db_path)
            try:
                cursor = conn.cursor()
                cursor.execute("SELECT id, embedding FROM identities")
                rows = cursor.fetchall()
            finally:
                # Always release the connection, even when the query fails.
                conn.close()

            ids = []
            vectors = []
            for identity_id, emb_json in rows:
                try:
                    embedding = np.asarray(json.loads(emb_json), dtype=np.float32).ravel()
                except (TypeError, ValueError) as e:
                    logger.warning(f"⚠️ Skipping embedding for {identity_id}: {e}")
                    continue
                if embedding.shape[0] != self.vector_dim:
                    logger.warning(
                        f"⚠️ Skipping embedding for {identity_id}: "
                        f"expected dimension {self.vector_dim}, got {embedding.shape[0]}"
                    )
                    continue
                ids.append(identity_id)
                vectors.append(embedding)

            # Swap in the new contents only after everything decoded cleanly,
            # so the mappings can never point at rows that were not added.
            self.index.reset()
            self.id_to_index = {}
            self.index_to_id = {}
            if vectors:
                self.index.add(np.vstack(vectors))
                self.index_to_id = dict(enumerate(ids))
                self.id_to_index = {identity_id: pos for pos, identity_id in enumerate(ids)}
                logger.info(f"✅ Loaded {len(vectors)} existing embeddings")

        except Exception as e:
            logger.warning(f"⚠️ No existing embeddings loaded: {e}")

    def add_embedding(self, identity_id: str, embedding: np.ndarray):
        """Append one embedding to the index and register it under ``identity_id``.

        Does nothing (besides logging a warning) when the index is unavailable.
        Only the in-memory index is updated; the database is not written here.

        Args:
            identity_id: Identifier the vector belongs to.
            embedding: Vector with ``vector_dim`` elements (any shape that
                flattens to a single row).
        """
        if self.index is None:
            logger.warning("⚠️ FAISS index not available, using fallback")
            return

        embedding = np.asarray(embedding, dtype=np.float32).reshape(1, -1)

        # The new vector lands at the current end of the index.
        idx = self.index.ntotal
        self.index.add(embedding)

        # If this identity was already indexed, orphan its old row so that a
        # search cannot return the same identity twice.
        previous_idx = self.id_to_index.get(identity_id)
        if previous_idx is not None:
            self.index_to_id.pop(previous_idx, None)

        self.id_to_index[identity_id] = idx
        self.index_to_id[idx] = identity_id

        logger.info(f"✅ Added embedding for {identity_id} at index {idx}")

    def search(self, query_embedding: np.ndarray, k: int = 5, threshold: float = 0.6) -> List[Dict]:
        """Return up to ``k`` identities most similar to ``query_embedding``.

        Args:
            query_embedding: Query vector with ``vector_dim`` elements.
            k: Maximum number of candidates to consider; ``k <= 0`` yields
                an empty result.
            threshold: Minimum inner-product score a hit must reach.

        Returns:
            A list of dicts with keys ``identity_id``, ``name``,
            ``similarity`` and ``metadata``, in the order FAISS returned
            them (best score first). Hits whose row is no longer present in
            the database are omitted.
        """
        if k <= 0:
            return []

        if self.index is None or self.index.ntotal == 0:
            logger.warning("⚠️ No embeddings in index, returning empty results")
            return []

        query = np.asarray(query_embedding, dtype=np.float32).reshape(1, -1)

        # Never ask FAISS for more neighbours than the index holds.
        similarities, indices = self.index.search(query, min(k, self.index.ntotal))

        hits = []
        for similarity, idx in zip(similarities[0], indices[0]):
            if idx == -1:  # FAISS pads missing neighbours with -1
                continue
            identity_id = self.index_to_id.get(int(idx))
            # `is not None` keeps legitimate falsy ids such as 0.
            if identity_id is not None and similarity >= threshold:
                hits.append((identity_id, float(similarity)))

        if not hits:
            return []

        # One connection for all lookups, closed even if a query fails.
        results = []
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            for identity_id, similarity in hits:
                cursor.execute("SELECT name, metadata FROM identities WHERE id = ?", (identity_id,))
                row = cursor.fetchone()
                if row:
                    results.append({
                        'identity_id': identity_id,
                        'name': row[0],
                        'similarity': similarity,
                        'metadata': json.loads(row[1]) if row[1] else {}
                    })
        finally:
            conn.close()

        return results

    def remove_embedding(self, identity_id: str):
        """Drop ``identity_id`` from the index by rebuilding it from the database.

        The flat index is not edited in place; it is rebuilt from the
        ``identities`` table instead.

        NOTE(review): this method does not delete the database row itself, so
        the caller presumably removes it first -- otherwise the identity will
        be loaded again by the rebuild. Confirm against the caller.
        """
        if identity_id not in self.id_to_index:
            logger.warning(f"⚠️ Identity {identity_id} not found in index")
            return

        logger.info("🔄 Rebuilding index after removal...")
        self.rebuild_index()

    def rebuild_index(self):
        """Recreate the index from scratch using the current database contents."""
        # init_index() already reloads the embeddings; loading a second time
        # here used to insert every vector twice.
        self.init_index()
        logger.info("✅ Index rebuilt successfully")

    def get_stats(self) -> Dict:
        """Return basic facts about the index.

        Returns:
            Dict with ``total_vectors``, ``vector_dimension``, ``index_type``
            (the index class name, or ``'None'``) and ``is_ready`` (True only
            when an index exists and holds at least one vector).
        """
        has_index = self.index is not None
        return {
            'total_vectors': self.index.ntotal if has_index else 0,
            'vector_dimension': self.vector_dim,
            'index_type': type(self.index).__name__ if has_index else 'None',
            'is_ready': has_index and self.index.ntotal > 0
        }
132
+
133
class SimpleVectorSearch:
    """Brute-force fallback search for when FAISS is not available.

    Every call reads all rows of the ``identities`` table from the SQLite
    database at ``db_path`` and scores them with a plain dot product.
    """

    def __init__(self, db_path="database/identities.db"):
        """Remember the database path; no connection is opened here."""
        self.db_path = db_path

    def search(self, query_embedding: np.ndarray, k: int = 5, threshold: float = 0.6) -> List[Dict]:
        """Linearly scan all stored embeddings and return the best matches.

        Args:
            query_embedding: Query vector; any shape that flattens to one
                vector is accepted (e.g. ``(d,)`` or ``(1, d)``).
            k: Maximum number of results; ``k <= 0`` yields an empty list.
            threshold: Minimum dot-product score a result must reach.

        Returns:
            Up to ``k`` dicts with keys ``identity_id``, ``name``,
            ``similarity`` and ``metadata``, sorted by descending similarity.
            Any failure is logged and produces an empty list.
        """
        if k <= 0:
            return []

        try:
            conn = sqlite3.connect(self.db_path)
            try:
                cursor = conn.cursor()
                cursor.execute("SELECT id, name, embedding, metadata FROM identities")
                rows = cursor.fetchall()
            finally:
                # Release the connection even when the query raises.
                conn.close()

            # Flatten so a (1, d) query scores to a scalar, not a 1-element array.
            query = np.asarray(query_embedding).ravel()

            scored = []
            for identity_id, name, emb_json, metadata in rows:
                db_embedding = np.asarray(json.loads(emb_json)).ravel()
                similarity = float(np.dot(query, db_embedding))
                if similarity >= threshold:
                    scored.append((identity_id, name, similarity, metadata))

            # Best score first; the sort is stable, so ties keep database order.
            scored.sort(key=lambda item: item[2], reverse=True)

            # Metadata is parsed only for the rows that are actually returned.
            return [
                {
                    'identity_id': identity_id,
                    'name': name,
                    'similarity': similarity,
                    'metadata': json.loads(metadata) if metadata else {}
                }
                for identity_id, name, similarity, metadata in scored[:k]
            ]

        except Exception as e:
            logger.error(f"❌ Simple search failed: {e}")
            return []