Spaces:
Runtime error
Runtime error
Update orbiitt_engine.py
Browse files- orbiitt_engine.py +17 -8
orbiitt_engine.py
CHANGED
|
@@ -24,6 +24,7 @@ class OrbiittEngine:
|
|
| 24 |
self.expected_dim = self.model.config.vision_config.hidden_size
|
| 25 |
|
| 26 |
# 4. Connect to Database with Safety Logic
|
|
|
|
| 27 |
self.client = chromadb.PersistentClient(path=self.db_path)
|
| 28 |
self._check_db_compatibility()
|
| 29 |
|
|
@@ -42,10 +43,8 @@ class OrbiittEngine:
|
|
| 42 |
existing_dim = len(sample['embeddings'][0])
|
| 43 |
if existing_dim != self.expected_dim:
|
| 44 |
print(f"⚠️ Dimension Mismatch: DB is {existing_dim}, Model is {self.expected_dim}")
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
else:
|
| 48 |
-
exit()
|
| 49 |
except: pass
|
| 50 |
|
| 51 |
def get_image_embedding(self, image_path):
|
|
@@ -85,7 +84,7 @@ class OrbiittEngine:
|
|
| 85 |
except Exception as e:
|
| 86 |
tqdm.write(f"⚠️ Skipped {fname}: {e}")
|
| 87 |
|
| 88 |
-
def search(self, text_query=None, image_file=None, text_weight=0.5):
|
| 89 |
"""Hybrid search blending visual and text embeddings."""
|
| 90 |
img_vec = None
|
| 91 |
txt_vec = None
|
|
@@ -107,9 +106,18 @@ class OrbiittEngine:
|
|
| 107 |
else:
|
| 108 |
return []
|
| 109 |
|
| 110 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
|
| 112 |
output = []
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
for i in range(len(results['ids'][0])):
|
| 114 |
fname = results['ids'][0][i]
|
| 115 |
# distance for 'cosine' is 1 - similarity.
|
|
@@ -117,7 +125,8 @@ class OrbiittEngine:
|
|
| 117 |
score = round((1.0 - results['distances'][0][i]) * 100)
|
| 118 |
output.append({
|
| 119 |
"id": fname,
|
| 120 |
-
"url": f"
|
| 121 |
"score": score
|
| 122 |
})
|
| 123 |
-
return output
|
|
|
|
|
|
| 24 |
self.expected_dim = self.model.config.vision_config.hidden_size
|
| 25 |
|
| 26 |
# 4. Connect to Database with Safety Logic
|
| 27 |
+
print(f"📂 Connecting to ChromaDB at: {self.db_path}")
|
| 28 |
self.client = chromadb.PersistentClient(path=self.db_path)
|
| 29 |
self._check_db_compatibility()
|
| 30 |
|
|
|
|
| 43 |
existing_dim = len(sample['embeddings'][0])
|
| 44 |
if existing_dim != self.expected_dim:
|
| 45 |
print(f"⚠️ Dimension Mismatch: DB is {existing_dim}, Model is {self.expected_dim}")
|
| 46 |
+
# Auto-wipe in production to prevent crash loop, or handle gracefully
|
| 47 |
+
print("!!! WARNING: Database dimension mismatch. Search results may be invalid.")
|
|
|
|
|
|
|
| 48 |
except: pass
|
| 49 |
|
| 50 |
def get_image_embedding(self, image_path):
|
|
|
|
| 84 |
except Exception as e:
|
| 85 |
tqdm.write(f"⚠️ Skipped {fname}: {e}")
|
| 86 |
|
| 87 |
+
def search(self, text_query=None, image_file=None, text_weight=0.5, top_k=20):
|
| 88 |
"""Hybrid search blending visual and text embeddings."""
|
| 89 |
img_vec = None
|
| 90 |
txt_vec = None
|
|
|
|
| 106 |
else:
|
| 107 |
return []
|
| 108 |
|
| 109 |
+
# Pass dynamic top_k to ChromaDB
|
| 110 |
+
try:
|
| 111 |
+
results = self.collection.query(query_embeddings=[query_emb], n_results=top_k)
|
| 112 |
+
except Exception as e:
|
| 113 |
+
print(f"Search Query Error: {e}")
|
| 114 |
+
return []
|
| 115 |
|
| 116 |
output = []
|
| 117 |
+
# Handle case where no results found
|
| 118 |
+
if not results['ids'] or not results['ids'][0]:
|
| 119 |
+
return []
|
| 120 |
+
|
| 121 |
for i in range(len(results['ids'][0])):
|
| 122 |
fname = results['ids'][0][i]
|
| 123 |
# distance for 'cosine' is 1 - similarity.
|
|
|
|
| 125 |
score = round((1.0 - results['distances'][0][i]) * 100)
|
| 126 |
output.append({
|
| 127 |
"id": fname,
|
| 128 |
+
"url": f"Productimages/{fname}", # Relative path
|
| 129 |
"score": score
|
| 130 |
})
|
| 131 |
+
return output
|
| 132 |
+
|