Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -162,38 +162,44 @@ def search_memory():
|
|
| 162 |
return jsonify({"error": "No query provided"}), 400
|
| 163 |
|
| 164 |
try:
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
#{'query': 'car', 'hits': [], 'took_ms': 16, 'total_hits': 0, 'engine': 'tantivy', 'context': ''}
|
| 176 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
|
| 178 |
-
# 2. Refined Parsing (The Anti-Junk Filter)
|
| 179 |
-
clean_results = []
|
| 180 |
-
for hit in response['hits']:
|
| 181 |
-
print(hit)
|
| 182 |
-
# Filter low relevance
|
| 183 |
-
if hit['score'] < 0.65:
|
| 184 |
-
continue
|
| 185 |
-
'''{'frame_id': 0, 'uri': 'mv2://frames/0', 'title': 'Untitled', 'rank': 1, 'score': 5.422323226928711, 'matches': 3, 'snippet': 'the dog is blue\ntitle: Untitled\ntags: blue dog\nlabels: text Blue Dog\nextractous_metadata: {"coverage":1.0,"sections_extracted":1,"sections_total":1,"skim":false}', 'tags': ['blue', 'dog'], 'labels': ['text', 'Blue', 'Dog'], 'track': None, 'created_at': '2026-01-11T03:33:52Z', 'content_dates': []}'''
|
| 186 |
clean_results.append({
|
| 187 |
-
"title": hit
|
| 188 |
-
"
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
"date": hit
|
| 192 |
-
"score": f"{
|
| 193 |
})
|
| 194 |
|
| 195 |
-
#return jsonify(clean_results)
|
| 196 |
-
|
| 197 |
return jsonify({"success": True, "results": clean_results})
|
| 198 |
except Exception as e:
|
| 199 |
return jsonify({"error": str(e)}), 500
|
|
|
|
| 162 |
return jsonify({"error": "No query provided"}), 400
|
| 163 |
|
| 164 |
try:
|
| 165 |
+
# 1. Search
|
| 166 |
+
response = db.find(query, top_k=10, snippet_chars=150)
|
| 167 |
+
|
| 168 |
+
# 2. Parse & Clean
|
| 169 |
+
clean_results = []
|
| 170 |
+
hits = response.get('hits', [])
|
| 171 |
|
| 172 |
+
for hit in hits:
|
| 173 |
+
score = hit.get('score', 0.0)
|
| 174 |
+
if score < 0.65: continue
|
|
|
|
| 175 |
|
| 176 |
+
# --- CLEANING LOGIC ---
|
| 177 |
+
# 1. Get raw snippet
|
| 178 |
+
raw_snippet = hit.get('snippet', '')
|
| 179 |
+
|
| 180 |
+
# 2. Split by lines and remove technical metadata headers
|
| 181 |
+
# (The raw snippet appends metadata at the bottom)
|
| 182 |
+
lines = raw_snippet.split('\n')
|
| 183 |
+
content_lines = [
|
| 184 |
+
line for line in lines
|
| 185 |
+
if not line.strip().startswith(('title:', 'tags:', 'labels:', 'extractous_metadata:'))
|
| 186 |
+
]
|
| 187 |
+
clean_text = "\n".join(content_lines).strip()
|
| 188 |
+
|
| 189 |
+
# 3. Use the explicit arrays provided by the SDK for tags/labels
|
| 190 |
+
# (These are much cleaner than parsing the string)
|
| 191 |
+
tags = hit.get('tags', [])
|
| 192 |
+
labels = hit.get('labels', [])
|
| 193 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
clean_results.append({
|
| 195 |
+
"title": hit.get('title') or "Untitled Memory",
|
| 196 |
+
"text": clean_text, # The cleaned up content
|
| 197 |
+
"tags": tags, # List of strings ['blue', 'dog']
|
| 198 |
+
"labels": labels, # List of strings ['text', 'Blue']
|
| 199 |
+
"date": hit.get('created_at', ''),
|
| 200 |
+
"score": f"{score:.2f}"
|
| 201 |
})
|
| 202 |
|
|
|
|
|
|
|
| 203 |
return jsonify({"success": True, "results": clean_results})
|
| 204 |
except Exception as e:
|
| 205 |
return jsonify({"error": str(e)}), 500
|