broadfield-dev committed on
Commit
1474415
·
verified ·
1 Parent(s): a4512b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -26
app.py CHANGED
@@ -162,38 +162,44 @@ def search_memory():
162
  return jsonify({"error": "No query provided"}), 400
163
 
164
  try:
165
- # 1. Refined Request
166
- search_req = {
167
- "top_k": 10, # Ask for more...
168
- "snippet_chars": 150, # ...but get smaller chunks
169
- "no_sketch": False # Use the sketch track for speed
170
- }
171
 
172
- #response = db.find(query, **search_req)
173
- response = db.find(query)
174
- print(response)
175
- #{'query': 'car', 'hits': [], 'took_ms': 16, 'total_hits': 0, 'engine': 'tantivy', 'context': ''}
176
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
 
178
- # 2. Refined Parsing (The Anti-Junk Filter)
179
- clean_results = []
180
- for hit in response['hits']:
181
- print(hit)
182
- # Filter low relevance
183
- if hit['score'] < 0.65:
184
- continue
185
- '''{'frame_id': 0, 'uri': 'mv2://frames/0', 'title': 'Untitled', 'rank': 1, 'score': 5.422323226928711, 'matches': 3, 'snippet': 'the dog is blue\ntitle: Untitled\ntags: blue dog\nlabels: text Blue Dog\nextractous_metadata: {"coverage":1.0,"sections_extracted":1,"sections_total":1,"skim":false}', 'tags': ['blue', 'dog'], 'labels': ['text', 'Blue', 'Dog'], 'track': None, 'created_at': '2026-01-11T03:33:52Z', 'content_dates': []}'''
186
  clean_results.append({
187
- "title": hit['title'] or "Untitled Memory",
188
- "snippet": hit['snippet'], # The highlighted text
189
- #"full_text": hit['text'], # Available on click
190
- #"full_text": hit['text'],
191
- "date": hit['created_at'], # Context!
192
- "score": f"{hit['score']:.2f}"
193
  })
194
 
195
- #return jsonify(clean_results)
196
-
197
  return jsonify({"success": True, "results": clean_results})
198
  except Exception as e:
199
  return jsonify({"error": str(e)}), 500
 
162
  return jsonify({"error": "No query provided"}), 400
163
 
164
  try:
165
+ # 1. Search
166
+ response = db.find(query, top_k=10, snippet_chars=150)
167
+
168
+ # 2. Parse & Clean
169
+ clean_results = []
170
+ hits = response.get('hits', [])
171
 
172
+ for hit in hits:
173
+ score = hit.get('score', 0.0)
174
+ if score < 0.65: continue
 
175
 
176
+ # --- CLEANING LOGIC ---
177
+ # 1. Get raw snippet
178
+ raw_snippet = hit.get('snippet', '')
179
+
180
+ # 2. Split by lines and remove technical metadata headers
181
+ # (The raw snippet appends metadata at the bottom)
182
+ lines = raw_snippet.split('\n')
183
+ content_lines = [
184
+ line for line in lines
185
+ if not line.strip().startswith(('title:', 'tags:', 'labels:', 'extractous_metadata:'))
186
+ ]
187
+ clean_text = "\n".join(content_lines).strip()
188
+
189
+ # 3. Use the explicit arrays provided by the SDK for tags/labels
190
+ # (These are much cleaner than parsing the string)
191
+ tags = hit.get('tags', [])
192
+ labels = hit.get('labels', [])
193
 
 
 
 
 
 
 
 
 
194
  clean_results.append({
195
+ "title": hit.get('title') or "Untitled Memory",
196
+ "text": clean_text, # The cleaned up content
197
+ "tags": tags, # List of strings ['blue', 'dog']
198
+ "labels": labels, # List of strings ['text', 'Blue']
199
+ "date": hit.get('created_at', ''),
200
+ "score": f"{score:.2f}"
201
  })
202
 
 
 
203
  return jsonify({"success": True, "results": clean_results})
204
  except Exception as e:
205
  return jsonify({"error": str(e)}), 500