danulr05 committed on
Commit
f668ec8
·
verified ·
1 Parent(s): 58e1c05

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -38
app.py CHANGED
@@ -278,6 +278,13 @@ def semantic_search(query: str, top_k=1, category_filter=None, language='en'):
278
  threshold = max_score * 0.5 # Show documents within 50% of best score
279
  max_docs = 5
280
 
 
 
 
 
 
 
 
281
  results = []
282
  doc_count = 0
283
 
@@ -285,47 +292,47 @@ def semantic_search(query: str, top_k=1, category_filter=None, language='en'):
285
  if doc_count >= max_docs or score < threshold:
286
  break
287
 
288
- # Get the metadata for this document
289
- for match in res["matches"]:
 
290
  metadata = match["metadata"]
291
- if metadata.get("file_path", "") == file_path:
292
- # Use the DYNAMIC_METADATA mapping if available, otherwise use metadata
293
- proposal_data = DYNAMIC_METADATA.get(file_path, {
294
- "title": metadata.get("title", "Unknown Title"),
295
- "summary": metadata.get("summary", ""),
296
- "category": metadata.get("category", "Budget Proposal"),
297
- "costLKR": metadata.get("costLKR", "No Costing Available")
298
- })
 
 
 
 
 
 
 
 
 
 
 
 
299
 
300
- # Get language-specific data
301
- title = get_language_specific_data(proposal_data, "title", language)
302
- summary = get_language_specific_data(proposal_data, "summary", language)
303
- costLKR = get_language_specific_data(proposal_data, "costLKR", language)
304
- category = get_language_specific_data(proposal_data, "category", language)
305
- thumb_url = metadata.get("thumbUrl", "")
 
 
 
 
 
 
 
306
 
307
- # Only include documents that have meaningful content in the requested language
308
- # Skip documents where title and summary are empty or "Unknown"/"No summary available"
309
- if (title and title.strip() and title not in ["Unknown", "Unknown Title", ""] and
310
- summary and summary.strip() and summary not in ["No summary available", ""]):
311
-
312
- result = {
313
- "title": title,
314
- "summary": summary,
315
- "costLKR": costLKR,
316
- "category": category,
317
- "pdfUrl": f"assets/pdfs/{file_path}" if file_path else "",
318
- "thumbUrl": f"assets/thumbs/{thumb_url}" if thumb_url else "",
319
- "score": score,
320
- "relevance_percentage": int(score * 100),
321
- "file_path": file_path,
322
- "id": match["id"],
323
- "content": metadata.get("content", "") # Add the actual content
324
- }
325
-
326
- results.append(result)
327
- doc_count += 1
328
- break
329
 
330
  return results
331
  except Exception as e:
 
278
  threshold = max_score * 0.5 # Show documents within 50% of best score
279
  max_docs = 5
280
 
281
+ # Create a lookup dictionary for efficient metadata retrieval
282
+ metadata_lookup = {}
283
+ for match in res["matches"]:
284
+ file_path_key = match["metadata"].get("file_path", "")
285
+ if file_path_key not in metadata_lookup:
286
+ metadata_lookup[file_path_key] = match
287
+
288
  results = []
289
  doc_count = 0
290
 
 
292
  if doc_count >= max_docs or score < threshold:
293
  break
294
 
295
+ # Get the metadata for this document using the lookup
296
+ if file_path in metadata_lookup:
297
+ match = metadata_lookup[file_path]
298
  metadata = match["metadata"]
299
+
300
+ # Use the DYNAMIC_METADATA mapping if available, otherwise use metadata
301
+ proposal_data = DYNAMIC_METADATA.get(file_path, {
302
+ "title": metadata.get("title", "Unknown Title"),
303
+ "summary": metadata.get("summary", ""),
304
+ "category": metadata.get("category", "Budget Proposal"),
305
+ "costLKR": metadata.get("costLKR", "No Costing Available")
306
+ })
307
+
308
+ # Get language-specific data
309
+ title = get_language_specific_data(proposal_data, "title", language)
310
+ summary = get_language_specific_data(proposal_data, "summary", language)
311
+ costLKR = get_language_specific_data(proposal_data, "costLKR", language)
312
+ category = get_language_specific_data(proposal_data, "category", language)
313
+ thumb_url = metadata.get("thumbUrl", "")
314
+
315
+ # Only include documents that have meaningful content in the requested language
316
+ # Skip documents where title and summary are empty or "Unknown"/"No summary available"
317
+ if (title and title.strip() and title not in ["Unknown", "Unknown Title", ""] and
318
+ summary and summary.strip() and summary not in ["No summary available", ""]):
319
 
320
+ result = {
321
+ "title": title,
322
+ "summary": summary,
323
+ "costLKR": costLKR,
324
+ "category": category,
325
+ "pdfUrl": f"assets/pdfs/{file_path}" if file_path else "",
326
+ "thumbUrl": f"assets/thumbs/{thumb_url}" if thumb_url else "",
327
+ "score": score,
328
+ "relevance_percentage": int(score * 100),
329
+ "file_path": file_path,
330
+ "id": match["id"],
331
+ "content": metadata.get("content", "") # Add the actual content
332
+ }
333
 
334
+ results.append(result)
335
+ doc_count += 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
 
337
  return results
338
  except Exception as e: