sxid003 committed on
Commit
e6d636d
·
verified ·
1 Parent(s): df9c6f0

Update src/utils/search_docs_utils.py

Browse files
Files changed (1) hide show
  1. src/utils/search_docs_utils.py +23 -46
src/utils/search_docs_utils.py CHANGED
@@ -272,57 +272,34 @@ def select_parlement_transcript(query: str, embeddings_path="output/parlement_ti
272
  def search_relevant_documents(query: str, top_k: int = 3) -> dict:
273
  """
274
  Search for the most relevant documents following the logic of the provided graph:
275
- - Detects the user's intention ("lois/règlements" or "parlement")
276
- - If "lois/règlements": detects language, filters, then selects documents
277
- - If "parlement": directly selects the most relevant parliamentary transcripts
278
  Returns a dictionary with intention, language (if applicable), and the relevant documents.
279
  """
280
- # Detect user intention
281
- intention = detect_intention(query)
282
  # Detect language
283
  lang = detect_language(query)
284
 
285
- if intention == "parlement":
286
- if parlement_embeddings is None or parlement_metadatas is None:
287
- return {
288
- "error": "Parliamentary embeddings not available. Please run preprocessing first.",
289
- "intention": intention,
290
- "language": lang
291
- }
292
-
293
- # Search parliamentary transcripts
294
- results = select_parlement_transcript(query, top_k=top_k)
295
-
296
  return {
297
- "intention": intention,
298
- "language": lang,
299
- "results": results,
300
- "count": len(results) if isinstance(results, list) else 1
301
  }
302
-
303
- else:
304
- if embeddings is None or metadatas is None:
305
- return {
306
- "error": "Document embeddings not available. Please run preprocessing first.",
307
- "intention": intention,
308
- "language": lang
309
- }
310
 
311
- # Filter by language and select documents
312
- indices = filter_by_language(metadatas, lang)
313
- filtered_embeddings = embeddings[indices]
314
- filtered_metadatas = [metadatas[i] for i in indices]
315
-
316
- results = select_documents(
317
- query,
318
- filtered_embeddings,
319
- filtered_metadatas,
320
- lang=lang,
321
- top_k=top_k
322
- )
323
- return {
324
- "intention": intention,
325
- "language": lang,
326
- "results": results,
327
- "count": len(results)
328
- }
 
 
272
  def search_relevant_documents(query: str, top_k: int = 3) -> dict:
273
  """
274
  Search for the most relevant documents following the logic of the provided graph:
 
 
 
275
  Returns a dictionary with intention, language (if applicable), and the relevant documents.
276
  """
277
+
 
278
  # Detect language
279
  lang = detect_language(query)
280
 
281
+ if embeddings is None or metadatas is None:
 
 
 
 
 
 
 
 
 
 
282
  return {
283
+ "error": "Document embeddings not available. Please run preprocessing first.",
284
+ "language": lang
 
 
285
  }
 
 
 
 
 
 
 
 
286
 
287
+ # Filter by language and select documents
288
+ indices = filter_by_language(metadatas, lang)
289
+ filtered_embeddings = embeddings[indices]
290
+ filtered_metadatas = [metadatas[i] for i in indices]
291
+
292
+ results = select_documents(
293
+ query,
294
+ filtered_embeddings,
295
+ filtered_metadatas,
296
+ lang=lang,
297
+ top_k=top_k
298
+ )
299
+
300
+ return {
301
+ #"intention": 'intention',
302
+ "language": lang,
303
+ "results": results,
304
+ "count": len(results)
305
+ }