vikramvasudevan committed on
Commit
be938d8
·
verified ·
1 Parent(s): 2ff9f44

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. server.py +15 -27
server.py CHANGED
@@ -296,10 +296,7 @@ async def search_scripture_find_first_match(
296
  req: ScriptureFirstSearchRequst,
297
  ):
298
  """
299
- Search scripture collection and return the first matching result.
300
- - `scripture_name`: Name of the collection
301
- - `filter_obj`: MetadataWhereClause (filters, groups, operator)
302
- - `has_audio`: optional. can take values any|none|recitation|virutham|upanyasam
303
  """
304
  filter_obj = req.filter_obj
305
  has_audio = req.has_audio
@@ -319,8 +316,8 @@ async def search_scripture_find_first_match(
319
  if not config:
320
  return {"error": f"Scripture '{scripture_name}' not found"}
321
 
322
- # 1️⃣ Fetch results (same as before)
323
- results = db.fetch_first_match(
324
  collection_name=config["collection_name"],
325
  metadata_where_clause=filter_obj,
326
  )
@@ -331,18 +328,15 @@ async def search_scripture_find_first_match(
331
  metadata_doc = results["metadatas"][i]
332
  metadata_doc["id"] = doc_id
333
 
334
- document_text = (
335
- results["documents"][i] if results.get("documents") else None
336
- )
337
  canonical_doc = SanatanConfig().canonicalize_document(
338
  scripture_name, document_text, metadata_doc
339
  )
340
  formatted_results.append(canonical_doc)
341
 
342
- # 2️⃣ Apply has_audio filter (same logic as in search_scripture_find_all_matches)
343
  if has_audio and formatted_results:
344
  if has_audio == AudioType.none:
345
- # Get all indices that have any audio
346
  all_audio_indices = set()
347
  for atype in [
348
  AudioType.recitation,
@@ -353,45 +347,39 @@ async def search_scripture_find_first_match(
353
  indices = await svc_get_indices_with_audio(scripture_name, atype)
354
  all_audio_indices.update(indices)
355
 
356
- # Keep only those without audio
357
  formatted_results = [
358
- r
359
- for r in formatted_results
360
- if r["_global_index"] not in all_audio_indices
361
  ]
362
-
363
  else:
 
364
  if has_audio == AudioType.any:
365
- audio_indices = set()
366
  for atype in [
367
  AudioType.recitation,
368
  AudioType.virutham,
369
  AudioType.upanyasam,
370
  AudioType.santhai,
371
  ]:
372
- indices = await svc_get_indices_with_audio(
373
- scripture_name, atype
374
- )
375
  audio_indices.update(indices)
376
  else:
377
- audio_indices = set(
378
- await svc_get_indices_with_audio(scripture_name, has_audio)
379
- )
380
 
381
  formatted_results = [
382
  r for r in formatted_results if r["_global_index"] in audio_indices
383
  ]
384
 
385
- # 3️⃣ Return only the first valid result (if any)
386
- return {
387
- "results": formatted_results[:1] if formatted_results else [],
388
- }
 
389
 
390
  except Exception as e:
391
  logger.error("Error while searching %s", e, exc_info=True)
392
  return {"error": str(e)}
393
 
394
 
 
395
  class ScriptureMultiSearchRequest(BaseModel):
396
  filter_obj: Optional[MetadataWhereClause] = None
397
  page: int = 1
 
296
  req: ScriptureFirstSearchRequst,
297
  ):
298
  """
299
+ Search scripture collection and return the first matching result after applying audio filter.
 
 
 
300
  """
301
  filter_obj = req.filter_obj
302
  has_audio = req.has_audio
 
316
  if not config:
317
  return {"error": f"Scripture '{scripture_name}' not found"}
318
 
319
+ # 1️⃣ Fetch all matches
320
+ results = db.fetch_all_matches(
321
  collection_name=config["collection_name"],
322
  metadata_where_clause=filter_obj,
323
  )
 
328
  metadata_doc = results["metadatas"][i]
329
  metadata_doc["id"] = doc_id
330
 
331
+ document_text = results["documents"][i] if results.get("documents") else None
 
 
332
  canonical_doc = SanatanConfig().canonicalize_document(
333
  scripture_name, document_text, metadata_doc
334
  )
335
  formatted_results.append(canonical_doc)
336
 
337
+ # 2️⃣ Apply has_audio filter
338
  if has_audio and formatted_results:
339
  if has_audio == AudioType.none:
 
340
  all_audio_indices = set()
341
  for atype in [
342
  AudioType.recitation,
 
347
  indices = await svc_get_indices_with_audio(scripture_name, atype)
348
  all_audio_indices.update(indices)
349
 
 
350
  formatted_results = [
351
+ r for r in formatted_results if r["_global_index"] not in all_audio_indices
 
 
352
  ]
 
353
  else:
354
+ audio_indices = set()
355
  if has_audio == AudioType.any:
 
356
  for atype in [
357
  AudioType.recitation,
358
  AudioType.virutham,
359
  AudioType.upanyasam,
360
  AudioType.santhai,
361
  ]:
362
+ indices = await svc_get_indices_with_audio(scripture_name, atype)
 
 
363
  audio_indices.update(indices)
364
  else:
365
+ audio_indices.update(await svc_get_indices_with_audio(scripture_name, has_audio))
 
 
366
 
367
  formatted_results = [
368
  r for r in formatted_results if r["_global_index"] in audio_indices
369
  ]
370
 
371
+ # 3️⃣ Sort by global index
372
+ formatted_results.sort(key=lambda x: x["_global_index"])
373
+
374
+ # 4️⃣ Return only the first valid result
375
+ return {"results": formatted_results[:1] if formatted_results else []}
376
 
377
  except Exception as e:
378
  logger.error("Error while searching %s", e, exc_info=True)
379
  return {"error": str(e)}
380
 
381
 
382
+
383
  class ScriptureMultiSearchRequest(BaseModel):
384
  filter_obj: Optional[MetadataWhereClause] = None
385
  page: int = 1