Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
server.py
CHANGED
|
@@ -296,10 +296,7 @@ async def search_scripture_find_first_match(
|
|
| 296 |
req: ScriptureFirstSearchRequst,
|
| 297 |
):
|
| 298 |
"""
|
| 299 |
-
Search scripture collection and return the first matching result.
|
| 300 |
-
- `scripture_name`: Name of the collection
|
| 301 |
-
- `filter_obj`: MetadataWhereClause (filters, groups, operator)
|
| 302 |
-
- `has_audio`: optional; one of any|none|recitation|virutham|upanyasam|santhai
|
| 303 |
"""
|
| 304 |
filter_obj = req.filter_obj
|
| 305 |
has_audio = req.has_audio
|
|
@@ -319,8 +316,8 @@ async def search_scripture_find_first_match(
|
|
| 319 |
if not config:
|
| 320 |
return {"error": f"Scripture '{scripture_name}' not found"}
|
| 321 |
|
| 322 |
-
# 1️⃣ Fetch
|
| 323 |
-
results = db.
|
| 324 |
collection_name=config["collection_name"],
|
| 325 |
metadata_where_clause=filter_obj,
|
| 326 |
)
|
|
@@ -331,18 +328,15 @@ async def search_scripture_find_first_match(
|
|
| 331 |
metadata_doc = results["metadatas"][i]
|
| 332 |
metadata_doc["id"] = doc_id
|
| 333 |
|
| 334 |
-
document_text = (
|
| 335 |
-
results["documents"][i] if results.get("documents") else None
|
| 336 |
-
)
|
| 337 |
canonical_doc = SanatanConfig().canonicalize_document(
|
| 338 |
scripture_name, document_text, metadata_doc
|
| 339 |
)
|
| 340 |
formatted_results.append(canonical_doc)
|
| 341 |
|
| 342 |
-
# 2️⃣ Apply has_audio filter
|
| 343 |
if has_audio and formatted_results:
|
| 344 |
if has_audio == AudioType.none:
|
| 345 |
-
# Get all indices that have any audio
|
| 346 |
all_audio_indices = set()
|
| 347 |
for atype in [
|
| 348 |
AudioType.recitation,
|
|
@@ -353,45 +347,39 @@ async def search_scripture_find_first_match(
|
|
| 353 |
indices = await svc_get_indices_with_audio(scripture_name, atype)
|
| 354 |
all_audio_indices.update(indices)
|
| 355 |
|
| 356 |
-
# Keep only those without audio
|
| 357 |
formatted_results = [
|
| 358 |
-
r
|
| 359 |
-
for r in formatted_results
|
| 360 |
-
if r["_global_index"] not in all_audio_indices
|
| 361 |
]
|
| 362 |
-
|
| 363 |
else:
|
|
|
|
| 364 |
if has_audio == AudioType.any:
|
| 365 |
-
audio_indices = set()
|
| 366 |
for atype in [
|
| 367 |
AudioType.recitation,
|
| 368 |
AudioType.virutham,
|
| 369 |
AudioType.upanyasam,
|
| 370 |
AudioType.santhai,
|
| 371 |
]:
|
| 372 |
-
indices = await svc_get_indices_with_audio(
|
| 373 |
-
scripture_name, atype
|
| 374 |
-
)
|
| 375 |
audio_indices.update(indices)
|
| 376 |
else:
|
| 377 |
-
audio_indices
|
| 378 |
-
await svc_get_indices_with_audio(scripture_name, has_audio)
|
| 379 |
-
)
|
| 380 |
|
| 381 |
formatted_results = [
|
| 382 |
r for r in formatted_results if r["_global_index"] in audio_indices
|
| 383 |
]
|
| 384 |
|
| 385 |
-
# 3️⃣
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
|
|
|
| 389 |
|
| 390 |
except Exception as e:
|
| 391 |
logger.error("Error while searching %s", e, exc_info=True)
|
| 392 |
return {"error": str(e)}
|
| 393 |
|
| 394 |
|
|
|
|
| 395 |
class ScriptureMultiSearchRequest(BaseModel):
|
| 396 |
filter_obj: Optional[MetadataWhereClause] = None
|
| 397 |
page: int = 1
|
|
|
|
| 296 |
req: ScriptureFirstSearchRequst,
|
| 297 |
):
|
| 298 |
"""
|
| 299 |
+
Search the scripture collection and return the first matching result (lowest global index) after applying the audio filter.
|
|
|
|
|
|
|
|
|
|
| 300 |
"""
|
| 301 |
filter_obj = req.filter_obj
|
| 302 |
has_audio = req.has_audio
|
|
|
|
| 316 |
if not config:
|
| 317 |
return {"error": f"Scripture '{scripture_name}' not found"}
|
| 318 |
|
| 319 |
+
# 1️⃣ Fetch all matches
|
| 320 |
+
results = db.fetch_all_matches(
|
| 321 |
collection_name=config["collection_name"],
|
| 322 |
metadata_where_clause=filter_obj,
|
| 323 |
)
|
|
|
|
| 328 |
metadata_doc = results["metadatas"][i]
|
| 329 |
metadata_doc["id"] = doc_id
|
| 330 |
|
| 331 |
+
document_text = results["documents"][i] if results.get("documents") else None
|
|
|
|
|
|
|
| 332 |
canonical_doc = SanatanConfig().canonicalize_document(
|
| 333 |
scripture_name, document_text, metadata_doc
|
| 334 |
)
|
| 335 |
formatted_results.append(canonical_doc)
|
| 336 |
|
| 337 |
+
# 2️⃣ Apply has_audio filter
|
| 338 |
if has_audio and formatted_results:
|
| 339 |
if has_audio == AudioType.none:
|
|
|
|
| 340 |
all_audio_indices = set()
|
| 341 |
for atype in [
|
| 342 |
AudioType.recitation,
|
|
|
|
| 347 |
indices = await svc_get_indices_with_audio(scripture_name, atype)
|
| 348 |
all_audio_indices.update(indices)
|
| 349 |
|
|
|
|
| 350 |
formatted_results = [
|
| 351 |
+
r for r in formatted_results if r["_global_index"] not in all_audio_indices
|
|
|
|
|
|
|
| 352 |
]
|
|
|
|
| 353 |
else:
|
| 354 |
+
audio_indices = set()
|
| 355 |
if has_audio == AudioType.any:
|
|
|
|
| 356 |
for atype in [
|
| 357 |
AudioType.recitation,
|
| 358 |
AudioType.virutham,
|
| 359 |
AudioType.upanyasam,
|
| 360 |
AudioType.santhai,
|
| 361 |
]:
|
| 362 |
+
indices = await svc_get_indices_with_audio(scripture_name, atype)
|
|
|
|
|
|
|
| 363 |
audio_indices.update(indices)
|
| 364 |
else:
|
| 365 |
+
audio_indices.update(await svc_get_indices_with_audio(scripture_name, has_audio))
|
|
|
|
|
|
|
| 366 |
|
| 367 |
formatted_results = [
|
| 368 |
r for r in formatted_results if r["_global_index"] in audio_indices
|
| 369 |
]
|
| 370 |
|
| 371 |
+
# 3️⃣ Sort by global index
|
| 372 |
+
formatted_results.sort(key=lambda x: x["_global_index"])
|
| 373 |
+
|
| 374 |
+
# 4️⃣ Return only the first valid result
|
| 375 |
+
return {"results": formatted_results[:1] if formatted_results else []}
|
| 376 |
|
| 377 |
except Exception as e:
|
| 378 |
logger.error("Error while searching %s", e, exc_info=True)
|
| 379 |
return {"error": str(e)}
|
| 380 |
|
| 381 |
|
| 382 |
+
|
| 383 |
class ScriptureMultiSearchRequest(BaseModel):
|
| 384 |
filter_obj: Optional[MetadataWhereClause] = None
|
| 385 |
page: int = 1
|