Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
db.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
import numpy as np
|
| 3 |
import random
|
|
@@ -20,8 +21,21 @@ logger.setLevel(logging.INFO)
|
|
| 20 |
|
| 21 |
|
| 22 |
class SanatanDatabase:
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
self.chroma_client = chromadb.PersistentClient(path=SanatanConfig.dbStorePath)
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
def does_data_exist(self, collection_name: str) -> bool:
|
| 27 |
collection = self.chroma_client.get_or_create_collection(name=collection_name)
|
|
@@ -540,11 +554,26 @@ class SanatanDatabase:
|
|
| 540 |
)
|
| 541 |
|
| 542 |
def count(self, collection_name: str):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 543 |
collection = self.chroma_client.get_or_create_collection(name=collection_name)
|
| 544 |
total_count = collection.count()
|
| 545 |
logger.info("Total records in [%s] = %d", collection_name, total_count)
|
| 546 |
-
return total_count
|
| 547 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 548 |
def test_sanity(self):
|
| 549 |
for scripture in SanatanConfig().scriptures:
|
| 550 |
count = self.count(collection_name=scripture["collection_name"])
|
|
|
|
| 1 |
+
import time
|
| 2 |
import pandas as pd
|
| 3 |
import numpy as np
|
| 4 |
import random
|
|
|
|
| 21 |
|
| 22 |
|
| 23 |
class SanatanDatabase:
|
| 24 |
+
_instance = None
|
| 25 |
+
|
| 26 |
+
def __new__(cls, *args, **kwargs):
    """Return the shared SanatanDatabase instance, creating it on first use.

    Any positional or keyword arguments are accepted so existing call
    sites keep working, but they are not used here.

    Returns:
        The single instance stored on ``cls._instance``.
    """
    if cls._instance is None:
        # Build and initialise the object first and only then publish it.
        # If _init_once() raises, _instance stays None, so the next call
        # retries instead of handing out a half-initialised object.
        instance = super().__new__(cls)
        instance._init_once()
        cls._instance = instance
    return cls._instance
|
| 32 |
+
|
| 33 |
+
def _init_once(self):
    """Set up the state held by the singleton instance.

    Called from ``__new__`` when the shared instance is first created.
    """
    self.chroma_client = chromadb.PersistentClient(path=SanatanConfig.dbStorePath)
    # Per-collection record counts: {collection_name: (timestamp, count)}
    self._count_cache = {}
    # Cache lifetime in seconds (24 hours). The previous literal, 84600,
    # was 23.5 hours and contradicted the documented 24-hour intent.
    self._cache_ttl = 24 * 60 * 60
    logger.info("✅ SanatanDatabase singleton initialized")
|
| 39 |
|
| 40 |
def does_data_exist(self, collection_name: str) -> bool:
|
| 41 |
collection = self.chroma_client.get_or_create_collection(name=collection_name)
|
|
|
|
| 554 |
)
|
| 555 |
|
| 556 |
def count(self, collection_name: str) -> int:
    """Return the number of records in a collection, using a per-collection cache.

    A count stored in ``self._count_cache`` less than ``self._cache_ttl``
    seconds ago is returned as-is. Otherwise the collection is obtained
    with ``get_or_create_collection``, counted, and the cache entry is
    refreshed.

    Args:
        collection_name: Name of the Chroma collection to count.

    Returns:
        The record count (possibly a cached value).
    """
    # Monotonic clock: the cache lives only inside this process, and a
    # wall-clock adjustment must not expire or extend entries.
    now = time.monotonic()

    cached_entry = self._count_cache.get(collection_name)
    if cached_entry is not None:
        cached_at, cached_count = cached_entry
        if now - cached_at < self._cache_ttl:
            logger.debug("Cache hit for collection [%s]: %d", collection_name, cached_count)
            return cached_count
        logger.debug("Cache expired for [%s]", collection_name)

    # Cache miss or stale entry: fetch a fresh count.
    collection = self.chroma_client.get_or_create_collection(name=collection_name)
    total_count = collection.count()
    logger.info("Total records in [%s] = %d", collection_name, total_count)

    self._count_cache[collection_name] = (now, total_count)
    return total_count
|
| 576 |
+
|
| 577 |
def test_sanity(self):
|
| 578 |
for scripture in SanatanConfig().scriptures:
|
| 579 |
count = self.count(collection_name=scripture["collection_name"])
|
server.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
| 1 |
# server.py
|
| 2 |
import random
|
| 3 |
import traceback
|
| 4 |
-
from typing import
|
| 5 |
import uuid
|
| 6 |
from fastapi import APIRouter, HTTPException, Request, Query
|
| 7 |
from fastapi.responses import JSONResponse
|
| 8 |
import pycountry
|
| 9 |
-
from pydantic import BaseModel
|
| 10 |
from chat_utils import chat
|
| 11 |
from config import SanatanConfig
|
| 12 |
from db import SanatanDatabase
|
|
@@ -119,13 +119,13 @@ async def handle_fetch_languages():
|
|
| 119 |
languages.sort(key=lambda x: x["name"])
|
| 120 |
return languages
|
| 121 |
|
|
|
|
| 122 |
@router.get("/languages_v2")
|
| 123 |
async def fn_handle_fetch_languages_v2():
|
| 124 |
val = await handle_fetch_languages_v2()
|
| 125 |
return val
|
| 126 |
|
| 127 |
|
| 128 |
-
|
| 129 |
@router.post("/greet")
|
| 130 |
async def handle_greet(msg: Message):
|
| 131 |
markdown = "Namaskaram 🙏 I am **bhashyam.ai** and I can help you explore the following scriptures:\n---\n"
|
|
@@ -212,6 +212,9 @@ async def handle_get_scriptures():
|
|
| 212 |
class ScriptureRequest(BaseModel):
|
| 213 |
scripture_name: str
|
| 214 |
unit_index: int
|
|
|
|
|
|
|
|
|
|
| 215 |
|
| 216 |
|
| 217 |
@router.post("/scripture")
|
|
@@ -219,26 +222,35 @@ async def get_scripture(req: ScriptureRequest):
|
|
| 219 |
"""
|
| 220 |
Return a scripture unit (page or verse, based on config),
|
| 221 |
including all metadata fields separately.
|
| 222 |
-
|
| 223 |
"""
|
| 224 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
|
| 226 |
# find config entry for the scripture
|
| 227 |
config = next(
|
| 228 |
(s for s in SanatanConfig().scriptures if s["name"] == req.scripture_name), None
|
| 229 |
)
|
| 230 |
if not config:
|
| 231 |
-
return {
|
|
|
|
|
|
|
|
|
|
| 232 |
|
| 233 |
# fetch the raw document from DB
|
| 234 |
raw_doc = SanatanDatabase().fetch_document_by_index(
|
| 235 |
collection_name=config["collection_name"],
|
| 236 |
index=req.unit_index,
|
| 237 |
-
# unit_name=config.get("unit_field", config.get("unit")),
|
| 238 |
)
|
| 239 |
|
| 240 |
if not raw_doc or isinstance(raw_doc, str) or "error" in raw_doc:
|
| 241 |
-
return {
|
|
|
|
|
|
|
|
|
|
| 242 |
|
| 243 |
# canonicalize it
|
| 244 |
canonical_doc = SanatanConfig().canonicalize_document(
|
|
@@ -249,8 +261,8 @@ async def get_scripture(req: ScriptureRequest):
|
|
| 249 |
|
| 250 |
# add unit index & total units (so Flutter can paginate)
|
| 251 |
canonical_doc["total"] = SanatanDatabase().count(config["collection_name"])
|
|
|
|
| 252 |
|
| 253 |
-
# print("canonical_doc = ", canonical_doc)
|
| 254 |
return canonical_doc
|
| 255 |
|
| 256 |
|
|
@@ -277,7 +289,7 @@ async def get_scripture_configs():
|
|
| 277 |
{
|
| 278 |
"name": s["name"], # e.g. "bhagavad_gita"
|
| 279 |
"title": s["title"], # e.g. "Bhagavad Gita"
|
| 280 |
-
"banner_url": s.get("banner_url",None),
|
| 281 |
"category": s["category"], # e.g. "Philosophy"
|
| 282 |
"unit": s["unit"], # e.g. "verse" or "page"
|
| 283 |
"unit_field": s.get("unit_field", s.get("unit")),
|
|
@@ -335,12 +347,12 @@ async def search_scripture_find_first_match(
|
|
| 335 |
page_size=None,
|
| 336 |
)
|
| 337 |
else:
|
| 338 |
-
|
| 339 |
-
|
| 340 |
collection_name=config["collection_name"],
|
| 341 |
metadata_where_clause=filter_obj,
|
| 342 |
)
|
| 343 |
-
|
| 344 |
"ids": list(result["ids"]),
|
| 345 |
"documents": list(result["documents"]),
|
| 346 |
"metadatas": list(result["metadatas"]),
|
|
@@ -578,10 +590,11 @@ def route_get_donation_product_ids(include_tests: bool = False):
|
|
| 578 |
]
|
| 579 |
return products
|
| 580 |
|
|
|
|
| 581 |
@router.get("/discourse/list")
|
| 582 |
async def get_all_discourses(
|
| 583 |
page: int = Query(1, ge=1, description="Page number (1-indexed)"),
|
| 584 |
-
per_page: int = Query(10, ge=1, le=100, description="Number of items per page")
|
| 585 |
):
|
| 586 |
"""
|
| 587 |
Returns a paginated list of discourse topics.
|
|
@@ -593,6 +606,7 @@ async def get_all_discourses(
|
|
| 593 |
result = await get_discourse_summaries(page=page, per_page=per_page)
|
| 594 |
return result
|
| 595 |
|
|
|
|
| 596 |
@router.get("/discourse/find/{topic_id}")
|
| 597 |
async def get_discourse_detail(topic_id: int):
|
| 598 |
"""
|
|
@@ -601,4 +615,4 @@ async def get_discourse_detail(topic_id: int):
|
|
| 601 |
topic = await get_discourse_by_id(topic_id)
|
| 602 |
if not topic:
|
| 603 |
raise HTTPException(status_code=404, detail="Discourse topic not found")
|
| 604 |
-
return topic
|
|
|
|
| 1 |
# server.py
|
| 2 |
import random
|
| 3 |
import traceback
|
| 4 |
+
from typing import Optional
|
| 5 |
import uuid
|
| 6 |
from fastapi import APIRouter, HTTPException, Request, Query
|
| 7 |
from fastapi.responses import JSONResponse
|
| 8 |
import pycountry
|
| 9 |
+
from pydantic import BaseModel, Field
|
| 10 |
from chat_utils import chat
|
| 11 |
from config import SanatanConfig
|
| 12 |
from db import SanatanDatabase
|
|
|
|
| 119 |
languages.sort(key=lambda x: x["name"])
|
| 120 |
return languages
|
| 121 |
|
| 122 |
+
|
| 123 |
@router.get("/languages_v2")
async def fn_handle_fetch_languages_v2():
    """Handle GET /languages_v2 by delegating to handle_fetch_languages_v2()."""
    return await handle_fetch_languages_v2()
|
| 127 |
|
| 128 |
|
|
|
|
| 129 |
@router.post("/greet")
|
| 130 |
async def handle_greet(msg: Message):
|
| 131 |
markdown = "Namaskaram 🙏 I am **bhashyam.ai** and I can help you explore the following scriptures:\n---\n"
|
|
|
|
| 212 |
class ScriptureRequest(BaseModel):
    # Request body for POST /scripture.

    # Matched against the "name" of a configured scripture.
    scripture_name: str
    # Index of the unit to fetch within the scripture's collection.
    unit_index: int
    # Optional, so clients that do not send it keep working; the handler
    # generates an "auto-<uuid>" value when it is absent.
    request_id: str | None = Field(default=None, alias="request_id")
|
| 218 |
|
| 219 |
|
| 220 |
@router.post("/scripture")
|
|
|
|
| 222 |
"""
|
| 223 |
Return a scripture unit (page or verse, based on config),
|
| 224 |
including all metadata fields separately.
|
| 225 |
+
Used for page view to fetch by global index.
|
| 226 |
"""
|
| 227 |
+
# ensure we have a valid request id (deviceId)
|
| 228 |
+
request_id = req.request_id or f"auto-{uuid.uuid4()}"
|
| 229 |
+
logger.info(
|
| 230 |
+
f"get_scripture: received requestId={request_id}, scripture={req.scripture_name}, unit_index={req.unit_index}"
|
| 231 |
+
)
|
| 232 |
|
| 233 |
# find config entry for the scripture
|
| 234 |
config = next(
|
| 235 |
(s for s in SanatanConfig().scriptures if s["name"] == req.scripture_name), None
|
| 236 |
)
|
| 237 |
if not config:
|
| 238 |
+
return {
|
| 239 |
+
"error": f"Scripture '{req.scripture_name}' not found",
|
| 240 |
+
"requestId": request_id,
|
| 241 |
+
}
|
| 242 |
|
| 243 |
# fetch the raw document from DB
|
| 244 |
raw_doc = SanatanDatabase().fetch_document_by_index(
|
| 245 |
collection_name=config["collection_name"],
|
| 246 |
index=req.unit_index,
|
|
|
|
| 247 |
)
|
| 248 |
|
| 249 |
if not raw_doc or isinstance(raw_doc, str) or "error" in raw_doc:
|
| 250 |
+
return {
|
| 251 |
+
"error": f"No data available for unit {req.unit_index}",
|
| 252 |
+
"requestId": request_id,
|
| 253 |
+
}
|
| 254 |
|
| 255 |
# canonicalize it
|
| 256 |
canonical_doc = SanatanConfig().canonicalize_document(
|
|
|
|
| 261 |
|
| 262 |
# add unit index & total units (so Flutter can paginate)
|
| 263 |
canonical_doc["total"] = SanatanDatabase().count(config["collection_name"])
|
| 264 |
+
canonical_doc["requestId"] = request_id
|
| 265 |
|
|
|
|
| 266 |
return canonical_doc
|
| 267 |
|
| 268 |
|
|
|
|
| 289 |
{
|
| 290 |
"name": s["name"], # e.g. "bhagavad_gita"
|
| 291 |
"title": s["title"], # e.g. "Bhagavad Gita"
|
| 292 |
+
"banner_url": s.get("banner_url", None),
|
| 293 |
"category": s["category"], # e.g. "Philosophy"
|
| 294 |
"unit": s["unit"], # e.g. "verse" or "page"
|
| 295 |
"unit_field": s.get("unit_field", s.get("unit")),
|
|
|
|
| 347 |
page_size=None,
|
| 348 |
)
|
| 349 |
else:
|
| 350 |
+
# optimization. get only first match if no has_audio parameter is provided.
|
| 351 |
+
result = db.fetch_first_match(
|
| 352 |
collection_name=config["collection_name"],
|
| 353 |
metadata_where_clause=filter_obj,
|
| 354 |
)
|
| 355 |
+
results = {
|
| 356 |
"ids": list(result["ids"]),
|
| 357 |
"documents": list(result["documents"]),
|
| 358 |
"metadatas": list(result["metadatas"]),
|
|
|
|
| 590 |
]
|
| 591 |
return products
|
| 592 |
|
| 593 |
+
|
| 594 |
@router.get("/discourse/list")
|
| 595 |
async def get_all_discourses(
|
| 596 |
page: int = Query(1, ge=1, description="Page number (1-indexed)"),
|
| 597 |
+
per_page: int = Query(10, ge=1, le=100, description="Number of items per page"),
|
| 598 |
):
|
| 599 |
"""
|
| 600 |
Returns a paginated list of discourse topics.
|
|
|
|
| 606 |
result = await get_discourse_summaries(page=page, per_page=per_page)
|
| 607 |
return result
|
| 608 |
|
| 609 |
+
|
| 610 |
@router.get("/discourse/find/{topic_id}")
|
| 611 |
async def get_discourse_detail(topic_id: int):
|
| 612 |
"""
|
|
|
|
| 615 |
topic = await get_discourse_by_id(topic_id)
|
| 616 |
if not topic:
|
| 617 |
raise HTTPException(status_code=404, detail="Discourse topic not found")
|
| 618 |
+
return topic
|