vikramvasudevan committed on
Commit
5b0aa61
·
verified ·
1 Parent(s): 9dfa5ae

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. db.py +31 -2
  2. server.py +29 -15
db.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import pandas as pd
2
  import numpy as np
3
  import random
@@ -20,8 +21,21 @@ logger.setLevel(logging.INFO)
20
 
21
 
22
  class SanatanDatabase:
23
- def __init__(self) -> None:
 
 
 
 
 
 
 
 
 
 
24
  self.chroma_client = chromadb.PersistentClient(path=SanatanConfig.dbStorePath)
 
 
 
25
 
26
  def does_data_exist(self, collection_name: str) -> bool:
27
  collection = self.chroma_client.get_or_create_collection(name=collection_name)
@@ -540,11 +554,26 @@ class SanatanDatabase:
540
  )
541
 
542
  def count(self, collection_name: str):
 
 
 
 
 
 
 
 
 
 
 
 
543
  collection = self.chroma_client.get_or_create_collection(name=collection_name)
544
  total_count = collection.count()
545
  logger.info("Total records in [%s] = %d", collection_name, total_count)
546
- return total_count
547
 
 
 
 
 
548
  def test_sanity(self):
549
  for scripture in SanatanConfig().scriptures:
550
  count = self.count(collection_name=scripture["collection_name"])
 
1
+ import time
2
  import pandas as pd
3
  import numpy as np
4
  import random
 
21
 
22
 
23
  class SanatanDatabase:
24
+ _instance = None
25
+
26
+ def __new__(cls, *args, **kwargs):
27
+ # ✅ Ensure only one instance exists
28
+ if cls._instance is None:
29
+ cls._instance = super().__new__(cls)
30
+ cls._instance._init_once()
31
+ return cls._instance
32
+
33
+ def _init_once(self):
34
+ """Initialize once per process"""
35
  self.chroma_client = chromadb.PersistentClient(path=SanatanConfig.dbStorePath)
36
+ self._count_cache = {} # {collection_name: (timestamp, count)}
37
+ self._cache_ttl = 84600 # seconds (~23.5 hours; a full 24 hours would be 86400)
38
+ logger.info("✅ SanatanDatabase singleton initialized")
39
 
40
  def does_data_exist(self, collection_name: str) -> bool:
41
  collection = self.chroma_client.get_or_create_collection(name=collection_name)
 
554
  )
555
 
556
  def count(self, collection_name: str):
557
+ # check cache first
558
+ now = time.time()
559
+ cached_entry = self._count_cache.get(collection_name)
560
+ if cached_entry:
561
+ ts, cached_count = cached_entry
562
+ if now - ts < self._cache_ttl:
563
+ logger.debug("Cache hit for collection [%s]: %d", collection_name, cached_count)
564
+ return cached_count
565
+ else:
566
+ logger.debug("Cache expired for [%s]", collection_name)
567
+
568
+ # fetch fresh count
569
  collection = self.chroma_client.get_or_create_collection(name=collection_name)
570
  total_count = collection.count()
571
  logger.info("Total records in [%s] = %d", collection_name, total_count)
 
572
 
573
+ # update cache
574
+ self._count_cache[collection_name] = (now, total_count)
575
+ return total_count
576
+
577
  def test_sanity(self):
578
  for scripture in SanatanConfig().scriptures:
579
  count = self.count(collection_name=scripture["collection_name"])
server.py CHANGED
@@ -1,12 +1,12 @@
1
  # server.py
2
  import random
3
  import traceback
4
- from typing import Optional
5
  import uuid
6
  from fastapi import APIRouter, HTTPException, Request, Query
7
  from fastapi.responses import JSONResponse
8
  import pycountry
9
- from pydantic import BaseModel
10
  from chat_utils import chat
11
  from config import SanatanConfig
12
  from db import SanatanDatabase
@@ -119,13 +119,13 @@ async def handle_fetch_languages():
119
  languages.sort(key=lambda x: x["name"])
120
  return languages
121
 
 
122
  @router.get("/languages_v2")
123
  async def fn_handle_fetch_languages_v2():
124
  val = await handle_fetch_languages_v2()
125
  return val
126
 
127
 
128
-
129
  @router.post("/greet")
130
  async def handle_greet(msg: Message):
131
  markdown = "Namaskaram 🙏 I am **bhashyam.ai** and I can help you explore the following scriptures:\n---\n"
@@ -212,6 +212,9 @@ async def handle_get_scriptures():
212
  class ScriptureRequest(BaseModel):
213
  scripture_name: str
214
  unit_index: int
 
 
 
215
 
216
 
217
  @router.post("/scripture")
@@ -219,26 +222,35 @@ async def get_scripture(req: ScriptureRequest):
219
  """
220
  Return a scripture unit (page or verse, based on config),
221
  including all metadata fields separately.
222
- used for page view to fetch by global index.
223
  """
224
- logger.info("get_scripture: received request to fetch scripture: %s", req)
 
 
 
 
225
 
226
  # find config entry for the scripture
227
  config = next(
228
  (s for s in SanatanConfig().scriptures if s["name"] == req.scripture_name), None
229
  )
230
  if not config:
231
- return {"error": f"Scripture '{req.scripture_name}' not found"}
 
 
 
232
 
233
  # fetch the raw document from DB
234
  raw_doc = SanatanDatabase().fetch_document_by_index(
235
  collection_name=config["collection_name"],
236
  index=req.unit_index,
237
- # unit_name=config.get("unit_field", config.get("unit")),
238
  )
239
 
240
  if not raw_doc or isinstance(raw_doc, str) or "error" in raw_doc:
241
- return {"error": f"No data available for unit {req.unit_index}"}
 
 
 
242
 
243
  # canonicalize it
244
  canonical_doc = SanatanConfig().canonicalize_document(
@@ -249,8 +261,8 @@ async def get_scripture(req: ScriptureRequest):
249
 
250
  # add unit index & total units (so Flutter can paginate)
251
  canonical_doc["total"] = SanatanDatabase().count(config["collection_name"])
 
252
 
253
- # print("canonical_doc = ", canonical_doc)
254
  return canonical_doc
255
 
256
 
@@ -277,7 +289,7 @@ async def get_scripture_configs():
277
  {
278
  "name": s["name"], # e.g. "bhagavad_gita"
279
  "title": s["title"], # e.g. "Bhagavad Gita"
280
- "banner_url": s.get("banner_url",None),
281
  "category": s["category"], # e.g. "Philosophy"
282
  "unit": s["unit"], # e.g. "verse" or "page"
283
  "unit_field": s.get("unit_field", s.get("unit")),
@@ -335,12 +347,12 @@ async def search_scripture_find_first_match(
335
  page_size=None,
336
  )
337
  else:
338
- # optimization. get only first match if no has_audio parameter is provided.
339
- result = db.fetch_first_match(
340
  collection_name=config["collection_name"],
341
  metadata_where_clause=filter_obj,
342
  )
343
- results = {
344
  "ids": list(result["ids"]),
345
  "documents": list(result["documents"]),
346
  "metadatas": list(result["metadatas"]),
@@ -578,10 +590,11 @@ def route_get_donation_product_ids(include_tests: bool = False):
578
  ]
579
  return products
580
 
 
581
  @router.get("/discourse/list")
582
  async def get_all_discourses(
583
  page: int = Query(1, ge=1, description="Page number (1-indexed)"),
584
- per_page: int = Query(10, ge=1, le=100, description="Number of items per page")
585
  ):
586
  """
587
  Returns a paginated list of discourse topics.
@@ -593,6 +606,7 @@ async def get_all_discourses(
593
  result = await get_discourse_summaries(page=page, per_page=per_page)
594
  return result
595
 
 
596
  @router.get("/discourse/find/{topic_id}")
597
  async def get_discourse_detail(topic_id: int):
598
  """
@@ -601,4 +615,4 @@ async def get_discourse_detail(topic_id: int):
601
  topic = await get_discourse_by_id(topic_id)
602
  if not topic:
603
  raise HTTPException(status_code=404, detail="Discourse topic not found")
604
- return topic
 
1
  # server.py
2
  import random
3
  import traceback
4
+ from typing import Optional
5
  import uuid
6
  from fastapi import APIRouter, HTTPException, Request, Query
7
  from fastapi.responses import JSONResponse
8
  import pycountry
9
+ from pydantic import BaseModel, Field
10
  from chat_utils import chat
11
  from config import SanatanConfig
12
  from db import SanatanDatabase
 
119
  languages.sort(key=lambda x: x["name"])
120
  return languages
121
 
122
+
123
  @router.get("/languages_v2")
124
  async def fn_handle_fetch_languages_v2():
125
  val = await handle_fetch_languages_v2()
126
  return val
127
 
128
 
 
129
  @router.post("/greet")
130
  async def handle_greet(msg: Message):
131
  markdown = "Namaskaram 🙏 I am **bhashyam.ai** and I can help you explore the following scriptures:\n---\n"
 
212
  class ScriptureRequest(BaseModel):
213
  scripture_name: str
214
  unit_index: int
215
+ request_id: str | None = Field(
216
+ default=None, alias="request_id"
217
+ ) # optional, backward compatible
218
 
219
 
220
  @router.post("/scripture")
 
222
  """
223
  Return a scripture unit (page or verse, based on config),
224
  including all metadata fields separately.
225
+ Used for page view to fetch by global index.
226
  """
227
+ # ensure we have a valid request id (deviceId)
228
+ request_id = req.request_id or f"auto-{uuid.uuid4()}"
229
+ logger.info(
230
+ f"get_scripture: received requestId={request_id}, scripture={req.scripture_name}, unit_index={req.unit_index}"
231
+ )
232
 
233
  # find config entry for the scripture
234
  config = next(
235
  (s for s in SanatanConfig().scriptures if s["name"] == req.scripture_name), None
236
  )
237
  if not config:
238
+ return {
239
+ "error": f"Scripture '{req.scripture_name}' not found",
240
+ "requestId": request_id,
241
+ }
242
 
243
  # fetch the raw document from DB
244
  raw_doc = SanatanDatabase().fetch_document_by_index(
245
  collection_name=config["collection_name"],
246
  index=req.unit_index,
 
247
  )
248
 
249
  if not raw_doc or isinstance(raw_doc, str) or "error" in raw_doc:
250
+ return {
251
+ "error": f"No data available for unit {req.unit_index}",
252
+ "requestId": request_id,
253
+ }
254
 
255
  # canonicalize it
256
  canonical_doc = SanatanConfig().canonicalize_document(
 
261
 
262
  # add unit index & total units (so Flutter can paginate)
263
  canonical_doc["total"] = SanatanDatabase().count(config["collection_name"])
264
+ canonical_doc["requestId"] = request_id
265
 
 
266
  return canonical_doc
267
 
268
 
 
289
  {
290
  "name": s["name"], # e.g. "bhagavad_gita"
291
  "title": s["title"], # e.g. "Bhagavad Gita"
292
+ "banner_url": s.get("banner_url", None),
293
  "category": s["category"], # e.g. "Philosophy"
294
  "unit": s["unit"], # e.g. "verse" or "page"
295
  "unit_field": s.get("unit_field", s.get("unit")),
 
347
  page_size=None,
348
  )
349
  else:
350
+ # optimization. get only first match if no has_audio parameter is provided.
351
+ result = db.fetch_first_match(
352
  collection_name=config["collection_name"],
353
  metadata_where_clause=filter_obj,
354
  )
355
+ results = {
356
  "ids": list(result["ids"]),
357
  "documents": list(result["documents"]),
358
  "metadatas": list(result["metadatas"]),
 
590
  ]
591
  return products
592
 
593
+
594
  @router.get("/discourse/list")
595
  async def get_all_discourses(
596
  page: int = Query(1, ge=1, description="Page number (1-indexed)"),
597
+ per_page: int = Query(10, ge=1, le=100, description="Number of items per page"),
598
  ):
599
  """
600
  Returns a paginated list of discourse topics.
 
606
  result = await get_discourse_summaries(page=page, per_page=per_page)
607
  return result
608
 
609
+
610
  @router.get("/discourse/find/{topic_id}")
611
  async def get_discourse_detail(topic_id: int):
612
  """
 
615
  topic = await get_discourse_by_id(topic_id)
616
  if not topic:
617
  raise HTTPException(status_code=404, detail="Discourse topic not found")
618
+ return topic