vikramvasudevan committed on
Commit
ec48ce1
·
verified ·
1 Parent(s): dd07ca6

Upload folder using huggingface_hub

Browse files
db.py CHANGED
@@ -77,43 +77,81 @@ class SanatanDatabase:
77
  metadatas=[metas[i] for i in indices],
78
  )
79
 
80
- def fetch_first_match(
81
  self, collection_name: str, metadata_where_clause: MetadataWhereClause = None
82
  ):
83
- """This version is created to support the browse module"""
84
  logger.info(
85
  "getting first matching verses from [%s] | metadata_where_clause = %s",
86
  collection_name,
87
  metadata_where_clause,
88
  )
89
  collection = self.chroma_client.get_or_create_collection(name=collection_name)
90
- data = collection.get(
91
- include=["metadatas", "documents"],
92
- where=(
93
- metadata_where_clause.to_chroma_where()
94
- if metadata_where_clause is not None
95
- else None
96
- ),
97
  )
98
 
 
 
99
  if data["metadatas"]:
100
- # find index of record with lowest _global_index
101
  min_index = min(
102
  range(len(data["metadatas"])),
103
  key=lambda i: data["metadatas"][i].get("_global_index", float("inf")),
104
  )
105
-
106
- # shrink data to keep same structure but only one record
107
- data = {
108
  "ids": [data["ids"][min_index]],
109
  "documents": [data["documents"][min_index]],
110
  "metadatas": [data["metadatas"][min_index]],
111
  }
112
- else:
113
- logger.warning("No data found! - data=%s", data)
 
 
 
 
 
 
 
 
 
 
 
 
114
  return chromadb.GetResult(ids=[], documents=[], metadatas=[])
115
 
116
- return data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
  def search(
119
  self,
 
77
  metadatas=[metas[i] for i in indices],
78
  )
79
 
80
+ def fetch_first_match(
81
  self, collection_name: str, metadata_where_clause: MetadataWhereClause = None
82
  ):
83
+ """This version is created to support the browse module with fallback regex matching"""
84
  logger.info(
85
  "getting first matching verses from [%s] | metadata_where_clause = %s",
86
  collection_name,
87
  metadata_where_clause,
88
  )
89
  collection = self.chroma_client.get_or_create_collection(name=collection_name)
90
+
91
+ where_clause = (
92
+ metadata_where_clause.to_chroma_where()
93
+ if metadata_where_clause is not None
94
+ else None
 
 
95
  )
96
 
97
+ data = collection.get(include=["metadatas", "documents"], where=where_clause)
98
+
99
  if data["metadatas"]:
100
+ # normal path
101
  min_index = min(
102
  range(len(data["metadatas"])),
103
  key=lambda i: data["metadatas"][i].get("_global_index", float("inf")),
104
  )
105
+ return {
 
 
106
  "ids": [data["ids"][min_index]],
107
  "documents": [data["documents"][min_index]],
108
  "metadatas": [data["metadatas"][min_index]],
109
  }
110
+
111
+ # ⚠️ fallback path
112
+ logger.warning("No data found using strict filter. Attempting regex fallback.")
113
+
114
+ if not metadata_where_clause or not metadata_where_clause.filters:
115
+ return chromadb.GetResult(ids=[], documents=[], metadatas=[])
116
+
117
+ # find filters with $eq string type
118
+ regex_filters = [
119
+ f for f in metadata_where_clause.filters
120
+ if f.metadata_search_operator == "$eq" and isinstance(f.metadata_value, str)
121
+ ]
122
+
123
+ if not regex_filters:
124
  return chromadb.GetResult(ids=[], documents=[], metadatas=[])
125
 
126
+ # Pull all documents for manual regex scan
127
+ all_data = collection.get(include=["metadatas", "documents"])
128
+
129
+ matched_indices = []
130
+ for i, meta in enumerate(all_data["metadatas"]):
131
+ ok = True
132
+ for f in regex_filters:
133
+ field_val = str(meta.get(f.metadata_field, ""))
134
+ # case-insensitive substring search as fallback
135
+ if not re.search(re.escape(f.metadata_value), field_val, flags=re.IGNORECASE):
136
+ ok = False
137
+ break
138
+ if ok:
139
+ matched_indices.append(i)
140
+
141
+ if not matched_indices:
142
+ logger.warning("Regex fallback also found no matches.")
143
+ return chromadb.GetResult(ids=[], documents=[], metadatas=[])
144
+
145
+ # Pick lowest _global_index among matches
146
+ min_index = min(
147
+ matched_indices,
148
+ key=lambda i: all_data["metadatas"][i].get("_global_index", float("inf")),
149
+ )
150
+ return {
151
+ "ids": [all_data["ids"][min_index]],
152
+ "documents": [all_data["documents"][min_index]],
153
+ "metadatas": [all_data["metadatas"][min_index]],
154
+ }
155
 
156
  def search(
157
  self,
modules/config/divya_prabandham.py CHANGED
@@ -78,23 +78,6 @@ divya_prabandham_config = {
78
  for divya_desam_name in nalayiram_helper.get_standardized_divya_desam_names()
79
  ],
80
  },
81
- {
82
- "name": "title",
83
- "label": "Pasuram Title",
84
- "datatype": "str",
85
- "description": (
86
- "Exact title of a pasuram in one of the following formats:\n"
87
- "1. '{prabandham_code} {decade}.{chapter}.{pasuram}' — use when the prabandham has decades.\n"
88
- "2. '{prabandham_code} {chapter}.{pasuram}' — use when the prabandham does not have decades.\n\n"
89
- "⚠️ Use this field ONLY when the user provides a specific prabandham and a relative verse number.\n"
90
- "Examples of valid usage:\n"
91
- "- User query: '3rd pasuram in the 8th Thiruvaimozhi of the 1st decade.'\n"
92
- " → Convert to: '{prabandham_code} 1.8.3' and pass as `title` filter.\n"
93
- "- User query: '2nd pasuram of chapter 5 in [Prabandham with no decades].'\n"
94
- " → Convert to: '{prabandham_code} 5.2' and pass as `title` filter.\n"
95
- "Do NOT use `title` for general queries or keyword searches — leave it empty in those cases."
96
- ),
97
- },
98
  {
99
  "name": "verse",
100
  "label": "Absolute Pasuram Number",
@@ -136,6 +119,38 @@ divya_prabandham_config = {
136
  ),
137
  "show_as_filter": True,
138
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  ],
140
  "pdf_path": "./data/divya_prabandham.pdf",
141
  "source": "https://uveda.org",
 
78
  for divya_desam_name in nalayiram_helper.get_standardized_divya_desam_names()
79
  ],
80
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  {
82
  "name": "verse",
83
  "label": "Absolute Pasuram Number",
 
119
  ),
120
  "show_as_filter": True,
121
  },
122
+ {
123
+ "name": "title",
124
+ "label": "Pasuram Title",
125
+ "datatype": "str",
126
+ "description": (
127
+ "Exact title of a pasuram in one of the following formats:\n"
128
+ "1. '{prabandham_code} {decade}.{chapter}.{pasuram}' — use when the prabandham has decades.\n"
129
+ "2. '{prabandham_code} {chapter}.{pasuram}' — use when the prabandham does not have decades.\n\n"
130
+ "⚠️ Use this field ONLY when the user provides a specific prabandham and a relative verse number.\n"
131
+ "Examples of valid usage:\n"
132
+ "- User query: '3rd pasuram in the 8th Thiruvaimozhi of the 1st decade.'\n"
133
+ " → Convert to: '{prabandham_code} 1.8.3' and pass as `title` filter.\n"
134
+ "- User query: '2nd pasuram of chapter 5 in [Prabandham with no decades].'\n"
135
+ " → Convert to: '{prabandham_code} 5.2' and pass as `title` filter.\n"
136
+ "Do NOT use `title` for general queries or keyword searches — leave it empty in those cases."
137
+ ),
138
+ "show_as_filter": True,
139
+ },
140
+ {
141
+ "name": "pasuram_en",
142
+ "label": "Pasuram Lyrics in English",
143
+ "datatype": "str",
144
+ "description": "Pasuram lyrics in English",
145
+ "show_as_filter": True,
146
+ },
147
+ {
148
+ "name": "pasuram_ta",
149
+ "label": "Pasuram Lyrics in Tamil",
150
+ "datatype": "str",
151
+ "description": "Pasuram lyrics in Tamil",
152
+ "show_as_filter": True,
153
+ },
154
  ],
155
  "pdf_path": "./data/divya_prabandham.pdf",
156
  "source": "https://uveda.org",
modules/config/katakam.py CHANGED
@@ -64,12 +64,14 @@ katakam_config = {
64
  "label": "Lyrics in sanskrit",
65
  "datatype": "str",
66
  "description": "The original sloka in sanskrit.",
 
67
  },
68
  {
69
  "name": "transliteration",
70
  "label": "Transliteration in english",
71
  "datatype": "str",
72
  "description": "The original sloka transliterated in English.",
 
73
  },
74
  ],
75
  "pdf_path": "./data/katakam.pdf",
 
64
  "label": "Lyrics in sanskrit",
65
  "datatype": "str",
66
  "description": "The original sloka in sanskrit.",
67
+ "show_as_filter": True,
68
  },
69
  {
70
  "name": "transliteration",
71
  "label": "Transliteration in english",
72
  "datatype": "str",
73
  "description": "The original sloka transliterated in English.",
74
+ "show_as_filter": True,
75
  },
76
  ],
77
  "pdf_path": "./data/katakam.pdf",
modules/config/pancha_sooktham.py CHANGED
@@ -31,12 +31,6 @@ pancha_sooktham_config = {
31
  "lov": lambda: get_pancha_sooktham_chapters(),
32
  "is_unique": True,
33
  },
34
- {
35
- "name": "sanskrit",
36
- "label": "Lyrics in sanskrit",
37
- "datatype": "str",
38
- "description": "The original sloka in sanskrit.",
39
- },
40
  {
41
  "name": "relative_verse_number",
42
  "label": "Relative Verse Number",
@@ -45,6 +39,13 @@ pancha_sooktham_config = {
45
  "show_as_filter": True,
46
  "is_unique": True,
47
  },
 
 
 
 
 
 
 
48
  ],
49
  "pdf_path": "./data/pancha_sooktham.pdf",
50
  "source": "https://stotranidhi.com/",
 
31
  "lov": lambda: get_pancha_sooktham_chapters(),
32
  "is_unique": True,
33
  },
 
 
 
 
 
 
34
  {
35
  "name": "relative_verse_number",
36
  "label": "Relative Verse Number",
 
39
  "show_as_filter": True,
40
  "is_unique": True,
41
  },
42
+ {
43
+ "name": "sanskrit",
44
+ "label": "Lyrics in sanskrit",
45
+ "datatype": "str",
46
+ "description": "The original sloka in sanskrit.",
47
+ "show_as_filter": True,
48
+ },
49
  ],
50
  "pdf_path": "./data/pancha_sooktham.pdf",
51
  "source": "https://stotranidhi.com/",
modules/config/shanthi_panchakam.py CHANGED
@@ -32,12 +32,6 @@ shanthi_panchakam_config = {
32
  "lov": lambda: get_shanthi_panchakam_chapters(),
33
  "is_unique": True,
34
  },
35
- {
36
- "name": "sanskrit",
37
- "label": "Lyrics in sanskrit",
38
- "datatype": "str",
39
- "description": "The original sloka in sanskrit.",
40
- },
41
  {
42
  "name": "relative_verse_number",
43
  "label": "Relative Verse Number",
@@ -46,6 +40,13 @@ shanthi_panchakam_config = {
46
  "show_as_filter": True,
47
  "is_unique": True,
48
  },
 
 
 
 
 
 
 
49
  ],
50
  "pdf_path": "./data/shanthi_panchakam.pdf",
51
  "source": "https://vignanam.org/english/shanti-panchakam.html",
 
32
  "lov": lambda: get_shanthi_panchakam_chapters(),
33
  "is_unique": True,
34
  },
 
 
 
 
 
 
35
  {
36
  "name": "relative_verse_number",
37
  "label": "Relative Verse Number",
 
40
  "show_as_filter": True,
41
  "is_unique": True,
42
  },
43
+ {
44
+ "name": "sanskrit",
45
+ "label": "Lyrics in sanskrit",
46
+ "datatype": "str",
47
+ "description": "The original sloka in sanskrit.",
48
+ "show_as_filter": True,
49
+ },
50
  ],
51
  "pdf_path": "./data/shanthi_panchakam.pdf",
52
  "source": "https://vignanam.org/english/shanti-panchakam.html",
modules/config/sri_stavam.py CHANGED
@@ -29,23 +29,26 @@ sri_stavam_config = {
29
  "show_as_filter": True,
30
  "is_unique": True,
31
  },
32
- {
33
- "name": "meaning_short",
34
- "label": "Short meaning",
35
- "datatype": "str",
36
- "description": "A short meaning of the sanskrit verse in English.",
37
- },
38
  {
39
  "name": "sanskrit",
40
  "label": "Lyrics in sanskrit",
41
  "datatype": "str",
42
  "description": "Verse in sanskrit",
 
43
  },
44
  {
45
  "name": "transliteration",
46
  "label": "English Transliteration",
47
  "datatype": "str",
48
  "description": "Verse transliterated to English",
 
 
 
 
 
 
 
 
49
  },
50
  ],
51
  "pdf_path": "./data/sri_stavam.pdf",
 
29
  "show_as_filter": True,
30
  "is_unique": True,
31
  },
 
 
 
 
 
 
32
  {
33
  "name": "sanskrit",
34
  "label": "Lyrics in sanskrit",
35
  "datatype": "str",
36
  "description": "Verse in sanskrit",
37
+ "show_as_filter": True,
38
  },
39
  {
40
  "name": "transliteration",
41
  "label": "English Transliteration",
42
  "datatype": "str",
43
  "description": "Verse transliterated to English",
44
+ "show_as_filter": True,
45
+ },
46
+ {
47
+ "name": "meaning_short",
48
+ "label": "Short meaning",
49
+ "datatype": "str",
50
+ "description": "A short meaning of the sanskrit verse in English.",
51
+ "show_as_filter": True,
52
  },
53
  ],
54
  "pdf_path": "./data/sri_stavam.pdf",
modules/config/taitriya_aranyakam.py CHANGED
@@ -83,12 +83,14 @@ taitriya_aranyakam_config = {
83
  "label": "Lyrics in sanskrit",
84
  "datatype": "str",
85
  "description": "The original sloka in sanskrit.",
 
86
  },
87
  {
88
  "name": "transliteration",
89
  "label": "Transliteration in english",
90
  "datatype": "str",
91
  "description": "The original sloka transliterated in English.",
 
92
  },
93
  ],
94
  "pdf_path": "./data/taitriya_aranyakam.pdf",
 
83
  "label": "Lyrics in sanskrit",
84
  "datatype": "str",
85
  "description": "The original sloka in sanskrit.",
86
+ "show_as_filter": True,
87
  },
88
  {
89
  "name": "transliteration",
90
  "label": "Transliteration in english",
91
  "datatype": "str",
92
  "description": "The original sloka transliterated in English.",
93
+ "show_as_filter": True,
94
  },
95
  ],
96
  "pdf_path": "./data/taitriya_aranyakam.pdf",
modules/config/taitriya_brahmanam.py CHANGED
@@ -76,12 +76,14 @@ taitriya_brahmanam_config = {
76
  "label": "Lyrics in sanskrit",
77
  "datatype": "str",
78
  "description": "The original sloka in sanskrit.",
 
79
  },
80
  {
81
  "name": "transliteration",
82
  "label": "Transliteration in english",
83
  "datatype": "str",
84
  "description": "The original sloka transliterated in English.",
 
85
  },
86
  ],
87
  "pdf_path": "./data/taitriya_brahmanam.pdf",
 
76
  "label": "Lyrics in sanskrit",
77
  "datatype": "str",
78
  "description": "The original sloka in sanskrit.",
79
+ "show_as_filter": True,
80
  },
81
  {
82
  "name": "transliteration",
83
  "label": "Transliteration in english",
84
  "datatype": "str",
85
  "description": "The original sloka transliterated in English.",
86
+ "show_as_filter": True,
87
  },
88
  ],
89
  "pdf_path": "./data/taitriya_brahmanam.pdf",
modules/config/taitriya_samhitha.py CHANGED
@@ -76,12 +76,14 @@ taitriya_samhitha_config = {
76
  "label": "Lyrics in sanskrit",
77
  "datatype": "str",
78
  "description": "The original sloka in sanskrit.",
 
79
  },
80
  {
81
  "name": "transliteration",
82
  "label": "Transliteration in english",
83
  "datatype": "str",
84
  "description": "The original sloka transliterated in English.",
 
85
  },
86
  ],
87
  "pdf_path": "./data/taitriya_samhitha.pdf",
 
76
  "label": "Lyrics in sanskrit",
77
  "datatype": "str",
78
  "description": "The original sloka in sanskrit.",
79
+ "show_as_filter": True,
80
  },
81
  {
82
  "name": "transliteration",
83
  "label": "Transliteration in english",
84
  "datatype": "str",
85
  "description": "The original sloka transliterated in English.",
86
+ "show_as_filter": True,
87
  },
88
  ],
89
  "pdf_path": "./data/taitriya_samhitha.pdf",
modules/config/taitriya_upanishad.py CHANGED
@@ -32,12 +32,6 @@ taitriya_upanishad_config = {
32
  "lov": lambda: get_taitriya_upanishad_chapters(),
33
  "is_unique": True,
34
  },
35
- {
36
- "name": "sanskrit",
37
- "label": "Lyrics in sanskrit",
38
- "datatype": "str",
39
- "description": "The original sloka in sanskrit.",
40
- },
41
  {
42
  "name": "relative_verse_number",
43
  "label": "Relative Verse Number",
@@ -46,6 +40,13 @@ taitriya_upanishad_config = {
46
  "show_as_filter": True,
47
  "is_unique": True,
48
  },
 
 
 
 
 
 
 
49
  ],
50
  "pdf_path": "./data/taitriya_upanishad.pdf",
51
  "source": "https://stotranidhi.com/",
 
32
  "lov": lambda: get_taitriya_upanishad_chapters(),
33
  "is_unique": True,
34
  },
 
 
 
 
 
 
35
  {
36
  "name": "relative_verse_number",
37
  "label": "Relative Verse Number",
 
40
  "show_as_filter": True,
41
  "is_unique": True,
42
  },
43
+ {
44
+ "name": "sanskrit",
45
+ "label": "Lyrics in sanskrit",
46
+ "datatype": "str",
47
+ "description": "The original sloka in sanskrit.",
48
+ "show_as_filter": True,
49
+ },
50
  ],
51
  "pdf_path": "./data/taitriya_upanishad.pdf",
52
  "source": "https://stotranidhi.com/",
modules/config/vishnu_sahasranamam.py CHANGED
@@ -35,36 +35,33 @@ vishnu_sahasranamam_config = {
35
  "is_unique": True,
36
  },
37
  {
38
- "name": "page_number",
 
39
  "datatype": "int",
40
- "label": "Page Number",
41
- "description": "Page number from the source",
 
42
  },
43
  {
44
  "name": "sanskrit",
45
  "label": "Lyrics in sanskrit",
46
  "datatype": "str",
47
  "description": "The original sloka in sanskrit.",
 
48
  },
49
  {
50
  "name": "translation",
51
  "label": "English Translation",
52
  "datatype": "str",
53
  "description": "The english translation.",
 
54
  },
55
  {
56
  "name": "transliteration",
57
  "label": "English Transliteration",
58
  "datatype": "str",
59
  "description": "The english transliteration.",
60
- },
61
- {
62
- "name": "verse",
63
- "label": "Verse Number",
64
- "datatype": "int",
65
- "description": "The verse number of the sloka.",
66
  "show_as_filter": True,
67
- "is_unique": True,
68
  },
69
  ],
70
  "pdf_path": "./data/vishnu_sahasranamam.pdf",
 
35
  "is_unique": True,
36
  },
37
  {
38
+ "name": "verse",
39
+ "label": "Verse Number",
40
  "datatype": "int",
41
+ "description": "The verse number of the sloka.",
42
+ "show_as_filter": True,
43
+ "is_unique": True,
44
  },
45
  {
46
  "name": "sanskrit",
47
  "label": "Lyrics in sanskrit",
48
  "datatype": "str",
49
  "description": "The original sloka in sanskrit.",
50
+ "show_as_filter": True,
51
  },
52
  {
53
  "name": "translation",
54
  "label": "English Translation",
55
  "datatype": "str",
56
  "description": "The english translation.",
57
+ "show_as_filter": True,
58
  },
59
  {
60
  "name": "transliteration",
61
  "label": "English Transliteration",
62
  "datatype": "str",
63
  "description": "The english transliteration.",
 
 
 
 
 
 
64
  "show_as_filter": True,
 
65
  },
66
  ],
67
  "pdf_path": "./data/vishnu_sahasranamam.pdf",
server.py CHANGED
@@ -243,7 +243,7 @@ async def get_scripture(req: ScriptureRequest):
243
  # add unit index & total units (so Flutter can paginate)
244
  canonical_doc["total"] = SanatanDatabase().count(config["collection_name"])
245
 
246
- print("canonical_doc = ", canonical_doc)
247
  return canonical_doc
248
 
249
 
@@ -295,6 +295,7 @@ async def search_scripture(
295
  - `n_results`: number of random results to return
296
  """
297
  try:
 
298
  db = SanatanDatabase()
299
  config = next(
300
  (s for s in SanatanConfig().scriptures if s["name"] == scripture_name), None
@@ -305,14 +306,14 @@ async def search_scripture(
305
  metadata_where_clause=filter_obj,
306
  )
307
 
308
- print("results = ", results)
309
  # Flatten + canonicalize results
310
  formatted_results = []
311
  for i in range(len(results["metadatas"])):
312
  id = results["ids"][i]
313
  metadata_doc = results["metadatas"][i]
314
  metadata_doc["id"] = id
315
- print("metadata_doc = ", metadata_doc)
316
  document_text = (
317
  results["documents"][i] if results.get("documents") else None
318
  )
@@ -322,7 +323,7 @@ async def search_scripture(
322
  )
323
  formatted_results.append(canonical_doc)
324
 
325
- print("formatted_results = ", formatted_results)
326
  return {"results": formatted_results}
327
 
328
  except Exception as e:
 
243
  # add unit index & total units (so Flutter can paginate)
244
  canonical_doc["total"] = SanatanDatabase().count(config["collection_name"])
245
 
246
+ # print("canonical_doc = ", canonical_doc)
247
  return canonical_doc
248
 
249
 
 
295
  - `n_results`: number of random results to return
296
  """
297
  try:
298
+ logger.info("search_scripture: searching for %s with filters %s", scripture_name, filter_obj)
299
  db = SanatanDatabase()
300
  config = next(
301
  (s for s in SanatanConfig().scriptures if s["name"] == scripture_name), None
 
306
  metadata_where_clause=filter_obj,
307
  )
308
 
309
+ # print("results = ", results)
310
  # Flatten + canonicalize results
311
  formatted_results = []
312
  for i in range(len(results["metadatas"])):
313
  id = results["ids"][i]
314
  metadata_doc = results["metadatas"][i]
315
  metadata_doc["id"] = id
316
+ # print("metadata_doc = ", metadata_doc)
317
  document_text = (
318
  results["documents"][i] if results.get("documents") else None
319
  )
 
323
  )
324
  formatted_results.append(canonical_doc)
325
 
326
+ # print("formatted_results = ", formatted_results)
327
  return {"results": formatted_results}
328
 
329
  except Exception as e: