Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
config.py
CHANGED
|
@@ -15,11 +15,17 @@ class SanatanConfig:
|
|
| 15 |
"metadata_fields": [
|
| 16 |
{
|
| 17 |
"name": "file",
|
|
|
|
| 18 |
"datatype": "str",
|
| 19 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
},
|
| 21 |
-
{"name": "num_chars", "datatype": "str"},
|
| 22 |
-
{"name": "page", "datatype": "int"},
|
| 23 |
],
|
| 24 |
"pdf_path": "./data/vishnu_puranam.pdf",
|
| 25 |
"source": "https://dn720005.ca.archive.org/0/items/vishnu-purana-sanskrit-english-ocr/VISHNU-PURANA-Sanskrit-English-OCR.pdf",
|
|
@@ -49,11 +55,17 @@ class SanatanConfig:
|
|
| 49 |
"metadata_fields": [
|
| 50 |
{
|
| 51 |
"name": "file",
|
|
|
|
| 52 |
"datatype": "str",
|
| 53 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
},
|
| 55 |
-
{"name": "num_chars", "datatype": "str"},
|
| 56 |
-
{"name": "page", "datatype": "int"},
|
| 57 |
],
|
| 58 |
"pdf_path": "./data/shukla-yajur-veda.pdf",
|
| 59 |
"source": "https://www.thearyasamaj.org/uploads/book/2014/04/R1sSjG_eLb_sub_406_yajurveda.pdf",
|
|
@@ -80,11 +92,17 @@ class SanatanConfig:
|
|
| 80 |
"metadata_fields": [
|
| 81 |
{
|
| 82 |
"name": "file",
|
|
|
|
| 83 |
"datatype": "str",
|
| 84 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
},
|
| 86 |
-
{"name": "num_chars", "datatype": "str"},
|
| 87 |
-
{"name": "page", "datatype": "int"},
|
| 88 |
],
|
| 89 |
"pdf_path": "./data/bhagavat_gita.pdf",
|
| 90 |
"source": "https://dn790006.ca.archive.org/0/items/in.gov.ignca.279/279_text.pdf",
|
|
@@ -117,11 +135,17 @@ class SanatanConfig:
|
|
| 117 |
"metadata_fields": [
|
| 118 |
{
|
| 119 |
"name": "file",
|
|
|
|
| 120 |
"datatype": "str",
|
| 121 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
},
|
| 123 |
-
{"name": "num_chars", "datatype": "str"},
|
| 124 |
-
{"name": "page", "datatype": "int"},
|
| 125 |
],
|
| 126 |
"pdf_path": "./data/valmiki_ramayanam.pdf",
|
| 127 |
"source": "https://ia800509.us.archive.org/28/items/valmiki-ramayana-gita-press-english/Valmiki%20Ramayana%20Gita%20Press%20English.pdf",
|
|
@@ -163,27 +187,45 @@ class SanatanConfig:
|
|
| 163 |
"relative_path": lambda doc: f"Vishnu Sahasranamam-{doc.get("chapter","")}-{doc.get("verse","")}",
|
| 164 |
},
|
| 165 |
"metadata_fields": [
|
| 166 |
-
{
|
| 167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
{
|
| 169 |
"name": "sanskrit",
|
|
|
|
| 170 |
"datatype": "str",
|
| 171 |
-
"
|
|
|
|
| 172 |
},
|
| 173 |
{
|
| 174 |
"name": "translation",
|
|
|
|
| 175 |
"datatype": "str",
|
| 176 |
-
"
|
|
|
|
| 177 |
},
|
| 178 |
{
|
| 179 |
"name": "transliteration",
|
|
|
|
| 180 |
"datatype": "str",
|
| 181 |
-
"
|
|
|
|
| 182 |
},
|
| 183 |
{
|
| 184 |
"name": "verse",
|
|
|
|
| 185 |
"datatype": "int",
|
| 186 |
-
"
|
|
|
|
| 187 |
},
|
| 188 |
],
|
| 189 |
"pdf_path": "./data/vishnu_sahasranamam.pdf",
|
|
@@ -231,29 +273,35 @@ class SanatanConfig:
|
|
| 231 |
"metadata_fields": [
|
| 232 |
{
|
| 233 |
"name": "prabandham_code",
|
|
|
|
| 234 |
"datatype": "str",
|
| 235 |
"description": "contains the short prabandham_code. e.g. `TPL` for `Thiruppallandu`",
|
|
|
|
| 236 |
},
|
| 237 |
{
|
| 238 |
"name": "prabandham_name",
|
|
|
|
| 239 |
"datatype": "str",
|
| 240 |
"description": "contains the prabandham name. e.g. `Thiruppallandu`",
|
|
|
|
| 241 |
},
|
| 242 |
{
|
| 243 |
"name": "azhwar_name",
|
|
|
|
| 244 |
"datatype": "str",
|
| 245 |
"description": "contains the azhwar name. e.g. `Thirumangai Azhwar`",
|
|
|
|
| 246 |
},
|
| 247 |
{
|
| 248 |
"name": "divya_desams",
|
|
|
|
| 249 |
"datatype": "str",
|
| 250 |
"description": "comma separated list of divya desams. e.g. Thiruneermalai,Thiruvallikkeni.",
|
|
|
|
| 251 |
},
|
| 252 |
-
# {"name": "html_url", "datatype": "str", "description" : "Reference link for the source"},
|
| 253 |
-
# {"name": "pasuram_en", "datatype": "str", "description" : "Transliteration of pasuram in english"},
|
| 254 |
-
# {"name": "pasuram_ta", "datatype": "str", "description" : "Pasuram lyrics in tamil"},
|
| 255 |
{
|
| 256 |
"name": "title",
|
|
|
|
| 257 |
"datatype": "str",
|
| 258 |
"description": (
|
| 259 |
"Exact title of a pasuram in one of the following formats:\n"
|
|
@@ -267,9 +315,11 @@ class SanatanConfig:
|
|
| 267 |
" → Convert to: '{prabandham_code} 5.2' and pass as `title` filter.\n"
|
| 268 |
"Do NOT use `title` for general queries or keyword searches — leave it empty in those cases."
|
| 269 |
),
|
|
|
|
| 270 |
},
|
| 271 |
{
|
| 272 |
"name": "verse",
|
|
|
|
| 273 |
"datatype": "int",
|
| 274 |
"is_unique": True,
|
| 275 |
"description": (
|
|
@@ -277,30 +327,36 @@ class SanatanConfig:
|
|
| 277 |
# "Use it only when a specific prabandham name is NOT mentioned in the user query."
|
| 278 |
"For e.g. 'Give me pasuram 1176'"
|
| 279 |
),
|
|
|
|
| 280 |
},
|
| 281 |
-
# {"name": "wbw_ta", "datatype": "str", "description" : "Word by word meaning in tamil."},
|
| 282 |
{
|
| 283 |
"name": "decade",
|
|
|
|
| 284 |
"datatype": "int",
|
| 285 |
"description": (
|
| 286 |
"The decade (or `pathu` in Tamil) that this pasuram belongs to. decade is -1 when there is no associated decade."
|
| 287 |
),
|
|
|
|
| 288 |
},
|
| 289 |
{
|
| 290 |
"name": "chapter",
|
|
|
|
| 291 |
"datatype": "int",
|
| 292 |
"description": (
|
| 293 |
"chapter number of this pasuram. is -1 when there is no associated chapter number"
|
| 294 |
),
|
|
|
|
| 295 |
},
|
| 296 |
{
|
| 297 |
"name": "position_in_chapter",
|
|
|
|
| 298 |
"datatype": "int",
|
| 299 |
"description": (
|
| 300 |
"Relative verse number or pasuram number within a chapter."
|
| 301 |
"Use it only when a specific prabandham name is mentioned in the user query."
|
| 302 |
"For e.g. 'Give me the 5th pasuram from Thirupavai'"
|
| 303 |
),
|
|
|
|
| 304 |
},
|
| 305 |
],
|
| 306 |
"pdf_path": "./data/divya_prabandham.pdf",
|
|
@@ -339,11 +395,17 @@ class SanatanConfig:
|
|
| 339 |
"metadata_fields": [
|
| 340 |
{
|
| 341 |
"name": "file",
|
|
|
|
| 342 |
"datatype": "str",
|
| 343 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
},
|
| 345 |
-
{"name": "num_chars", "datatype": "str"},
|
| 346 |
-
{"name": "page", "datatype": "int"},
|
| 347 |
],
|
| 348 |
"pdf_path": "./data/bhagavata_purana.pdf",
|
| 349 |
"source": "https://dn790003.ca.archive.org/0/items/bhagavatapuranagitapress_201907/Bhagavata%20Purana%20-%20Gita%20Press_text.pdf",
|
|
@@ -373,21 +435,38 @@ class SanatanConfig:
|
|
| 373 |
"metadata_fields": [
|
| 374 |
{
|
| 375 |
"name": "kandam",
|
|
|
|
| 376 |
"datatype": "str",
|
| 377 |
"description": "The name of the Kandam or the chapter.",
|
|
|
|
| 378 |
},
|
| 379 |
{
|
| 380 |
"name": "padalam_en",
|
|
|
|
| 381 |
"datatype": "str",
|
| 382 |
"description": "The name of the Padalam (Episode) in English.",
|
|
|
|
| 383 |
},
|
| 384 |
{
|
| 385 |
"name": "padalam_ta",
|
|
|
|
| 386 |
"datatype": "str",
|
| 387 |
"description": "The name of the Padalam (Episode) in Tamil.",
|
| 388 |
},
|
| 389 |
-
{
|
| 390 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 391 |
],
|
| 392 |
"pdf_path": "./data/kamba_ramayanam.pdf",
|
| 393 |
"source": "https://www.hindupedia.com/images/1/13/Kamba_Ramayanam_I.pdf",
|
|
@@ -426,11 +505,14 @@ class SanatanConfig:
|
|
| 426 |
"metadata_fields": [
|
| 427 |
{
|
| 428 |
"name": "chunk_index",
|
|
|
|
| 429 |
"datatype": "int",
|
| 430 |
"description": "The index of the chunk",
|
|
|
|
| 431 |
},
|
| 432 |
{
|
| 433 |
"name": "filename",
|
|
|
|
| 434 |
"datatype": "str",
|
| 435 |
"description": "The name of the file.",
|
| 436 |
},
|
|
@@ -473,13 +555,17 @@ class SanatanConfig:
|
|
| 473 |
"metadata_fields": [
|
| 474 |
{
|
| 475 |
"name": "sloka_number",
|
|
|
|
| 476 |
"datatype": "int",
|
| 477 |
"description": "The index of the sloka or verse",
|
|
|
|
| 478 |
},
|
| 479 |
{
|
| 480 |
"name": "meaning_short",
|
|
|
|
| 481 |
"datatype": "str",
|
| 482 |
"description": "A short meaning of the sanskrit verse in English.",
|
|
|
|
| 483 |
},
|
| 484 |
],
|
| 485 |
"pdf_path": "./data/chathusloki.pdf",
|
|
@@ -512,23 +598,30 @@ class SanatanConfig:
|
|
| 512 |
"metadata_fields": [
|
| 513 |
{
|
| 514 |
"name": "sloka_number",
|
|
|
|
| 515 |
"datatype": "int",
|
| 516 |
"description": "The index of the sloka or verse",
|
|
|
|
| 517 |
},
|
| 518 |
{
|
| 519 |
"name": "meaning_short",
|
|
|
|
| 520 |
"datatype": "str",
|
| 521 |
"description": "A short meaning of the sanskrit verse in English.",
|
|
|
|
| 522 |
},
|
| 523 |
{
|
| 524 |
"name": "sanskrit",
|
|
|
|
| 525 |
"datatype": "str",
|
| 526 |
"description": "Verse in sanskrit",
|
| 527 |
},
|
| 528 |
{
|
| 529 |
"name": "transliteration",
|
|
|
|
| 530 |
"datatype": "str",
|
| 531 |
"description": "Verse transliterated to English",
|
|
|
|
| 532 |
},
|
| 533 |
],
|
| 534 |
"pdf_path": "./data/sri_stavam.pdf",
|
|
@@ -560,28 +653,37 @@ class SanatanConfig:
|
|
| 560 |
"metadata_fields": [
|
| 561 |
{
|
| 562 |
"name": "video_id",
|
|
|
|
| 563 |
"datatype": "str",
|
| 564 |
"description": "The video id as in YouTube",
|
|
|
|
| 565 |
},
|
| 566 |
{
|
| 567 |
"name": "video_title",
|
|
|
|
| 568 |
"datatype": "str",
|
| 569 |
"description": "The title of the video as in YouTube",
|
|
|
|
| 570 |
},
|
| 571 |
{
|
| 572 |
"name": "description",
|
|
|
|
| 573 |
"datatype": "str",
|
| 574 |
"description": "Description as in YouTube",
|
|
|
|
| 575 |
},
|
| 576 |
{
|
| 577 |
"name": "channel_url",
|
|
|
|
| 578 |
"datatype": "str",
|
| 579 |
"description": "URL of the YouTube Channel",
|
| 580 |
},
|
| 581 |
{
|
| 582 |
"name": "channel_title",
|
|
|
|
| 583 |
"datatype": "str",
|
| 584 |
"description": "Title of the YouTube Channel",
|
|
|
|
| 585 |
},
|
| 586 |
],
|
| 587 |
"pdf_path": "./data/none.pdf",
|
|
|
|
| 15 |
"metadata_fields": [
|
| 16 |
{
|
| 17 |
"name": "file",
|
| 18 |
+
"label": "File Name",
|
| 19 |
"datatype": "str",
|
| 20 |
+
"description": "name of the file from which the information was extracted",
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"name": "page",
|
| 24 |
+
"datatype": "int",
|
| 25 |
+
"label": "Page Number",
|
| 26 |
+
"description": "Page number from the source",
|
| 27 |
+
"show_as_filter" : True
|
| 28 |
},
|
|
|
|
|
|
|
| 29 |
],
|
| 30 |
"pdf_path": "./data/vishnu_puranam.pdf",
|
| 31 |
"source": "https://dn720005.ca.archive.org/0/items/vishnu-purana-sanskrit-english-ocr/VISHNU-PURANA-Sanskrit-English-OCR.pdf",
|
|
|
|
| 55 |
"metadata_fields": [
|
| 56 |
{
|
| 57 |
"name": "file",
|
| 58 |
+
"label": "File Name",
|
| 59 |
"datatype": "str",
|
| 60 |
+
"description": "name of the file from which the information was extracted",
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"name": "page",
|
| 64 |
+
"datatype": "int",
|
| 65 |
+
"label": "Page Number",
|
| 66 |
+
"description": "Page number from the source",
|
| 67 |
+
"show_as_filter" : True
|
| 68 |
},
|
|
|
|
|
|
|
| 69 |
],
|
| 70 |
"pdf_path": "./data/shukla-yajur-veda.pdf",
|
| 71 |
"source": "https://www.thearyasamaj.org/uploads/book/2014/04/R1sSjG_eLb_sub_406_yajurveda.pdf",
|
|
|
|
| 92 |
"metadata_fields": [
|
| 93 |
{
|
| 94 |
"name": "file",
|
| 95 |
+
"label": "File Name",
|
| 96 |
"datatype": "str",
|
| 97 |
+
"description": "name of the file from which the information was extracted",
|
| 98 |
+
},
|
| 99 |
+
{
|
| 100 |
+
"name": "page",
|
| 101 |
+
"datatype": "int",
|
| 102 |
+
"label": "Page Number",
|
| 103 |
+
"description": "Page number from the source",
|
| 104 |
+
"show_as_filter" : True
|
| 105 |
},
|
|
|
|
|
|
|
| 106 |
],
|
| 107 |
"pdf_path": "./data/bhagavat_gita.pdf",
|
| 108 |
"source": "https://dn790006.ca.archive.org/0/items/in.gov.ignca.279/279_text.pdf",
|
|
|
|
| 135 |
"metadata_fields": [
|
| 136 |
{
|
| 137 |
"name": "file",
|
| 138 |
+
"label": "File Name",
|
| 139 |
"datatype": "str",
|
| 140 |
+
"description": "name of the file from which the information was extracted",
|
| 141 |
+
},
|
| 142 |
+
{
|
| 143 |
+
"name": "page",
|
| 144 |
+
"datatype": "int",
|
| 145 |
+
"label": "Page Number",
|
| 146 |
+
"description": "Page number from the source",
|
| 147 |
+
"show_as_filter" : True
|
| 148 |
},
|
|
|
|
|
|
|
| 149 |
],
|
| 150 |
"pdf_path": "./data/valmiki_ramayanam.pdf",
|
| 151 |
"source": "https://ia800509.us.archive.org/28/items/valmiki-ramayana-gita-press-english/Valmiki%20Ramayana%20Gita%20Press%20English.pdf",
|
|
|
|
| 187 |
"relative_path": lambda doc: f"Vishnu Sahasranamam-{doc.get("chapter","")}-{doc.get("verse","")}",
|
| 188 |
},
|
| 189 |
"metadata_fields": [
|
| 190 |
+
{
|
| 191 |
+
"name": "chapter",
|
| 192 |
+
"datatype": "str",
|
| 193 |
+
"label": "Chapter Name",
|
| 194 |
+
"description": "Name of the Chapter",
|
| 195 |
+
},
|
| 196 |
+
{
|
| 197 |
+
"name": "page_number",
|
| 198 |
+
"datatype": "int",
|
| 199 |
+
"label": "Page Number",
|
| 200 |
+
"description": "Page number from the source",
|
| 201 |
+
},
|
| 202 |
{
|
| 203 |
"name": "sanskrit",
|
| 204 |
+
"label": "Lyrics in sanskrit",
|
| 205 |
"datatype": "str",
|
| 206 |
+
"description": "The original sloka in sanskrit.",
|
| 207 |
+
"show_as_filter" : True
|
| 208 |
},
|
| 209 |
{
|
| 210 |
"name": "translation",
|
| 211 |
+
"label": "English Translation",
|
| 212 |
"datatype": "str",
|
| 213 |
+
"description": "The english translation.",
|
| 214 |
+
"show_as_filter" : True
|
| 215 |
},
|
| 216 |
{
|
| 217 |
"name": "transliteration",
|
| 218 |
+
"label": "English Transliteration",
|
| 219 |
"datatype": "str",
|
| 220 |
+
"description": "The english transliteration.",
|
| 221 |
+
"show_as_filter" : True
|
| 222 |
},
|
| 223 |
{
|
| 224 |
"name": "verse",
|
| 225 |
+
"label": "Verse Number",
|
| 226 |
"datatype": "int",
|
| 227 |
+
"description": "The verse number of the sloka.",
|
| 228 |
+
"show_as_filter" : True
|
| 229 |
},
|
| 230 |
],
|
| 231 |
"pdf_path": "./data/vishnu_sahasranamam.pdf",
|
|
|
|
| 273 |
"metadata_fields": [
|
| 274 |
{
|
| 275 |
"name": "prabandham_code",
|
| 276 |
+
"label": "Prabandham Code",
|
| 277 |
"datatype": "str",
|
| 278 |
"description": "contains the short prabandham_code. e.g. `TPL` for `Thiruppallandu`",
|
| 279 |
+
"show_as_filter" : True
|
| 280 |
},
|
| 281 |
{
|
| 282 |
"name": "prabandham_name",
|
| 283 |
+
"label": "Prabandham Name",
|
| 284 |
"datatype": "str",
|
| 285 |
"description": "contains the prabandham name. e.g. `Thiruppallandu`",
|
| 286 |
+
"show_as_filter" : True
|
| 287 |
},
|
| 288 |
{
|
| 289 |
"name": "azhwar_name",
|
| 290 |
+
"label": "Azhwar Name",
|
| 291 |
"datatype": "str",
|
| 292 |
"description": "contains the azhwar name. e.g. `Thirumangai Azhwar`",
|
| 293 |
+
"show_as_filter" : True
|
| 294 |
},
|
| 295 |
{
|
| 296 |
"name": "divya_desams",
|
| 297 |
+
"label": "Divya Desams",
|
| 298 |
"datatype": "str",
|
| 299 |
"description": "comma separated list of divya desams. e.g. Thiruneermalai,Thiruvallikkeni.",
|
| 300 |
+
"show_as_filter" : True
|
| 301 |
},
|
|
|
|
|
|
|
|
|
|
| 302 |
{
|
| 303 |
"name": "title",
|
| 304 |
+
"label": "Pasuram Title",
|
| 305 |
"datatype": "str",
|
| 306 |
"description": (
|
| 307 |
"Exact title of a pasuram in one of the following formats:\n"
|
|
|
|
| 315 |
" → Convert to: '{prabandham_code} 5.2' and pass as `title` filter.\n"
|
| 316 |
"Do NOT use `title` for general queries or keyword searches — leave it empty in those cases."
|
| 317 |
),
|
| 318 |
+
"show_as_filter" : True
|
| 319 |
},
|
| 320 |
{
|
| 321 |
"name": "verse",
|
| 322 |
+
"label": "Absolute Pasuram Number",
|
| 323 |
"datatype": "int",
|
| 324 |
"is_unique": True,
|
| 325 |
"description": (
|
|
|
|
| 327 |
# "Use it only when a specific prabandham name is NOT mentioned in the user query."
|
| 328 |
"For e.g. 'Give me pasuram 1176'"
|
| 329 |
),
|
| 330 |
+
"show_as_filter" : True
|
| 331 |
},
|
|
|
|
| 332 |
{
|
| 333 |
"name": "decade",
|
| 334 |
+
"label": "Decade Number (Pathu)",
|
| 335 |
"datatype": "int",
|
| 336 |
"description": (
|
| 337 |
"The decade (or `pathu` in Tamil) that this pasuram belongs to. decade is -1 when there is no associated decade."
|
| 338 |
),
|
| 339 |
+
"show_as_filter" : True
|
| 340 |
},
|
| 341 |
{
|
| 342 |
"name": "chapter",
|
| 343 |
+
"label": "Chapter Number",
|
| 344 |
"datatype": "int",
|
| 345 |
"description": (
|
| 346 |
"chapter number of this pasuram. is -1 when there is no associated chapter number"
|
| 347 |
),
|
| 348 |
+
"show_as_filter" : True
|
| 349 |
},
|
| 350 |
{
|
| 351 |
"name": "position_in_chapter",
|
| 352 |
+
"label": "Relative Pasuram Number",
|
| 353 |
"datatype": "int",
|
| 354 |
"description": (
|
| 355 |
"Relative verse number or pasuram number within a chapter."
|
| 356 |
"Use it only when a specific prabandham name is mentioned in the user query."
|
| 357 |
"For e.g. 'Give me the 5th pasuram from Thirupavai'"
|
| 358 |
),
|
| 359 |
+
"show_as_filter" : True
|
| 360 |
},
|
| 361 |
],
|
| 362 |
"pdf_path": "./data/divya_prabandham.pdf",
|
|
|
|
| 395 |
"metadata_fields": [
|
| 396 |
{
|
| 397 |
"name": "file",
|
| 398 |
+
"label": "File Name",
|
| 399 |
"datatype": "str",
|
| 400 |
+
"description": "name of the file from which the information was extracted",
|
| 401 |
+
},
|
| 402 |
+
{
|
| 403 |
+
"name": "page",
|
| 404 |
+
"datatype": "int",
|
| 405 |
+
"label": "Page Number",
|
| 406 |
+
"description": "Page number from the source",
|
| 407 |
+
"show_as_filter" : True
|
| 408 |
},
|
|
|
|
|
|
|
| 409 |
],
|
| 410 |
"pdf_path": "./data/bhagavata_purana.pdf",
|
| 411 |
"source": "https://dn790003.ca.archive.org/0/items/bhagavatapuranagitapress_201907/Bhagavata%20Purana%20-%20Gita%20Press_text.pdf",
|
|
|
|
| 435 |
"metadata_fields": [
|
| 436 |
{
|
| 437 |
"name": "kandam",
|
| 438 |
+
"label": "Kandam",
|
| 439 |
"datatype": "str",
|
| 440 |
"description": "The name of the Kandam or the chapter.",
|
| 441 |
+
"show_as_filter" : True
|
| 442 |
},
|
| 443 |
{
|
| 444 |
"name": "padalam_en",
|
| 445 |
+
"label": "Chapter Name",
|
| 446 |
"datatype": "str",
|
| 447 |
"description": "The name of the Padalam (Episode) in English.",
|
| 448 |
+
"show_as_filter" : True
|
| 449 |
},
|
| 450 |
{
|
| 451 |
"name": "padalam_ta",
|
| 452 |
+
"label": "Padalam name in Tamil",
|
| 453 |
"datatype": "str",
|
| 454 |
"description": "The name of the Padalam (Episode) in Tamil.",
|
| 455 |
},
|
| 456 |
+
{
|
| 457 |
+
"name": "page",
|
| 458 |
+
"datatype": "int",
|
| 459 |
+
"label": "Page Number",
|
| 460 |
+
"description": "Page number from the source",
|
| 461 |
+
"show_as_filter" : True
|
| 462 |
+
},
|
| 463 |
+
{
|
| 464 |
+
"name": "verse_number",
|
| 465 |
+
"datatype": "int",
|
| 466 |
+
"label": "Verse Number",
|
| 467 |
+
"description": "Verse Number",
|
| 468 |
+
"show_as_filter" : True
|
| 469 |
+
},
|
| 470 |
],
|
| 471 |
"pdf_path": "./data/kamba_ramayanam.pdf",
|
| 472 |
"source": "https://www.hindupedia.com/images/1/13/Kamba_Ramayanam_I.pdf",
|
|
|
|
| 505 |
"metadata_fields": [
|
| 506 |
{
|
| 507 |
"name": "chunk_index",
|
| 508 |
+
"label" : "Page Index",
|
| 509 |
"datatype": "int",
|
| 510 |
"description": "The index of the chunk",
|
| 511 |
+
"show_as_filter" : True
|
| 512 |
},
|
| 513 |
{
|
| 514 |
"name": "filename",
|
| 515 |
+
"label" : "File name from the source",
|
| 516 |
"datatype": "str",
|
| 517 |
"description": "The name of the file.",
|
| 518 |
},
|
|
|
|
| 555 |
"metadata_fields": [
|
| 556 |
{
|
| 557 |
"name": "sloka_number",
|
| 558 |
+
"label" : "Slokam Number",
|
| 559 |
"datatype": "int",
|
| 560 |
"description": "The index of the sloka or verse",
|
| 561 |
+
"show_as_filter" : True
|
| 562 |
},
|
| 563 |
{
|
| 564 |
"name": "meaning_short",
|
| 565 |
+
"label" : "Short meaning",
|
| 566 |
"datatype": "str",
|
| 567 |
"description": "A short meaning of the sanskrit verse in English.",
|
| 568 |
+
"show_as_filter" : True
|
| 569 |
},
|
| 570 |
],
|
| 571 |
"pdf_path": "./data/chathusloki.pdf",
|
|
|
|
| 598 |
"metadata_fields": [
|
| 599 |
{
|
| 600 |
"name": "sloka_number",
|
| 601 |
+
"label" : "Slokam Number",
|
| 602 |
"datatype": "int",
|
| 603 |
"description": "The index of the sloka or verse",
|
| 604 |
+
"show_as_filter" : True
|
| 605 |
},
|
| 606 |
{
|
| 607 |
"name": "meaning_short",
|
| 608 |
+
"label" : "Short meaning",
|
| 609 |
"datatype": "str",
|
| 610 |
"description": "A short meaning of the sanskrit verse in English.",
|
| 611 |
+
"show_as_filter" : True
|
| 612 |
},
|
| 613 |
{
|
| 614 |
"name": "sanskrit",
|
| 615 |
+
"label" : "Lyrics in sanskrit",
|
| 616 |
"datatype": "str",
|
| 617 |
"description": "Verse in sanskrit",
|
| 618 |
},
|
| 619 |
{
|
| 620 |
"name": "transliteration",
|
| 621 |
+
"label" : "English Transliteration",
|
| 622 |
"datatype": "str",
|
| 623 |
"description": "Verse transliterated to English",
|
| 624 |
+
"show_as_filter" : True
|
| 625 |
},
|
| 626 |
],
|
| 627 |
"pdf_path": "./data/sri_stavam.pdf",
|
|
|
|
| 653 |
"metadata_fields": [
|
| 654 |
{
|
| 655 |
"name": "video_id",
|
| 656 |
+
"label" : "Video ID",
|
| 657 |
"datatype": "str",
|
| 658 |
"description": "The video id as in YouTube",
|
| 659 |
+
"show_as_filter" : True
|
| 660 |
},
|
| 661 |
{
|
| 662 |
"name": "video_title",
|
| 663 |
+
"label" : "Video Title",
|
| 664 |
"datatype": "str",
|
| 665 |
"description": "The title of the video as in YouTube",
|
| 666 |
+
"show_as_filter" : True
|
| 667 |
},
|
| 668 |
{
|
| 669 |
"name": "description",
|
| 670 |
+
"label" : "Video Description",
|
| 671 |
"datatype": "str",
|
| 672 |
"description": "Description as in YouTube",
|
| 673 |
+
"show_as_filter" : True
|
| 674 |
},
|
| 675 |
{
|
| 676 |
"name": "channel_url",
|
| 677 |
+
"label" : "Channel URL",
|
| 678 |
"datatype": "str",
|
| 679 |
"description": "URL of the YouTube Channel",
|
| 680 |
},
|
| 681 |
{
|
| 682 |
"name": "channel_title",
|
| 683 |
+
"label" : "Channel Title",
|
| 684 |
"datatype": "str",
|
| 685 |
"description": "Title of the YouTube Channel",
|
| 686 |
+
"show_as_filter" : True
|
| 687 |
},
|
| 688 |
],
|
| 689 |
"pdf_path": "./data/none.pdf",
|
db.py
CHANGED
|
@@ -151,7 +151,7 @@ class SanatanDatabase:
|
|
| 151 |
result = {"document": documents[0]}
|
| 152 |
if metadatas:
|
| 153 |
result.update(metadatas[0])
|
| 154 |
-
print("raw data = ", result)
|
| 155 |
return result
|
| 156 |
else:
|
| 157 |
print("No data available")
|
|
|
|
| 151 |
result = {"document": documents[0]}
|
| 152 |
if metadatas:
|
| 153 |
result.update(metadatas[0])
|
| 154 |
+
# print("raw data = ", result)
|
| 155 |
return result
|
| 156 |
else:
|
| 157 |
print("No data available")
|
server.py
CHANGED
|
@@ -11,6 +11,7 @@ from pydantic import BaseModel
|
|
| 11 |
from chat_utils import chat
|
| 12 |
from config import SanatanConfig
|
| 13 |
from db import SanatanDatabase
|
|
|
|
| 14 |
from modules.quiz.answer_validator import validate_answer
|
| 15 |
from modules.quiz.models import Question
|
| 16 |
from modules.quiz.quiz_helper import generate_question
|
|
@@ -245,10 +246,54 @@ async def get_scripture_configs():
|
|
| 245 |
"name": s["name"], # e.g. "bhagavad_gita"
|
| 246 |
"title": s["title"], # e.g. "Bhagavad Gita"
|
| 247 |
"unit": s["unit"], # e.g. "verse" or "page"
|
|
|
|
| 248 |
"total": num_units,
|
| 249 |
"enabled": "field_mapping" in s,
|
| 250 |
-
"source"
|
| 251 |
"credits": s.get("credits", f"{s.get("source","")}"),
|
|
|
|
| 252 |
}
|
| 253 |
)
|
| 254 |
return {"scriptures": sorted(scriptures, key=lambda s: s["title"])}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
from chat_utils import chat
|
| 12 |
from config import SanatanConfig
|
| 13 |
from db import SanatanDatabase
|
| 14 |
+
from metadata import MetadataWhereClause
|
| 15 |
from modules.quiz.answer_validator import validate_answer
|
| 16 |
from modules.quiz.models import Question
|
| 17 |
from modules.quiz.quiz_helper import generate_question
|
|
|
|
| 246 |
"name": s["name"], # e.g. "bhagavad_gita"
|
| 247 |
"title": s["title"], # e.g. "Bhagavad Gita"
|
| 248 |
"unit": s["unit"], # e.g. "verse" or "page"
|
| 249 |
+
"unit_field": s.get("unit_field", s.get("unit")),
|
| 250 |
"total": num_units,
|
| 251 |
"enabled": "field_mapping" in s,
|
| 252 |
+
"source": s.get("source", ""),
|
| 253 |
"credits": s.get("credits", f"{s.get("source","")}"),
|
| 254 |
+
"metadata_fields": s.get("metadata_fields", []),
|
| 255 |
}
|
| 256 |
)
|
| 257 |
return {"scriptures": sorted(scriptures, key=lambda s: s["title"])}
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
@router.post("/scripture/{scripture_name}/search")
async def search_scripture(
    scripture_name: str,
    filter_obj: Optional[MetadataWhereClause] = None,
    n_results: int = 1,
):
    """
    Search a scripture collection with optional filters.

    - `scripture_name`: Name of the collection
    - `filter_obj`: MetadataWhereClause (filters, groups, operator),
      forwarded unchanged to the database layer
    - `n_results`: number of random results to return

    Returns `{"results": [...]}` holding one canonicalized document per
    metadata entry returned by the database, or `{"error": "<message>"}`
    when any step raises.
    """
    try:
        db = SanatanDatabase()
        results = db.fetch_random_data(
            collection_name=scripture_name,
            metadata_where_clause=filter_obj,
            n_results=n_results,
        )

        print("results = ", results)

        # Build the config and look up the documents list once instead of
        # repeating both on every iteration.
        config = SanatanConfig()
        documents = results.get("documents")

        # Flatten + canonicalize results. When no documents came back the
        # document text is passed as None for every metadata entry.
        formatted_results = [
            config.canonicalize_document(
                scripture_name,
                documents[i] if documents else None,
                metadata_doc,
            )
            for i, metadata_doc in enumerate(results["metadatas"])
        ]

        return {"results": formatted_results}

    except Exception as e:
        # Endpoint boundary: report the failure in the response body rather
        # than letting the exception propagate to the framework.
        print("Error while searching ", e)
        return {"error": str(e)}
|