Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
- config.py +93 -0
- copy_chromadb.py +5 -0
config.py
CHANGED
|
@@ -1050,6 +1050,99 @@ class SanatanConfig:
|
|
| 1050 |
],
|
| 1051 |
"llm_hints": [],
|
| 1052 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1053 |
]
|
| 1054 |
|
| 1055 |
def get_scripture_by_collection(self, collection_name: str):
|
|
|
|
| 1050 |
],
|
| 1051 |
"llm_hints": [],
|
| 1052 |
},
|
| 1053 |
+
{
|
| 1054 |
+
"name": "taitriya_brahmanam",
|
| 1055 |
+
"title": "Taitriya Brahmanam",
|
| 1056 |
+
"output_dir": "./output/taitriya_brahmanam",
|
| 1057 |
+
"collection_name": "taitriya_brahmanam",
|
| 1058 |
+
"collection_embedding_fn": "openai",
|
| 1059 |
+
"unit": "panchadhi",
|
| 1060 |
+
"unit_field": "panchadhi",
|
| 1061 |
+
# "chapter_order": lambda: get_chapter_order_from_taitriya_brahmanam(),
|
| 1062 |
+
"field_mapping": {
|
| 1063 |
+
"text": "sanskrit",
|
| 1064 |
+
"unit_index": "panchadhi",
|
| 1065 |
+
"transliteration": "transliteration",
|
| 1066 |
+
"chapter_name": lambda doc: (
|
| 1067 |
+
doc.get("prapaatakam_name")
|
| 1068 |
+
if doc.get("prapaatakam_name") and doc.get("prapaatakam_name") != "-"
|
| 1069 |
+
else (
|
| 1070 |
+
doc.get("ashtakam_name")
|
| 1071 |
+
if doc.get("ashtakam_name") and doc.get("ashtakam_name") != "-"
|
| 1072 |
+
else "-"
|
| 1073 |
+
)
|
| 1074 |
+
),
|
| 1075 |
+
"relative_path": lambda doc: (
|
| 1076 |
+
"-"
|
| 1077 |
+
if not doc.get("panchadhi") or doc.get("panchadhi") == "-"
|
| 1078 |
+
else f"{doc.get('ashtakam')}.{doc.get('prapaatakam')}.{doc.get('anuvakam')}.{doc.get('panchadhi_index')} || {doc.get('panchadhi_sa')} ||"
|
| 1079 |
+
),
|
| 1080 |
+
},
|
| 1081 |
+
"metadata_fields": [
|
| 1082 |
+
{
|
| 1083 |
+
"name": "ashtakam",
|
| 1084 |
+
"datatype": "int",
|
| 1085 |
+
"label": "Ashtakam Number",
|
| 1086 |
+
"description": "Ashtakam Number",
|
| 1087 |
+
"show_as_filter": True,
|
| 1088 |
+
"is_unique": True,
|
| 1089 |
+
},
|
| 1090 |
+
{
|
| 1091 |
+
"name": "prapaatakam",
|
| 1092 |
+
"datatype": "int",
|
| 1093 |
+
"label": "Prapaatakam Number",
|
| 1094 |
+
"description": "Prapaatakam Number",
|
| 1095 |
+
"show_as_filter": True,
|
| 1096 |
+
"is_unique": True,
|
| 1097 |
+
},
|
| 1098 |
+
{
|
| 1099 |
+
"name": "anuvakam",
|
| 1100 |
+
"datatype": "int",
|
| 1101 |
+
"label": "Anuvakam Number",
|
| 1102 |
+
"description": "Anuvakam Number",
|
| 1103 |
+
"show_as_filter": True,
|
| 1104 |
+
"is_unique": True,
|
| 1105 |
+
},
|
| 1106 |
+
{
|
| 1107 |
+
"name": "panchadhi_index",
|
| 1108 |
+
"datatype": "int",
|
| 1109 |
+
"label": "Relative Panchadhi Number",
|
| 1110 |
+
"description": "Relative Panchadhi Number",
|
| 1111 |
+
"show_as_filter": True,
|
| 1112 |
+
"is_unique": True,
|
| 1113 |
+
},
|
| 1114 |
+
{
|
| 1115 |
+
"name": "panchadhi",
|
| 1116 |
+
"datatype": "int",
|
| 1117 |
+
"label": "Absolute Panchadhi Number",
|
| 1118 |
+
"description": "Absolute Panchadhi Number",
|
| 1119 |
+
"show_as_filter": True,
|
| 1120 |
+
"is_unique": True,
|
| 1121 |
+
},
|
| 1122 |
+
{
|
| 1123 |
+
"name": "sanskrit",
|
| 1124 |
+
"label": "Lyrics in sanskrit",
|
| 1125 |
+
"datatype": "str",
|
| 1126 |
+
"description": "The original sloka in sanskrit.",
|
| 1127 |
+
},
|
| 1128 |
+
{
|
| 1129 |
+
"name": "transliteration",
|
| 1130 |
+
"label": "Transliteration in english",
|
| 1131 |
+
"datatype": "str",
|
| 1132 |
+
"description": "The original sloka transliterated in English.",
|
| 1133 |
+
},
|
| 1134 |
+
],
|
| 1135 |
+
"pdf_path": "./data/taitriya_brahmanam.pdf",
|
| 1136 |
+
"source": "https://sanskritdocuments.org/doc_veda/taittirIyabrAhmaNam.html",
|
| 1137 |
+
"language": "san+eng",
|
| 1138 |
+
"example_labels": [
|
| 1139 |
+
"Taitriya Brahmanam",
|
| 1140 |
+
],
|
| 1141 |
+
"examples": [
|
| 1142 |
+
"Show some verses from Taitriya Brahmanam",
|
| 1143 |
+
],
|
| 1144 |
+
"llm_hints": [],
|
| 1145 |
+
},
|
| 1146 |
]
|
| 1147 |
|
| 1148 |
def get_scripture_by_collection(self, collection_name: str):
|
copy_chromadb.py
CHANGED
|
@@ -43,6 +43,11 @@ db_config = {
|
|
| 43 |
"source_collection_name": "taitriya_samhitha",
|
| 44 |
"destination_collection_name": "taitriya_samhitha",
|
| 45 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
}
|
| 47 |
|
| 48 |
parser = argparse.ArgumentParser(description="My app with database parameter")
|
|
|
|
| 43 |
"source_collection_name": "taitriya_samhitha",
|
| 44 |
"destination_collection_name": "taitriya_samhitha",
|
| 45 |
},
|
| 46 |
+
"taitriya_brahmanam": {
|
| 47 |
+
"source_db_path": "../taitriya_brahmanam_ai/chromadb_store",
|
| 48 |
+
"source_collection_name": "taitriya_brahmanam",
|
| 49 |
+
"destination_collection_name": "taitriya_brahmanam",
|
| 50 |
+
},
|
| 51 |
}
|
| 52 |
|
| 53 |
parser = argparse.ArgumentParser(description="My app with database parameter")
|