vikramvasudevan committed on
Commit
dbbdb8c
·
verified ·
1 Parent(s): f3f8f57

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. config.py +81 -0
  2. copy_chromadb.py +5 -0
config.py CHANGED
@@ -1143,6 +1143,87 @@ class SanatanConfig:
1143
  ],
1144
  "llm_hints": [],
1145
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1146
  ]
1147
 
1148
  def get_scripture_by_collection(self, collection_name: str):
 
1143
  ],
1144
  "llm_hints": [],
1145
  },
1146
+ {
1147
+ "name": "katakam",
1148
+ "title": "Katakam",
1149
+ "output_dir": "./output/katakam",
1150
+ "collection_name": "katakam",
1151
+ "collection_embedding_fn": "openai",
1152
+ "unit": "panchadhi",
1153
+ "unit_field": "panchadhi",
1154
+ # "chapter_order": lambda: get_chapter_order_from_katakam(),
1155
+ "field_mapping": {
1156
+ "text": "sanskrit",
1157
+ "unit_index": "panchadhi",
1158
+ "transliteration": "transliteration",
1159
+ "chapter_name": lambda doc: (
1160
+ doc.get("prapaatakam_name")
1161
+ if doc.get("prapaatakam_name") and doc.get("prapaatakam_name") != "-"
1162
+ else "॥ काठकम् ॥"
1163
+ ),
1164
+ "relative_path": lambda doc: (
1165
+ "-"
1166
+ if not doc.get("panchadhi") or doc.get("panchadhi") == "-"
1167
+ else f"{doc.get('prapaatakam')}.{doc.get('anuvakam')}.{doc.get('panchadhi_index')} || {doc.get('panchadhi_sa')} ||"
1168
+ ),
1169
+ },
1170
+ "metadata_fields": [
1171
+ {
1172
+ "name": "prapaatakam",
1173
+ "datatype": "int",
1174
+ "label": "Prapaatakam Number",
1175
+ "description": "Prapaatakam Number",
1176
+ "show_as_filter": True,
1177
+ "is_unique": True,
1178
+ },
1179
+ {
1180
+ "name": "anuvakam",
1181
+ "datatype": "int",
1182
+ "label": "Anuvakam Number",
1183
+ "description": "Anuvakam Number",
1184
+ "show_as_filter": True,
1185
+ "is_unique": True,
1186
+ },
1187
+ {
1188
+ "name": "panchadhi_index",
1189
+ "datatype": "int",
1190
+ "label": "Relative Panchadhi Number",
1191
+ "description": "Relative Panchadhi Number",
1192
+ "show_as_filter": True,
1193
+ "is_unique": True,
1194
+ },
1195
+ {
1196
+ "name": "panchadhi",
1197
+ "datatype": "int",
1198
+ "label": "Absolute Panchadhi Number",
1199
+ "description": "Absolute Panchadhi Number",
1200
+ "show_as_filter": True,
1201
+ "is_unique": True,
1202
+ },
1203
+ {
1204
+ "name": "sanskrit",
1205
+ "label": "Lyrics in sanskrit",
1206
+ "datatype": "str",
1207
+ "description": "The original sloka in sanskrit.",
1208
+ },
1209
+ {
1210
+ "name": "transliteration",
1211
+ "label": "Transliteration in english",
1212
+ "datatype": "str",
1213
+ "description": "The original sloka transliterated in English.",
1214
+ },
1215
+ ],
1216
+ "pdf_path": "./data/katakam.pdf",
1217
+ "source": "https://sanskritdocuments.org/doc_veda/taittirIyabrAhmaNam.html",
1218
+ "language": "san+eng",
1219
+ "example_labels": [
1220
+ "Katakam",
1221
+ ],
1222
+ "examples": [
1223
+ "Show some verses from Katakam",
1224
+ ],
1225
+ "llm_hints": [],
1226
+ },
1227
  ]
1228
 
1229
  def get_scripture_by_collection(self, collection_name: str):
copy_chromadb.py CHANGED
@@ -48,6 +48,11 @@ db_config = {
48
  "source_collection_name": "taitriya_brahmanam",
49
  "destination_collection_name": "taitriya_brahmanam",
50
  },
 
 
 
 
 
51
  }
52
 
53
  parser = argparse.ArgumentParser(description="My app with database parameter")
 
48
  "source_collection_name": "taitriya_brahmanam",
49
  "destination_collection_name": "taitriya_brahmanam",
50
  },
51
+ "katakam": {
52
+ "source_db_path": "../taitriya_brahmanam_ai/chromadb_store",
53
+ "source_collection_name": "katakam",
54
+ "destination_collection_name": "katakam",
55
+ },
56
  }
57
 
58
  parser = argparse.ArgumentParser(description="My app with database parameter")