Spaces:
Runtime error
Runtime error
| import json | |
| import chromadb | |
| from tqdm import tqdm | |
| from embeddings import get_embedding | |
# Ingestion script: reads the sloka JSON file, embeds each sloka, and stores
# the text, embedding, and metadata in a Chroma collection that is rebuilt
# from scratch on every run.

# ===== SETTINGS =====
JSON_FILE = "./output/sri_stavam/sri_stavam_detailed.json"  # your JSON file path
COLLECTION_NAME = "sri_stavam"

# Load the JSON data (iterated below as a sequence of per-sloka dicts)
with open(JSON_FILE, "r", encoding="utf-8") as f:
    slokas = json.load(f)

# Start Chroma DB client (can persist to disk or run in-memory)
client = chromadb.PersistentClient(path="./chromadb-store")  # persistent
# OR: client = chromadb.Client()  # in-memory only

# Drop any previous copy of the collection so re-running the script does not
# leave stale or duplicate entries behind, then create it fresh.
try:
    client.delete_collection(name=COLLECTION_NAME)
except Exception:
    # Best-effort: on a first run there is nothing to delete. The exception
    # type raised for a missing collection differs between chromadb releases,
    # so catch Exception here -- but not a bare `except:`, which would also
    # swallow KeyboardInterrupt and SystemExit.
    pass
collection = client.get_or_create_collection(name=COLLECTION_NAME)

# Prepare the parallel lists that collection.add() expects
ids = []
documents = []
embeddings = []
metadatas = []

# Wrap the list itself (not the enumerate object) so tqdm can read its length
# and display a percentage/ETA. `index` avoids shadowing the builtin `id`.
for index, sloka in enumerate(tqdm(slokas)):
    sloka_num = sloka.get("verse", 0)  # falls back to 0 when "verse" is absent

    # Combine fields into one searchable text blob
    text_blob = (
        f"Sloka {sloka_num}\n\n"
        f"Translation:\n{sloka['translation']}\n\n"
        f"Commentary:\n{sloka['commentary']}\n\n"
    )

    ids.append(f"sloka-{index}")  # ids are 0-based: sloka-0, sloka-1, ...
    documents.append(text_blob)
    embeddings.append(get_embedding(text=text_blob))
    metadatas.append(
        {
            "_global_index": index + 1,  # 1-based position within the file
            "sloka_number": sloka_num,
            "meaning_short": sloka["translation"],
            "chapter": sloka["chapter"],
            "sanskrit": sloka["sanskrit"],
            "transliteration": sloka["transliteration"],
            "commentary": sloka["commentary"],
        }
    )

# Add to Chroma collection. Skip the call when the input file held no slokas:
# Chroma's input validation is expected to reject an empty id list, and an
# empty input should end as a clean no-op rather than a crash.
if ids:
    collection.add(ids=ids, documents=documents, embeddings=embeddings, metadatas=metadatas)

print(f"Inserted {len(documents)} slokas into collection '{COLLECTION_NAME}'.")