Spaces: Running on CPU Upgrade
| import argparse | |
| import chromadb | |
| from tqdm import tqdm # Optional: For progress bar | |
# Source stores that hold more than one collection.
_PURANAS_DB = "../puranas_ai/chromadb_store"
_STOTRA_PATAM_DB = "../stotra_patam_ai/chromadb_store"


def _entry(source_db_path, source_collection_name, destination_collection_name=None):
    """Build one ``db_config`` record.

    When ``destination_collection_name`` is omitted, the destination collection
    takes the same name as the source collection.
    """
    if destination_collection_name is None:
        destination_collection_name = source_collection_name
    return {
        "source_db_path": source_db_path,
        "source_collection_name": source_collection_name,
        "destination_collection_name": destination_collection_name,
    }


# Maps a database id (the value accepted by ``--db``) to the source store path,
# the collection name inside that store, and the destination collection name.
db_config = {
    "youtube_db": _entry("../youtube_surfer_ai_agent/youtube_db", "yt_metadata"),
    "divya_prabandham": _entry(
        "../uveda_analyzer/chromadb_store", "divya_prabandham"
    ),
    "divya_prabandham_taniyans": _entry(
        "../uveda_analyzer/chromadb_store", "divya_prabandham_taniyans"
    ),
    "vishnu_sahasranamam": _entry(
        "../vishnu_sahasranamam_ai/output/chroma_store",
        "vishnu_sahasranamam",
        "vishnu_sahasranamam_openai",
    ),
    "bhagavat_gita": _entry(
        "../bhagavat_gita_chat/chromadb_store",
        "bhagavat_gita",
        "bhagavat_gita_openai",
    ),
    "pancha_sooktham": _entry("../sooktham_ai/chromadb_store", "pancha_sooktham"),
    "taitriya_upanishad": _entry(
        "../taitriya_upanishad_ai/chromadb_store", "taitriya_upanishad"
    ),
    "shanthi_panchakam": _entry(
        "../shanthi_panchakam_ai/chromadb_store", "shanthi_panchakam"
    ),
    "taitriya_samhitha": _entry(
        "../taitriya_samhitha_ai/chromadb_store", "taitriya_samhitha"
    ),
    "taitriya_brahmanam": _entry(
        "../taitriya_brahmanam_ai/chromadb_store", "taitriya_brahmanam"
    ),
    "katakam": _entry("../taitriya_brahmanam_ai/chromadb_store", "katakam"),
    "sri_stavam": _entry("../vedam_ai/chromadb-store", "sri_stavam"),
    "taitriya_aranyakam": _entry(
        "../taitriya_aranyakam_ai/chromadb_store", "taitriya_aranyakam"
    ),
    "brahma_sutra": _entry("../brahma_sutra_ai/chromadb_store", "brahma_sutra"),
    "valmiki_ramayanam": _entry(
        "../valmiki_ramayanam_ai/chromadb_store",
        "valmiki_ramayanam",
        "valmiki_ramayanam_openai",
    ),
    "sri_vachana_bhushanam": _entry(
        "../sri_vachana_bhushanam_ai/chromadb_store", "sri_vachana_bhushanam"
    ),
    "desika_prabandham": _entry(
        "../desika_prabandham_ai/chromadb_store", "desika_prabandham"
    ),
    "raghuveera_gadhyam": _entry(
        "../raghuveera_gadhyam_ai/chromadb_store", "raghuveera_gadhyam"
    ),
    "narayaneeyam": _entry("../narayaneeyam_ai/chromadb_store", "narayaneeyam"),
    "bhagavata_purana": _entry(
        "../bhagavata_purana_ai/chromadb_store", "bhagavata_purana"
    ),
    # Puranas: all collections live in one shared store.
    "agnipuranam": _entry(_PURANAS_DB, "agnipuranam"),
    "bhavishyapuranam": _entry(_PURANAS_DB, "bhavishyapuranam"),
    "brahmandpuranam": _entry(_PURANAS_DB, "brahmandpuranam"),
    "brahmapuranam": _entry(_PURANAS_DB, "brahmapuranam"),
    "brahmavaivarthapurana": _entry(_PURANAS_DB, "brahmavaivarthapurana"),
    "garudapuranam": _entry(_PURANAS_DB, "garudapuranam"),
    "harivanshapuraanam": _entry(_PURANAS_DB, "harivanshapuraanam"),
    "kurmapuranam": _entry(_PURANAS_DB, "kurmapuranam"),
    "lingapuranam": _entry(_PURANAS_DB, "lingapuranam"),
    "markandeypuranam": _entry(_PURANAS_DB, "markandeypuranam"),
    "matsyapuranam": _entry(_PURANAS_DB, "matsyapuranam"),
    "naradapuranam": _entry(_PURANAS_DB, "naradapuranam"),
    "padmapuranam": _entry(_PURANAS_DB, "padmapuranam"),
    "shivapuraanam": _entry(_PURANAS_DB, "shivapuraanam"),
    "skandapuranam": _entry(_PURANAS_DB, "skandapuranam"),
    "vaamanapuraanam": _entry(_PURANAS_DB, "vaamanapuraanam"),
    "vaayupuraanam": _entry(_PURANAS_DB, "vaayupuraanam"),
    "varahapuranam": _entry(_PURANAS_DB, "varahapuranam"),
    "vishnupuranam": _entry(_PURANAS_DB, "vishnupuranam", "vishnu_puranam_openai"),
    "vaazhi_thirunaamams": _entry(
        "../vaazhi_thirunamam/chromadb_store", "vaazhi_thirunaamams"
    ),
    "upadesa_rathnamalai": _entry(
        "../upadesa_rathnamalai/chromadb_store", "upadesa_rathnamalai"
    ),
    "thiruvaimozhi_nootrandhadhi": _entry(
        "../thiruvaimozhi_nootrandhadhi_ai/chromadb_store",
        "thiruvaimozhi_nootrandhadhi",
    ),
    # Stotras: all collections live in one shared store.
    "devaraja_ashtakam": _entry(_STOTRA_PATAM_DB, "devaraja_ashtakam"),
    "geethartha_sangraha": _entry(_STOTRA_PATAM_DB, "geethartha_sangraha"),
    "mukunda_mala": _entry(_STOTRA_PATAM_DB, "mukunda_mala"),
    "narasimha_ashtakam": _entry(_STOTRA_PATAM_DB, "narasimha_ashtakam"),
    "panchayudha_stotram": _entry(_STOTRA_PATAM_DB, "panchayudha_stotram"),
    "ranganatha_stotram": _entry(_STOTRA_PATAM_DB, "ranganatha_stotram"),
    "devaraja_mangalam": _entry(_STOTRA_PATAM_DB, "devaraja_mangalam"),
    "dhati_panchakam": _entry(_STOTRA_PATAM_DB, "dhati_panchakam"),
    "mukthaka_mangalam": _entry(_STOTRA_PATAM_DB, "mukthaka_mangalam"),
    "venkateswara_stotram": _entry(_STOTRA_PATAM_DB, "venkateswara_stotram"),
    "yathiraja_vimsathi": _entry(_STOTRA_PATAM_DB, "yathiraja_vimsathi"),
    "gadhyathrayam": _entry(_STOTRA_PATAM_DB, "gadhyathrayam"),
    "aarthi_prabandham": _entry(
        "../aarthi_prabandham_ai/chromadb_store", "aarthi_prabandham"
    ),
    "padhuka_sahasram": _entry(
        "../padhuka_sahasram/chromadb_store", "padhuka_sahasram"
    ),
}
# Number of records written per destination ``add`` call.
BATCH_SIZE = 500

# Local persistent store that receives the copied collection.
DESTINATION_DB_PATH = "./chromadb-store"


def parse_args(argv=None):
    """Parse the command line and return the argparse namespace.

    ``--db`` is required and restricted to the keys of ``db_config``, so an
    unknown or missing id is rejected by argparse itself and no further
    validation is needed afterwards.

    Args:
        argv: Optional argument list; ``None`` makes argparse read ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description="My app with database parameter")
    parser.add_argument(
        "--db",
        type=str,
        required=True,
        choices=list(db_config.keys()),
        help=f"Id of the database to use. allowed_values : {', '.join(db_config.keys())}",
    )
    return parser.parse_args(argv)


def collection_exists(client, name):
    """Return True if ``client`` already holds a collection called ``name``.

    ``client.list_collections()`` is expected to yield either plain name
    strings or objects exposing a ``name`` attribute; both are handled.
    """
    return any(
        (item if isinstance(item, str) else item.name) == name
        for item in client.list_collections()
    )


def copy_records(source_data, destination_collection, batch_size=BATCH_SIZE, progress=None):
    """Add every record in ``source_data`` to ``destination_collection`` in batches.

    Args:
        source_data: Mapping with ``"ids"``, ``"documents"`` and ``"metadatas"``
            sequences and an optional ``"embeddings"`` sequence (may be absent
            or ``None``), as returned by the source collection's ``get``.
        destination_collection: Object whose ``add`` accepts ``ids``,
            ``documents``, ``metadatas`` and ``embeddings`` keyword arguments.
        batch_size: Maximum number of records per ``add`` call.
        progress: Optional callable wrapping the iterable of batch start
            offsets (for example ``tqdm``) to display progress.

    Returns:
        The total number of records submitted.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    ids = source_data["ids"]
    documents = source_data["documents"]
    metadatas = source_data["metadatas"]
    # Resolved once: whether embeddings are present does not vary per batch.
    # Compared against None (not truthiness) so array-like values are safe.
    embeddings = source_data.get("embeddings")

    total_records = len(ids)
    print(f"Copying {total_records} records in batches of {batch_size}...")

    starts = range(0, total_records, batch_size)
    if progress is not None:
        starts = progress(starts)

    for start in starts:
        stop = start + batch_size
        destination_collection.add(
            ids=ids[start:stop],
            documents=documents[start:stop],
            metadatas=metadatas[start:stop],
            embeddings=None if embeddings is None else embeddings[start:stop],
        )
    return total_records


def main(argv=None):
    """Copy the collection selected by ``--db`` into the destination store.

    The source data is read completely before the destination collection is
    touched, so a failure while reading leaves the destination unchanged.
    Exits with a non-zero status if the record counts differ after the copy.
    """
    args = parse_args(argv)
    settings = db_config[args.db]
    source_collection_name = settings["source_collection_name"]
    destination_collection_name = settings["destination_collection_name"]

    # Connect to source and destination local persistent clients.
    source_client = chromadb.PersistentClient(path=settings["source_db_path"])
    destination_client = chromadb.PersistentClient(path=DESTINATION_DB_PATH)

    # Retrieve all data from the source collection.
    source_collection = source_client.get_collection(source_collection_name)
    source_data = source_collection.get(include=["documents", "metadatas", "embeddings"])

    # Drop a stale destination only when one is really there; the previous
    # truthiness test on get_or_create_collection() was always true, so the
    # collection was created just to be deleted and the message was misleading.
    if collection_exists(destination_client, destination_collection_name):
        print("Deleting existing collection", destination_collection_name)
        destination_client.delete_collection(destination_collection_name)

    destination_collection = destination_client.get_or_create_collection(
        destination_collection_name,
        # NOTE(review): some chromadb versions appear to reject an empty
        # metadata dict - confirm; None is passed instead in that case.
        metadata=source_collection.metadata or None,
    )

    copy_records(source_data, destination_collection, progress=tqdm)

    # Report success only after the counts have actually been compared.
    source_count = source_collection.count()
    destination_count = destination_collection.count()
    if source_count != destination_count:
        raise SystemExit(
            f"Copy incomplete: source has {source_count} records, "
            f"destination has {destination_count}."
        )
    print("✅ Collection copied successfully!")
    print("Total records in source collection = ", source_count)
    print("Total records in destination collection = ", destination_count)


if __name__ == "__main__":
    main()