import os import duckdb import time from huggingface_hub import hf_hub_download, HfApi # --- CONFIG --- TOKEN = os.environ.get("HF_TOKEN") PASS = "aAsHiSh_Cyb3r_H4CK$=666=DUmp*&~" SOURCE_REPO = "Watchhrr/HITECH_DB" TARGET_REPO = "Watchhrr/HITECH_MASTER_DB" MASTER_DB = "HITECH_FULL_MASTER.db" api = HfApi(token=TOKEN) def process_and_merge(): # 1. Main connection jo sabko jodega con = duckdb.connect(MASTER_DB) con.execute("CREATE TABLE IF NOT EXISTS master_data (mobile VARCHAR, id VARCHAR, name VARCHAR, address VARCHAR, state VARCHAR)") # 36 files ko 6-6 ke groups mein process karenge for start in range(1, 37, 6): end = min(start + 5, 36) print(f"\nšŸš€ Processing Range: {start} to {end}") # A. Download current_parts = [] for i in range(start, end + 1): part_name = f"Hi-Tek-DB.zip.{str(i).zfill(3)}" print(f"šŸ“„ Downloading {part_name}...") hf_hub_download(repo_id=SOURCE_REPO, filename=part_name, repo_type="dataset", token=TOKEN, local_dir=".") current_parts.append(part_name) # B. Merge Splits & Extract print("šŸ”— Merging splits...") os.system(f"cat Hi-Tek-DB.zip.* > temp_chunk.zip") os.system("rm Hi-Tek-DB.zip.*") # Turant delete print("⚔ Extracting with Password...") os.system(f"7z e temp_chunk.zip -p'{PASS}' -y -o./temp_data") os.remove("temp_chunk.zip") # C. Inject into Master Table print("šŸ’‰ Injecting into Master Table...") con.execute("INSERT INTO master_data SELECT * FROM read_csv_auto('./temp_data/*.csv', ignore_errors=true)") # D. Cleanup os.system("rm -rf ./temp_data") print(f"āœ… Range {start}-{end} merged into Master.") # 2. Final Indexing (Search fast karne ke liye) print("\nšŸ“Œ Creating Turbo Search Index...") con.execute("CREATE INDEX idx_mobile ON master_data (mobile)") con.commit() con.close() # 3. Final Upload print("ā˜ļø Uploading FULL MASTER DB to Hugging Face...") api.upload_file(path_or_fileobj=MASTER_DB, path_in_repo=MASTER_DB, repo_id=TARGET_REPO, repo_type="dataset") # --- EXECUTION --- try: process_and_merge() print("šŸ MISSION COMPLETE! Ek hi master file ban gayi.") except Exception as e: print(f"🚨 Final Error: {e}") while True: time.sleep(3600)