Spaces:
Runtime error
Runtime error
| import os | |
| import duckdb | |
| import time | |
| from huggingface_hub import hf_hub_download, HfApi | |
# --- CONFIG ---
# Hugging Face access token; None when HF_TOKEN is not set in the environment.
TOKEN = os.environ.get("HF_TOKEN")
# Password for the encrypted archive. An ARCHIVE_PASSWORD environment variable
# takes precedence so the secret can be supplied at deploy time; the literal is
# kept only as a fallback so existing deployments behave exactly as before.
# NOTE(review): a credential committed to source is readable by anyone who can
# read the file -- rotate it and drop the fallback once the variable is set.
PASS = os.environ.get("ARCHIVE_PASSWORD") or "aAsHiSh_Cyb3r_H4CK$=666=DUmp*&~"
# Dataset repo the split archive parts are downloaded from.
SOURCE_REPO = "Watchhrr/HITECH_DB"
# Dataset repo the merged database file is uploaded to.
TARGET_REPO = "Watchhrr/HITECH_MASTER_DB"
# Local DuckDB file name; the same name is used as the path inside TARGET_REPO.
MASTER_DB = "HITECH_FULL_MASTER.db"
# Shared Hub client constructed with TOKEN.
api = HfApi(token=TOKEN)
def _batch_ranges(total_parts, batch_size):
    """Yield inclusive ``(start, end)`` part numbers covering 1..total_parts."""
    for start in range(1, total_parts + 1, batch_size):
        yield start, min(start + batch_size - 1, total_parts)


def _merge_parts(part_paths, merged_path):
    """Concatenate the files in ``part_paths``, in order, into ``merged_path``.

    Each part is deleted as soon as it has been copied, so the parts and the
    merged file never both exist in full at the same time.
    """
    import shutil

    with open(merged_path, "wb") as merged:
        for path in part_paths:
            with open(path, "rb") as part:
                shutil.copyfileobj(part, merged)
            os.remove(path)


def _extract_archive(archive_path, out_dir):
    """Extract ``archive_path`` into ``out_dir`` with ``7z e`` and ``PASS``.

    Raises:
        OSError: if the ``7z`` executable cannot be started.
        RuntimeError: if 7z exits with a status above 1.
    """
    import subprocess

    # Argument list, no shell: the password contains shell metacharacters
    # ($, *, &, ~) and has to reach 7z verbatim.
    result = subprocess.run(
        ["7z", "e", archive_path, f"-p{PASS}", "-y", f"-o{out_dir}"],
        check=False,
    )
    # 7-Zip reports non-fatal warnings with status 1; 2 and above are failures.
    if result.returncode > 1:
        raise RuntimeError(
            f"7z extraction failed with exit code {result.returncode}"
        )


def _remove_if_present(path):
    """Delete ``path``; a file that is already gone is not an error."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def process_and_merge(total_parts=36, batch_size=6):
    """Download the split archive in batches, load its CSVs into DuckDB, upload.

    For every batch of part files the function downloads the parts from
    ``SOURCE_REPO``, concatenates them, extracts the result with 7z, and
    appends all extracted ``*.csv`` rows to the ``master_data`` table in
    ``MASTER_DB``. After the last batch it creates an index on ``mobile`` and
    uploads the database file to ``TARGET_REPO``.

    Running again against an existing ``MASTER_DB`` file appends the same rows
    a second time; the table and index creation themselves are idempotent.

    Args:
        total_parts: Number of ``Hi-Tek-DB.zip.NNN`` parts, numbered from 1.
        batch_size: How many parts are downloaded and merged per batch.

    Raises:
        ValueError: if ``total_parts`` or ``batch_size`` is less than 1.
        RuntimeError: if 7z fails to extract a merged batch.

    NOTE(review): each batch is extracted on its own. If the parts are volumes
    of one split archive, a subset of them is presumably not a complete
    archive, and extraction may fail for that reason -- confirm how the parts
    were produced.
    NOTE(review): the columns (mobile, name, address, ...) look like personal
    data; confirm that processing and publishing it is authorised.
    """
    import shutil

    if total_parts < 1 or batch_size < 1:
        raise ValueError("total_parts and batch_size must both be at least 1")

    temp_archive = "temp_chunk.zip"
    temp_dir = "./temp_data"

    # 1. Main connection that every batch is appended through.
    con = duckdb.connect(MASTER_DB)
    try:
        con.execute("CREATE TABLE IF NOT EXISTS master_data (mobile VARCHAR, id VARCHAR, name VARCHAR, address VARCHAR, state VARCHAR)")

        # Process the parts in groups of ``batch_size``.
        for start, end in _batch_ranges(total_parts, batch_size):
            print(f"\nπ Processing Range: {start} to {end}")

            # A. Download. The returned paths are kept so the merge uses
            # exactly these files, in order, not whatever a glob would match.
            part_paths = []
            for i in range(start, end + 1):
                part_name = f"Hi-Tek-DB.zip.{str(i).zfill(3)}"
                print(f"π₯ Downloading {part_name}...")
                part_paths.append(
                    hf_hub_download(repo_id=SOURCE_REPO, filename=part_name, repo_type="dataset", token=TOKEN, local_dir=".")
                )

            try:
                # B. Merge splits and extract.
                print("π Merging splits...")
                try:
                    _merge_parts(part_paths, temp_archive)
                    print("β‘ Extracting with Password...")
                    # Start from an empty directory so files left over from an
                    # earlier failed run are not loaded again.
                    shutil.rmtree(temp_dir, ignore_errors=True)
                    _extract_archive(temp_archive, temp_dir)
                finally:
                    # Free the merged archive before the insert, as before.
                    _remove_if_present(temp_archive)

                # C. Inject into the master table. Every target column is
                # VARCHAR, so type sniffing is skipped: it could otherwise
                # read phone numbers or ids as integers and lose leading zeros.
                print("π Injecting into Master Table...")
                con.execute("INSERT INTO master_data SELECT * FROM read_csv_auto('./temp_data/*.csv', ignore_errors=true, all_varchar=true)")
            finally:
                # D. Cleanup, also when a step above failed.
                shutil.rmtree(temp_dir, ignore_errors=True)
                for leftover in part_paths:
                    _remove_if_present(leftover)

            print(f"β Range {start}-{end} merged into Master.")

        # 2. Final indexing to speed up lookups by mobile. IF NOT EXISTS keeps
        # a re-run against an existing database file from failing here.
        print("\nπ Creating Turbo Search Index...")
        con.execute("CREATE INDEX IF NOT EXISTS idx_mobile ON master_data (mobile)")
        con.commit()
    finally:
        # Always release the database file, including on failure.
        con.close()

    # 3. Final upload, only reached when every batch and the index succeeded.
    print("βοΈ Uploading FULL MASTER DB to Hugging Face...")
    api.upload_file(path_or_fileobj=MASTER_DB, path_in_repo=MASTER_DB, repo_id=TARGET_REPO, repo_type="dataset")
# --- EXECUTION ---
try:
    process_and_merge()
    print("π MISSION COMPLETE! Ek hi master file ban gayi.")
except Exception as e:
    import traceback

    print(f"π¨ Final Error: {e}")
    # The message alone does not show which step failed; the traceback does.
    traceback.print_exc()
# Keep the process alive after the run instead of exiting.
# NOTE(review): the original indentation was lost; this loop is assumed to run
# after both success and failure -- confirm against the deployed file.
while True:
    time.sleep(3600)