Watchhrr's picture
Update app.py
e8712c2 verified
Raw
History Blame Contribute Delete
2.41 kB
import os
import shutil
import subprocess
import time

import duckdb
from huggingface_hub import HfApi, hf_hub_download
# --- CONFIG ---
# Hugging Face access token read from the environment; None when HF_TOKEN is unset.
TOKEN = os.environ.get("HF_TOKEN")
# Password handed to 7z when extracting the downloaded archive.
# NOTE(review): this secret is committed in plain text; consider loading it from
# the environment the same way TOKEN is loaded.
PASS = "aAsHiSh_Cyb3r_H4CK$=666=DUmp*&~"
# Dataset repo the split archive parts are downloaded from.
SOURCE_REPO = "Watchhrr/HITECH_DB"
# Dataset repo the finished database file is uploaded to.
TARGET_REPO = "Watchhrr/HITECH_MASTER_DB"
# Local DuckDB file name; also used as the destination path inside TARGET_REPO.
MASTER_DB = "HITECH_FULL_MASTER.db"
# Shared Hub client, constructed with TOKEN.
api = HfApi(token=TOKEN)
def _remove_quietly(path):
    """Delete ``path``; a file that is already gone is not an error."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def _ingest_range(con, start, end):
    """Download parts ``start``..``end``, merge, extract and load them into ``master_data``.

    Args:
        con: Open DuckDB connection that already contains the ``master_data`` table.
        start: First part number of the group (inclusive).
        end: Last part number of the group (inclusive).

    Raises:
        OSError: If a part cannot be read or the merged file cannot be written.
        FileNotFoundError: If the ``7z`` executable is not available.
        Exception: Whatever ``hf_hub_download`` or DuckDB raise is propagated.
    """
    chunk_path = "temp_chunk.zip"
    extract_dir = "./temp_data"
    part_paths = []
    try:
        # A. Download
        for i in range(start, end + 1):
            part_name = f"Hi-Tek-DB.zip.{i:03d}"
            print(f"📥 Downloading {part_name}...")
            part_paths.append(
                hf_hub_download(
                    repo_id=SOURCE_REPO,
                    filename=part_name,
                    repo_type="dataset",
                    token=TOKEN,
                    local_dir=".",
                )
            )

        # B. Merge splits & extract
        # Only the parts downloaded above are merged, in download order, so
        # stale parts left by an earlier failed run cannot leak into the chunk.
        print("🔗 Merging splits...")
        with open(chunk_path, "wb") as merged:
            for part_path in part_paths:
                with open(part_path, "rb") as part:
                    shutil.copyfileobj(part, merged)
                # Delete each part as soon as it is copied to limit disk usage.
                os.remove(part_path)

        print("⚡ Extracting with Password...")
        # Start from an empty directory so CSVs from a previous crashed run are
        # not ingested a second time.
        shutil.rmtree(extract_dir, ignore_errors=True)
        # Argument list instead of a shell string: the password needs no
        # quoting and cannot be reinterpreted by a shell.
        # NOTE(review): each group is a byte range of one split archive; whether
        # 7z can extract such a partial range depends on the archive layout —
        # confirm. Extraction therefore stays best-effort: a non-zero status is
        # reported but does not abort the run.
        result = subprocess.run(
            ["7z", "e", chunk_path, f"-p{PASS}", "-y", f"-o{extract_dir}"],
            check=False,
        )
        if result.returncode != 0:
            print(f"⚠️ 7z exited with status {result.returncode}; continuing with extracted files.")
        os.remove(chunk_path)

        # C. Inject into master table
        print("💉 Injecting into Master Table...")
        con.execute("INSERT INTO master_data SELECT * FROM read_csv_auto('./temp_data/*.csv', ignore_errors=true)")
    finally:
        # D. Cleanup — also runs on failure so no temp files are left behind.
        for leftover in (*part_paths, chunk_path):
            _remove_quietly(leftover)
        shutil.rmtree(extract_dir, ignore_errors=True)


def process_and_merge(total_parts=36, group_size=6):
    """Build the master DuckDB file from the split archive and upload it.

    Parts are processed in consecutive groups: each group is downloaded,
    concatenated, extracted with 7z and its CSV files are appended to the
    ``master_data`` table. Afterwards an index on ``mobile`` is created and
    the database file is uploaded to ``TARGET_REPO``.

    Args:
        total_parts: Number of split parts, numbered from 1. Defaults to 36.
        group_size: How many parts are handled per group. Defaults to 6.

    Raises:
        Exception: Any download, filesystem, DuckDB or upload error is
            propagated to the caller; the connection is closed first.
    """
    # 1. Main connection that every group is merged into.
    con = duckdb.connect(MASTER_DB)
    try:
        con.execute("CREATE TABLE IF NOT EXISTS master_data (mobile VARCHAR, id VARCHAR, name VARCHAR, address VARCHAR, state VARCHAR)")

        # Process the parts in groups of ``group_size``.
        for start in range(1, total_parts + 1, group_size):
            end = min(start + group_size - 1, total_parts)
            print(f"\n🚀 Processing Range: {start} to {end}")
            _ingest_range(con, start, end)
            print(f"✅ Range {start}-{end} merged into Master.")

        # 2. Final indexing (to make lookups by mobile fast).
        print("\n📌 Creating Turbo Search Index...")
        # IF NOT EXISTS keeps a re-run against an existing database file from
        # failing, matching the CREATE TABLE IF NOT EXISTS above.
        con.execute("CREATE INDEX IF NOT EXISTS idx_mobile ON master_data (mobile)")
        con.commit()
    finally:
        # Always release the database file, even when a step above failed.
        con.close()

    # 3. Final upload — only reached when every step above succeeded.
    print("☁️ Uploading FULL MASTER DB to Hugging Face...")
    api.upload_file(path_or_fileobj=MASTER_DB, path_in_repo=MASTER_DB, repo_id=TARGET_REPO, repo_type="dataset")
# --- EXECUTION ---
# Run the pipeline once; any failure is printed instead of propagating.
try:
    process_and_merge()
    print("🏁 MISSION COMPLETE! Ek hi master file ban gayi.")
except Exception as err:
    print(f"🚨 Final Error: {err}")

# Keep the process alive indefinitely, waking once an hour.
# NOTE(review): the nesting of this loop cannot be recovered from this copy of
# the file; it is placed at top level so it runs after success and failure
# alike — confirm against the original.
while True:
    time.sleep(3600)