"""Download-and-upload microservice.

On startup a background task walks ``DOWNLOAD_URLS``: each file is streamed to
``OUTPUT_DIR``, uploaded to the Hugging Face dataset ``REPO_ID`` under
``DATA_PATH`` and then removed from local disk. A couple of trivial HTTP
routes are exposed so the hosting platform can probe the process.
"""

import asyncio
import logging
import os
import time
from contextlib import asynccontextmanager, suppress
from urllib.parse import urlparse

import requests
from fastapi import FastAPI
from huggingface_hub import upload_file

# === CONFIGURATION ===
HF_TOKEN = os.environ.get("HF_TOKEN")
REPO_ID = "Fred808/BG1"
DATA_PATH = "VFX"
OUTPUT_DIR = "batch_downloads"
DOWNLOAD_URLS = [
    "https://ww8.zeroupload.xyz/c298c87daebf925b0281514b84b3c61c/MDS_MADVFX_DownloadPirate.com.part1.rar?download_token=a3a75ba7db6d26b354711fe2a343619e5da53edce5986248248aa36543a219b5",
    "https://ww8.zeroupload.xyz/d7420a15d7c5ea283e0313304d4e44dd/MDS_MADVFX_DownloadPirate.com.part2.rar?download_token=78a5191035c31b85bdccc68744aeb24e17b837b2ac22ce9c317a2aac19b506d5",
    "https://ww2.zeroupload.xyz/63ab0602984d504a0cddb217c52aa964/Eduardov_VFXforBeginners_DownloadPirate.com.rar?download_token=52a29c1abb7a5187f1e92eebc889bf4671d39dd071dbb0a7324bc633eef093f1",
]
DELAY_BETWEEN_DOWNLOADS = 12  # seconds
# (connect, read) timeouts in seconds, so a stalled server cannot hang the
# worker forever. The read timeout applies per socket read, not per file.
REQUEST_TIMEOUT = (10, 60)
CHUNK_SIZE = 8192  # bytes per streamed chunk

# === Setup Logging ===
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")

# === Prepare output folder ===
os.makedirs(OUTPUT_DIR, exist_ok=True)


# === Upload Function ===
def upload_to_dataset(filepath: str) -> None:
    """Upload *filepath* to the dataset repo under ``DATA_PATH``.

    Best-effort: any failure is logged and swallowed so that one bad upload
    does not abort the remaining downloads.
    """
    try:
        upload_file(
            path_or_fileobj=filepath,
            path_in_repo=f"{DATA_PATH}/{os.path.basename(filepath)}",
            repo_id=REPO_ID,
            repo_type="dataset",
            token=HF_TOKEN,
        )
        logging.info("[↑] Uploaded: %s", filepath)
    except Exception as e:
        logging.error("[!] Upload failed: %s — %s", filepath, e)


def _filename_from_url(url: str) -> str:
    """Return the last path segment of *url*, or a timestamped fallback name."""
    name = os.path.basename(urlparse(url).path)
    if not name or "." not in name:
        name = "downloaded_file_" + str(int(time.time()))
    return name


def _download_file(url: str, local_path: str) -> None:
    """Stream *url* to *local_path*; raises on HTTP or network errors."""
    with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as response:
        response.raise_for_status()
        with open(local_path, "wb") as out:
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                if chunk:  # skip keep-alive chunks
                    out.write(chunk)


def _fetch_and_upload(url: str) -> None:
    """Download one URL, upload the result, and always clean up the local file.

    This function is blocking and is meant to be run in a worker thread.
    """
    logging.info("[*] Downloading from: %s", url)
    filename = _filename_from_url(url)
    local_path = os.path.join(OUTPUT_DIR, filename)
    logging.info("[*] Saving to: %s", local_path)
    try:
        _download_file(url, local_path)
        logging.info("[✓] Downloaded: %s", filename)
        upload_to_dataset(local_path)
    finally:
        # Remove the file after the upload attempt, and also remove a partial
        # file left behind by a failed download.
        with suppress(FileNotFoundError):
            os.remove(local_path)


# === Background Worker ===
async def downloader_worker() -> None:
    """Process every entry of ``DOWNLOAD_URLS`` sequentially.

    The blocking download/upload runs in the default thread-pool executor so
    the event loop stays free to answer HTTP requests. Errors for a single
    URL are logged and the loop moves on to the next one.
    """
    loop = asyncio.get_running_loop()
    for direct_download_link in DOWNLOAD_URLS:
        logging.info("[*] Waiting before next download...")
        await asyncio.sleep(DELAY_BETWEEN_DOWNLOADS)
        try:
            await loop.run_in_executor(None, _fetch_and_upload, direct_download_link)
        except asyncio.CancelledError:
            # Let shutdown cancellation propagate. An in-flight transfer keeps
            # running in its thread until it finishes.
            raise
        except Exception as e:
            logging.error("[!] Error with %s: %s", direct_download_link, e)
    logging.info("✅ All files processed.")


# === FastAPI Lifespan ===
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Start the background worker on startup and cancel it on shutdown."""
    logging.info("🚀 Starting FastAPI download-uploader microservice...")
    task = asyncio.create_task(downloader_worker())
    try:
        yield
    finally:
        task.cancel()
        # Await the task so the cancellation is actually delivered and no
        # "task was destroyed but it is pending" warning is emitted.
        with suppress(asyncio.CancelledError):
            await task
        logging.info("🛑 Shutting down microservice.")


# === FastAPI App ===
# Created exactly once, with the lifespan attached, BEFORE any route is
# registered. Rebinding ``app`` after registering routes would discard them.
app = FastAPI(lifespan=lifespan)


# === DUMMY ROUTE TO KEEP SERVER HEALTHY ===
@app.get("/")
def keep_alive():
    """Root probe endpoint."""
    return {"status": "running"}


def stay_alive():
    """Former duplicate handler for ``/``.

    Kept as a plain function for import compatibility. It is not routed,
    because ``keep_alive`` already serves ``/``.
    """
    return {"msg": "Running"}


@app.get("/health")
def healthcheck():
    """Health-check endpoint."""
    return {"healthy": True}