"""FastAPI microservice that downloads a fixed list of files and re-uploads them.

On startup a background task walks ``DOWNLOAD_URLS``: each file is streamed to
``OUTPUT_DIR``, uploaded to the Hugging Face dataset ``REPO_ID`` under
``DATA_PATH``, and then removed from local disk. The HTTP routes exist only so
the hosting platform can see that the process is alive.
"""
import asyncio
import logging
import os
import time
from contextlib import asynccontextmanager
from urllib.parse import urlparse

import requests
from fastapi import FastAPI
from huggingface_hub import upload_file

# === CONFIGURATION ===
HF_TOKEN = os.environ.get("HF_TOKEN")
REPO_ID = "Fred808/BG1"
DATA_PATH = "Blenders"
OUTPUT_DIR = "batch_downloads"
DOWNLOAD_URLS = [
    "https://ww2.zeroupload.xyz/2933743b4e82d467d030f6576a7ba012/ArtStation_UE5_EnvironmentDesignIvanYosifov_DownloadPirate.com.rar?download_token=acf1b32801403d83d7abebaf3625274bd1ad3ca4800f8ff3df1e6389212a047c",
    "https://ww2.zeroupload.xyz/19eb00472871b228c0d57be03ae56f56/Udemy_UE5BeginnerCrashCourse_DownloadPirate.com.rar?download_token=8717c95d273fa5f0ceb992d2b0e1d355b70b24b2e0b4d119bf4389872b8c1c60",
    "https://ww2.zeroupload.xyz/031d0e2a5436e67c600382381d710a76/Udemy_UnrealEngine5BeginnersCourse_DownloadPirate.com.rar?download_token=3bc82772a695a442ee25ff34b91d2bb022cc5171b8b2b023cb1181534643a1f6",
]
DELAY_BETWEEN_DOWNLOADS = 12  # seconds
# (connect, read) timeouts in seconds; without them a stalled server would
# hang the worker forever.
REQUEST_TIMEOUT = (10, 60)
DOWNLOAD_CHUNK_SIZE = 8192  # bytes per streamed chunk

# === Setup Logging ===
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")

# === Prepare output folder ===
os.makedirs(OUTPUT_DIR, exist_ok=True)


# === Upload Function ===
def upload_to_dataset(filepath):
    """Upload *filepath* to the dataset repo ``REPO_ID`` under ``DATA_PATH``.

    Best-effort: any exception raised by the upload is logged and swallowed so
    one failed upload does not stop the remaining downloads.
    """
    try:
        upload_file(
            path_or_fileobj=filepath,
            path_in_repo=f"{DATA_PATH}/{os.path.basename(filepath)}",
            repo_id=REPO_ID,
            repo_type="dataset",
            token=HF_TOKEN,
        )
        logging.info("[↑] Uploaded: %s", filepath)
    except Exception as e:
        logging.error("[!] Upload failed: %s — %s", filepath, e)


def _filename_from_url(url):
    """Return the last path segment of *url*, or a timestamped fallback name."""
    filename = os.path.basename(urlparse(url).path)
    if not filename or "." not in filename:
        filename = "downloaded_file_" + str(int(time.time()))
    return filename


def _download_file(url, local_path):
    """Stream *url* to *local_path*; raises on HTTP errors or timeouts."""
    with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as r:
        r.raise_for_status()
        with open(local_path, "wb") as f:
            for chunk in r.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
                if chunk:  # skip empty keep-alive chunks
                    f.write(chunk)


def _process_url(url):
    """Download one URL, upload the result, and always clean up the local file.

    This function is blocking (network and disk I/O) and is meant to be run in
    a worker thread, not directly on the event loop.
    """
    logging.info("[*] Downloading from: %s", url)
    filename = _filename_from_url(url)
    local_path = os.path.join(OUTPUT_DIR, filename)
    logging.info("[*] Saving to: %s", local_path)
    try:
        _download_file(url, local_path)
        logging.info("[✓] Downloaded: %s", filename)
        upload_to_dataset(local_path)
    finally:
        # Remove the file on failure too, so partial downloads do not pile up.
        try:
            os.remove(local_path)
        except FileNotFoundError:
            pass


# === Background Worker ===
async def downloader_worker():
    """Process every entry of ``DOWNLOAD_URLS`` sequentially.

    Sleeps ``DELAY_BETWEEN_DOWNLOADS`` seconds before each item. The blocking
    download/upload work runs in the default executor so the event loop stays
    free to answer HTTP requests. A failure on one URL is logged and the loop
    continues with the next one.
    """
    loop = asyncio.get_running_loop()
    for direct_download_link in DOWNLOAD_URLS:
        logging.info("[*] Waiting before next download...")
        await asyncio.sleep(DELAY_BETWEEN_DOWNLOADS)
        try:
            await loop.run_in_executor(None, _process_url, direct_download_link)
        except asyncio.CancelledError:
            # Never swallow cancellation (it is an Exception subclass on 3.7).
            raise
        except Exception as e:
            logging.error("[!] Error with %s: %s", direct_download_link, e)
    logging.info("✅ All files processed.")


# === FastAPI Lifespan ===
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Start the downloader task on startup and cancel it on shutdown."""
    logging.info("🚀 Starting FastAPI download-uploader microservice...")
    task = asyncio.create_task(downloader_worker())
    try:
        yield
    finally:
        task.cancel()
        try:
            # Wait for the cancellation to be delivered before exiting.
            await task
        except asyncio.CancelledError:
            pass
        logging.info("🛑 Shutting down microservice.")


# === FastAPI App ===
# Created exactly once, with the lifespan attached, BEFORE any route is
# registered: rebinding `app` after the decorators would drop every route.
app = FastAPI(lifespan=lifespan)


# === DUMMY ROUTE TO KEEP SERVER HEALTHY ===
@app.get("/")
def keep_alive():
    """Root route used by the host to check that the server responds."""
    return {"status": "running"}


def stay_alive():
    """Legacy duplicate of the root handler.

    Kept importable for backward compatibility but not registered as a route:
    "/" is already served by ``keep_alive``, so a second handler on the same
    path would never be reached.
    """
    return {"msg": "Running"}


@app.get("/health")
def healthcheck():
    """Health-check route."""
    return {"healthy": True}