Spaces:

MarneMorgan
/

ai-ffmpeg-render

Sleeping

App Files Files Community

MarneMorgan committed on Dec 22, 2025

Commit

ea19c61

verified ·

1 Parent(s): 3fa1ad0

Create downloader.py

Browse files

Files changed (1) hide show

downloader.py +94 -0

downloader.py ADDED Viewed

	@@ -0,0 +1,94 @@

+import os
+import re
+import time
+import requests
+from pathlib import Path
+from urllib.parse import urlparse
def _env_int(name: str, default: int) -> int:
    """Return the positive integer stored in environment variable *name*.

    Falls back to *default* (and logs a warning) when the variable is set but
    is not a base-10 integer or is not strictly positive, so a typo in the
    deployment configuration cannot crash the module at import time or
    produce a limit that rejects every download.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return default
    try:
        value = int(raw)
    except ValueError:
        value = 0  # treated as invalid below
    if value > 0:
        return value

    import logging  # local import: only needed on the misconfiguration path

    logging.getLogger(__name__).warning(
        "Ignoring invalid %s=%r; using default %d", name, raw, default
    )
    return default


# Upper bound on the size of one downloaded file, in bytes (default 250 MiB).
MAX_BYTES = _env_int("MAX_DOWNLOAD_BYTES", 250 * 1024 * 1024)
# Timeout handed to requests for each download attempt, in seconds.
TIMEOUT = _env_int("DOWNLOAD_TIMEOUT", 60)
# User-Agent header sent with every download request.
UA = "Mozilla/5.0 (compatible; ai-ffmpeg-render/1.0; +https://huggingface.co/spaces)"
def _safe_filename(name: str, fallback: str) -> str:
    """Return a filesystem-safe file name derived from *name*.

    Path separators and ``..`` sequences are replaced with ``_``, and every
    run of characters outside ``[a-zA-Z0-9._-]`` collapses to a single ``_``.
    *fallback* is used when *name* is empty or whitespace, and also when the
    sanitised name would be ``""`` or ``"."`` (which would resolve to the
    destination directory rather than a file inside it).

    The result is at most 120 characters long. When a long name has a short
    extension, the stem is shortened so that the extension survives.
    """
    max_len = 120

    def scrub(text: str) -> str:
        text = text.replace("\\", "_").replace("/", "_").replace("..", "_")
        return re.sub(r"[^a-zA-Z0-9._-]+", "_", text)

    cleaned = scrub((name or "").strip() or (fallback or ""))
    if cleaned in ("", "."):
        cleaned = scrub((fallback or "").strip())
        if cleaned in ("", "."):
            cleaned = "_"

    if len(cleaned) <= max_len:
        return cleaned

    # Cut the stem rather than the tail so ".mp4" and friends are preserved.
    stem, dot, ext = cleaned.rpartition(".")
    if dot and stem and 0 < len(ext) <= 16:
        suffix = dot + ext
        return stem[: max_len - len(suffix)] + suffix
    return cleaned[:max_len]
# Ordered (MIME fragment, extension) pairs. The first fragment found inside
# the Content-Type value decides the extension.
_CONTENT_TYPE_EXTENSIONS = (
    ("video/mp4", ".mp4"),
    ("video/webm", ".webm"),
    ("video/quicktime", ".mov"),
    ("audio/mpeg", ".mp3"),
    ("audio/mp4", ".m4a"),
    ("audio/wav", ".wav"),
    ("audio/x-wav", ".wav"),
    ("image/png", ".png"),
    ("image/jpeg", ".jpg"),
    ("image/webp", ".webp"),
    ("text/vtt", ".vtt"),
)


def _guess_ext(headers: dict) -> str:
    """Map a response's Content-Type header to a file extension.

    Args:
        headers: Mapping of response headers. The ``content-type`` key is
            looked up directly first and then case-insensitively, so a plain
            ``dict`` keyed by ``"Content-Type"`` works as well.

    Returns:
        The extension including its leading dot (for example ``".mp4"``), or
        ``""`` when the header is missing, empty, or not a recognised type.
    """
    if not headers:
        return ""

    raw = headers.get("content-type")
    if raw is None:
        # A plain dict is case-sensitive; scan for any spelling of the key.
        raw = next(
            (
                value
                for key, value in headers.items()
                if isinstance(key, str) and key.lower() == "content-type"
            ),
            None,
        )

    ctype = (raw or "").lower()
    for fragment, ext in _CONTENT_TYPE_EXTENSIONS:
        if fragment in ctype:
            return ext
    return ""
def download_to(url: str, dest_dir: Path, index: int) -> Path:
    """Stream *url* into a file inside *dest_dir* and return the file's path.

    The file name comes from the last segment of the URL path, sanitised by
    ``_safe_filename``; ``input{index}.bin`` is used when the URL has no
    usable name. If the name has no extension (or ends in ``.bin``), one is
    inferred from the response's Content-Type via ``_guess_ext``.

    The download is attempted twice. An attempt fails when the request
    errors, the body starts with HTML/XML markup, the body exceeds
    ``MAX_BYTES``, or fewer than 2048 bytes are received. Any partially
    written file is removed before the next attempt or before raising.

    NOTE(review): *index* only influences the fallback name, so two URLs that
    share a basename map to the same file in *dest_dir* — confirm callers
    either use distinct directories or accept the overwrite.

    Args:
        url: Source URL; must use the ``http`` or ``https`` scheme.
        dest_dir: Directory the file is written into.
        index: Number used to build the fallback file name.

    Returns:
        Path of the downloaded file.

    Raises:
        ValueError: If the scheme is not http/https, or if every attempt
            failed (the last failure is attached as ``__cause__``).
    """
    parsed = urlparse(url)
    if parsed.scheme not in ("http", "https"):
        raise ValueError("Only http/https URLs are allowed.")

    base = Path(parsed.path).name
    filename = _safe_filename(base, f"input{index}.bin")
    headers = {"User-Agent": UA, "Accept": "*/*"}
    # Many CDNs return an HTML/XML error page when a link is expired/denied.
    markup_prefixes = (b"<html", b"<!doctype html", b"<?xml")
    max_attempts = 2

    last_err = None
    last_exc = None
    for attempt in range(1, max_attempts + 1):
        partial = None  # set once the output file has been created
        try:
            with requests.get(
                url,
                stream=True,
                timeout=TIMEOUT,
                allow_redirects=True,
                headers=headers,
            ) as r:
                r.raise_for_status()

                # Reject early when the server already declares an oversize
                # body, instead of streaming MAX_BYTES first.
                declared = r.headers.get("content-length")
                if declared is not None and declared.isdigit() and int(declared) > MAX_BYTES:
                    raise ValueError(f"File too large (>{MAX_BYTES} bytes).")

                # Infer the extension if the name has none (or is the .bin fallback).
                if "." not in filename or filename.endswith(".bin"):
                    ext = _guess_ext(r.headers)
                    if ext and not filename.lower().endswith(ext):
                        if filename.endswith(".bin"):
                            filename = filename[:-4] + ext
                        else:
                            filename = filename + ext

                out = dest_dir / filename
                total = 0
                with open(out, "wb") as f:
                    partial = out
                    for chunk in r.iter_content(chunk_size=1024 * 1024):
                        if not chunk:
                            continue
                        if total == 0:
                            head = chunk[:200].lstrip().lower()
                            if head.startswith(markup_prefixes):
                                raise ValueError("URL returned HTML/XML (often expired/not a direct file).")
                        total += len(chunk)
                        if total > MAX_BYTES:
                            raise ValueError(f"File too large (>{MAX_BYTES} bytes).")
                        f.write(chunk)
                if total < 2048:
                    raise ValueError("Downloaded file is too small (likely invalid/expired).")
                return out
        except Exception as e:  # every failure is retried, then reported as ValueError
            last_err = str(e)
            last_exc = e
            if partial is not None:
                # Do not leave a truncated or invalid file behind.
                try:
                    partial.unlink()
                except OSError:
                    pass
            if attempt < max_attempts:
                # Small backoff; pointless after the final attempt.
                time.sleep(0.6 * attempt)
    raise ValueError(f"Download failed after retries: {last_err}") from last_exc