Spaces:

Fred808
/

Data2

Paused

App Files Community

Fred808 committed on Jul 3, 2025

Commit

1e49a32

verified ·

1 Parent(s): 545b7f1

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -4

app.py CHANGED Viewed

@@ -18,7 +18,7 @@ DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)
 DATASET_DIR.mkdir(parents=True, exist_ok=True)
 DOWNLOAD_URLS = [
-    "https://youtu.be/ULCkj_Q5NCc?si=P5fVfGeL9dc47tju", "https://youtu.be/kBcouGHbwfg?si=l1-A0CVacZw6_Mcm"
 ]
 USER_AGENTS = [
@@ -30,15 +30,19 @@ COOKIES_FILE = Path("youtube.com_cookies.txt").resolve()  # Place your exported
 RAPIDAPI_HOST = "yt-api.p.rapidapi.com"
 RAPIDAPI_KEY = os.environ.get("RAPIDAPI_KEY", "7b63a42ed4msha215d4e2fb17099p17ae62jsn0f42bd187691")
 PROXY = os.environ.get("198.23.239.134:6540:kknqfmqe:0wyvognccou8")  # Set this environment variable to your proxy, e.g. http://user:pass@host:port
 def batch_download_via_api(download_urls, download_dir=DOWNLOAD_DIR):
     """
-    Download videos using the public API endpoint and save to download_dir.
     """
-    api_url = "https://fred808-data1.hf.space/batch/download"
     for url in download_urls:
         try:
-            resp = requests.post(api_url, json={"url": url}, stream=True)
             if resp.status_code == 200:
                 # Try to extract a video ID or use a hash for filename
                 if "v=" in url:
@@ -54,6 +58,12 @@ def batch_download_via_api(download_urls, download_dir=DOWNLOAD_DIR):
                         if chunk:
                             f.write(chunk)
                 print(f"Downloaded {url} to {out_path}")
             else:
                 print(f"Failed to download {url}: {resp.status_code} {resp.text}")
         except Exception as e:

 DATASET_DIR.mkdir(parents=True, exist_ok=True)
 DOWNLOAD_URLS = [
+    "https://youtu.be/ULCkj_Q5NCc?si=P5fVfGeL9dc47tju", "https://youtu.be/WJkI0cds4m4?si=4GlB22ly6RV32q48"
 ]
 USER_AGENTS = [
 RAPIDAPI_HOST = "yt-api.p.rapidapi.com"
 RAPIDAPI_KEY = os.environ.get("RAPIDAPI_KEY", "7b63a42ed4msha215d4e2fb17099p17ae62jsn0f42bd187691")
 PROXY = os.environ.get("198.23.239.134:6540:kknqfmqe:0wyvognccou8")  # Set this environment variable to your proxy, e.g. http://user:pass@host:port
+HF_DATASET_REPO_ID = os.environ.get("HF_DATASET_REPO_ID")
+HF_TOKEN = os.environ.get("HF_TOKEN")
 def batch_download_via_api(download_urls, download_dir=DOWNLOAD_DIR):
     """
+    Download each video using the public API endpoint and save to download_dir.
+    Also copy to dataset dir and upload to HuggingFace if configured.
+    No zipping, just raw mp4s.
     """
+    api_url = "https://fred808-data1.hf.space/video/download"
     for url in download_urls:
         try:
+            resp = requests.post(api_url, json={"urls": [url]}, stream=True)
             if resp.status_code == 200:
                 # Try to extract a video ID or use a hash for filename
                 if "v=" in url:
                         if chunk:
                             f.write(chunk)
                 print(f"Downloaded {url} to {out_path}")
+                # Copy to dataset dir and upload to HF if configured
+                if HF_DATASET_REPO_ID and HF_TOKEN:
+                    upload_to_hf_dataset(out_path, HF_DATASET_REPO_ID, HF_TOKEN)
+                else:
+                    import shutil
+                    shutil.copy2(out_path, DATASET_DIR / out_path.name)
             else:
                 print(f"Failed to download {url}: {resp.status_code} {resp.text}")
         except Exception as e: