Update app.py
Browse files
app.py
CHANGED
|
@@ -18,7 +18,7 @@ DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)
|
|
| 18 |
DATASET_DIR.mkdir(parents=True, exist_ok=True)
|
| 19 |
|
| 20 |
DOWNLOAD_URLS = [
|
| 21 |
-
"https://youtu.be/ULCkj_Q5NCc?si=P5fVfGeL9dc47tju", "https://youtu.be/
|
| 22 |
]
|
| 23 |
|
| 24 |
USER_AGENTS = [
|
|
@@ -30,15 +30,19 @@ COOKIES_FILE = Path("youtube.com_cookies.txt").resolve() # Place your exported
|
|
| 30 |
RAPIDAPI_HOST = "yt-api.p.rapidapi.com"
|
| 31 |
# NOTE(review): the fallback value below is a literal API key committed to source.
# Prefer supplying RAPIDAPI_KEY through the environment only, and rotate this key.
RAPIDAPI_KEY = os.environ.get("RAPIDAPI_KEY", "7b63a42ed4msha215d4e2fb17099p17ae62jsn0f42bd187691")
|
| 32 |
# Proxy URL taken from the PROXY environment variable, e.g. http://user:pass@host:port.
# None when the variable is unset. os.environ.get() takes the variable's NAME; the
# previous code passed the proxy address and credentials as that name, so the lookup
# never matched and the credentials were embedded in source.
PROXY = os.environ.get("PROXY")
|
|
|
|
|
|
|
| 33 |
|
| 34 |
def batch_download_via_api(download_urls, download_dir=DOWNLOAD_DIR):
|
| 35 |
"""
|
| 36 |
-
Download
|
|
|
|
|
|
|
| 37 |
"""
|
| 38 |
-
api_url = "https://fred808-data1.hf.space/
|
| 39 |
for url in download_urls:
|
| 40 |
try:
|
| 41 |
-
resp = requests.post(api_url, json={"
|
| 42 |
if resp.status_code == 200:
|
| 43 |
# Try to extract a video ID or use a hash for filename
|
| 44 |
if "v=" in url:
|
|
@@ -54,6 +58,12 @@ def batch_download_via_api(download_urls, download_dir=DOWNLOAD_DIR):
|
|
| 54 |
if chunk:
|
| 55 |
f.write(chunk)
|
| 56 |
print(f"Downloaded {url} to {out_path}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
else:
|
| 58 |
print(f"Failed to download {url}: {resp.status_code} {resp.text}")
|
| 59 |
except Exception as e:
|
|
|
|
| 18 |
DATASET_DIR.mkdir(parents=True, exist_ok=True)
|
| 19 |
|
| 20 |
DOWNLOAD_URLS = [
|
| 21 |
+
"https://youtu.be/ULCkj_Q5NCc?si=P5fVfGeL9dc47tju", "https://youtu.be/WJkI0cds4m4?si=4GlB22ly6RV32q48"
|
| 22 |
]
|
| 23 |
|
| 24 |
USER_AGENTS = [
|
|
|
|
| 30 |
RAPIDAPI_HOST = "yt-api.p.rapidapi.com"
|
| 31 |
# NOTE(review): the fallback value below is a literal API key committed to source.
# Prefer supplying RAPIDAPI_KEY through the environment only, and rotate this key.
RAPIDAPI_KEY = os.environ.get("RAPIDAPI_KEY", "7b63a42ed4msha215d4e2fb17099p17ae62jsn0f42bd187691")
|
| 32 |
# Proxy URL taken from the PROXY environment variable, e.g. http://user:pass@host:port.
# None when the variable is unset. os.environ.get() takes the variable's NAME; the
# previous code passed the proxy address and credentials as that name, so the lookup
# never matched and the credentials were embedded in source.
PROXY = os.environ.get("PROXY")
|
| 33 |
+
HF_DATASET_REPO_ID = os.environ.get("HF_DATASET_REPO_ID")
|
| 34 |
+
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 35 |
|
| 36 |
def batch_download_via_api(download_urls, download_dir=DOWNLOAD_DIR):
|
| 37 |
"""
|
| 38 |
+
Download each video using the public API endpoint and save to download_dir.
|
| 39 |
+
Also copy to dataset dir and upload to HuggingFace if configured.
|
| 40 |
+
No zipping, just raw mp4s.
|
| 41 |
"""
|
| 42 |
+
api_url = "https://fred808-data1.hf.space/video/download"
|
| 43 |
for url in download_urls:
|
| 44 |
try:
|
| 45 |
+
resp = requests.post(api_url, json={"urls": [url]}, stream=True)
|
| 46 |
if resp.status_code == 200:
|
| 47 |
# Try to extract a video ID or use a hash for filename
|
| 48 |
if "v=" in url:
|
|
|
|
| 58 |
if chunk:
|
| 59 |
f.write(chunk)
|
| 60 |
print(f"Downloaded {url} to {out_path}")
|
| 61 |
+
# Copy to dataset dir and upload to HF if configured
|
| 62 |
+
if HF_DATASET_REPO_ID and HF_TOKEN:
|
| 63 |
+
upload_to_hf_dataset(out_path, HF_DATASET_REPO_ID, HF_TOKEN)
|
| 64 |
+
else:
|
| 65 |
+
import shutil
|
| 66 |
+
shutil.copy2(out_path, DATASET_DIR / out_path.name)
|
| 67 |
else:
|
| 68 |
print(f"Failed to download {url}: {resp.status_code} {resp.text}")
|
| 69 |
except Exception as e:
|