Fred808 committed on
Commit
1e49a32
·
verified ·
1 Parent(s): 545b7f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -4
app.py CHANGED
@@ -18,7 +18,7 @@ DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)
18
  DATASET_DIR.mkdir(parents=True, exist_ok=True)
19
 
20
  DOWNLOAD_URLS = [
21
- "https://youtu.be/ULCkj_Q5NCc?si=P5fVfGeL9dc47tju", "https://youtu.be/kBcouGHbwfg?si=l1-A0CVacZw6_Mcm"
22
  ]
23
 
24
  USER_AGENTS = [
@@ -30,15 +30,19 @@ COOKIES_FILE = Path("youtube.com_cookies.txt").resolve() # Place your exported
30
  RAPIDAPI_HOST = "yt-api.p.rapidapi.com"
31
  RAPIDAPI_KEY = os.environ.get("RAPIDAPI_KEY", "7b63a42ed4msha215d4e2fb17099p17ae62jsn0f42bd187691")
32
  PROXY = os.environ.get("198.23.239.134:6540:kknqfmqe:0wyvognccou8") # Set this environment variable to your proxy, e.g. http://user:pass@host:port
 
 
33
 
34
  def batch_download_via_api(download_urls, download_dir=DOWNLOAD_DIR):
35
  """
36
- Download videos using the public API endpoint and save to download_dir.
 
 
37
  """
38
- api_url = "https://fred808-data1.hf.space/batch/download"
39
  for url in download_urls:
40
  try:
41
- resp = requests.post(api_url, json={"url": url}, stream=True)
42
  if resp.status_code == 200:
43
  # Try to extract a video ID or use a hash for filename
44
  if "v=" in url:
@@ -54,6 +58,12 @@ def batch_download_via_api(download_urls, download_dir=DOWNLOAD_DIR):
54
  if chunk:
55
  f.write(chunk)
56
  print(f"Downloaded {url} to {out_path}")
 
 
 
 
 
 
57
  else:
58
  print(f"Failed to download {url}: {resp.status_code} {resp.text}")
59
  except Exception as e:
 
18
  DATASET_DIR.mkdir(parents=True, exist_ok=True)
19
 
20
  DOWNLOAD_URLS = [
21
+ "https://youtu.be/ULCkj_Q5NCc?si=P5fVfGeL9dc47tju", "https://youtu.be/WJkI0cds4m4?si=4GlB22ly6RV32q48"
22
  ]
23
 
24
  USER_AGENTS = [
 
30
  RAPIDAPI_HOST = "yt-api.p.rapidapi.com"
31
  RAPIDAPI_KEY = os.environ.get("RAPIDAPI_KEY", "7b63a42ed4msha215d4e2fb17099p17ae62jsn0f42bd187691")
32
  PROXY = os.environ.get("198.23.239.134:6540:kknqfmqe:0wyvognccou8") # Set this environment variable to your proxy, e.g. http://user:pass@host:port
33
+ HF_DATASET_REPO_ID = os.environ.get("HF_DATASET_REPO_ID")
34
+ HF_TOKEN = os.environ.get("HF_TOKEN")
35
 
36
  def batch_download_via_api(download_urls, download_dir=DOWNLOAD_DIR):
37
  """
38
+ Download each video using the public API endpoint and save to download_dir.
39
+ Also copy to dataset dir and upload to HuggingFace if configured.
40
+ No zipping, just raw mp4s.
41
  """
42
+ api_url = "https://fred808-data1.hf.space/video/download"
43
  for url in download_urls:
44
  try:
45
+ resp = requests.post(api_url, json={"urls": [url]}, stream=True)
46
  if resp.status_code == 200:
47
  # Try to extract a video ID or use a hash for filename
48
  if "v=" in url:
 
58
  if chunk:
59
  f.write(chunk)
60
  print(f"Downloaded {url} to {out_path}")
61
+ # Copy to dataset dir and upload to HF if configured
62
+ if HF_DATASET_REPO_ID and HF_TOKEN:
63
+ upload_to_hf_dataset(out_path, HF_DATASET_REPO_ID, HF_TOKEN)
64
+ else:
65
+ import shutil
66
+ shutil.copy2(out_path, DATASET_DIR / out_path.name)
67
  else:
68
  print(f"Failed to download {url}: {resp.status_code} {resp.text}")
69
  except Exception as e: