Fred808 committed on
Commit
3a322ec
·
verified ·
1 Parent(s): f9dcb89

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +109 -109
app.py CHANGED
@@ -1,110 +1,110 @@
1
- from fastapi import FastAPI, HTTPException
2
- from fastapi.responses import FileResponse
3
- from pathlib import Path
4
- import os
5
- import threading
6
- import requests
7
- from huggingface_hub import HfApi
8
- import random
9
- import time
10
-
11
# ASGI application object; the @app decorators in this file register handlers on it.
app = FastAPI()
12
-
13
# Storage locations, resolved to absolute paths once at import time.
DOWNLOAD_DIR = Path("downloaded").resolve()
DATASET_DIR = Path("dataset").resolve()
MAX_VIDEOS = 5000  # NOTE(review): not referenced by the code visible in this file.

# Both directories must exist before the listing endpoints glob them.
DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)
DATASET_DIR.mkdir(parents=True, exist_ok=True)

# Video URLs fetched by the background download thread at startup.
DOWNLOAD_URLS = [
    "https://youtu.be/ULCkj_Q5NCc?si=P5fVfGeL9dc47tju",
]

# NOTE(review): not referenced by the code visible in this file.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15",
]

COOKIES_FILE = Path("youtube.com_cookies.txt").resolve()  # Place your exported cookies file here
RAPIDAPI_HOST = "yt-api.p.rapidapi.com"
# Secrets come from the environment only: the key that used to be the default
# here was committed to the repository and should be treated as leaked/rotated.
RAPIDAPI_KEY = os.environ.get("RAPIDAPI_KEY", "")
# Read the proxy from the PROXY environment variable, e.g.
# http://user:pass@host:port. The previous code passed the proxy credentials
# themselves as the variable *name*, so the lookup never found anything.
PROXY = os.environ.get("PROXY")
33
-
34
def _video_id_from_url(url):
    """Return a filesystem-safe file stem derived from *url*."""
    import hashlib
    import re

    if "v=" in url:
        candidate = url.split("v=")[1].split("&")[0]
    elif "youtu.be/" in url:
        candidate = url.split("youtu.be/")[1].split("?")[0]
    else:
        candidate = ""
    # Strip anything (slashes, dots, ...) that could move the file out of the
    # target directory; fall back to a hash when nothing usable remains.
    candidate = re.sub(r"[^A-Za-z0-9_-]", "", candidate)
    return candidate or hashlib.md5(url.encode()).hexdigest()


def batch_download_via_api(download_urls, download_dir=DOWNLOAD_DIR, timeout=(10, 600)):
    """
    Download videos using the public API endpoint and save to download_dir.

    Each URL is POSTed to the API and the streamed response body is written to
    ``<download_dir>/<video id>.mp4``. Failures are printed and the loop moves
    on to the next URL; nothing is raised to the caller.

    Args:
        download_urls: Iterable of video URLs to fetch.
        download_dir: Directory (a ``pathlib.Path``) that receives the files.
        timeout: ``(connect, read)`` timeout in seconds handed to
            ``requests.post`` so a stalled server cannot block the worker
            forever.
    """
    api_url = "https://fred808-data1.hf.space/video/download"
    for url in download_urls:
        out_path = download_dir / f"{_video_id_from_url(url)}.mp4"
        # Stream into a side file first so an interrupted transfer never
        # leaves a truncated file under the final .mp4 name.
        tmp_path = out_path.with_name(out_path.name + ".part")
        try:
            # The context manager returns the streamed connection to the pool.
            with requests.post(api_url, json={"url": url}, stream=True, timeout=timeout) as resp:
                if resp.status_code != 200:
                    print(f"Failed to download {url}: {resp.status_code} {resp.text}")
                    continue
                with open(tmp_path, "wb") as f:
                    for chunk in resp.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)
            tmp_path.replace(out_path)
            print(f"Downloaded {url} to {out_path}")
        except Exception as e:
            # Best-effort batch: report, drop the partial file, keep going.
            if tmp_path.exists():
                tmp_path.unlink()
            print(f"Error downloading {url}: {e}")
61
-
62
@app.on_event("startup")
def startup_event():
    """Start the batch download of DOWNLOAD_URLS on a background daemon thread."""
    downloader = threading.Thread(
        target=batch_download_via_api,
        args=(DOWNLOAD_URLS,),
        daemon=True,
    )
    downloader.start()
65
-
66
@app.get("/files")
def list_files():
    # Report the names of the regular files directly inside DOWNLOAD_DIR.
    regular_files = filter(lambda entry: entry.is_file(), DOWNLOAD_DIR.glob("*"))
    names = [entry.name for entry in regular_files]
    return {"files": names}
70
-
71
@app.get("/download/{filename}")
def download_file(filename: str):
    """Serve one file from DOWNLOAD_DIR as an attachment.

    Args:
        filename: Name of a file located directly inside DOWNLOAD_DIR.

    Raises:
        HTTPException: 404 when the name does not resolve to a regular file
            directly inside DOWNLOAD_DIR.
    """
    file_path = (DOWNLOAD_DIR / filename).resolve()
    # filename is client-controlled: after resolving "..", alternative path
    # separators and symlinks, the target must still sit directly inside
    # DOWNLOAD_DIR (which is itself already resolved).
    if file_path.parent != DOWNLOAD_DIR or not file_path.is_file():
        raise HTTPException(status_code=404, detail="File not found")
    return FileResponse(str(file_path), filename=filename)
77
-
78
@app.get("/")
def root():
    # Landing endpoint: a usage hint plus the names of the regular files
    # currently present in DOWNLOAD_DIR.
    available = []
    for candidate in DOWNLOAD_DIR.glob("*"):
        if not candidate.is_file():
            continue
        available.append(candidate.name)
    return {
        "message": "Use /download/{filename} to download a file.",
        "available_files": available,
    }
82
-
83
def upload_to_hf_dataset(local_path, repo_id, token):
    """Mirror a file into DATASET_DIR and upload it to a Hugging Face dataset repo.

    Failures are printed, not raised.

    Args:
        local_path: Path (``str`` or ``pathlib.Path``) of the file to publish.
        repo_id: Target dataset repository id.
        token: Access token passed through to ``HfApi.upload_file``.
    """
    import shutil

    # Accept plain strings too: the code below (including the error message)
    # relies on the ``.name`` attribute.
    local_path = Path(local_path)
    api = HfApi()
    try:
        # Also copy to dataset dir for local access
        mirror_path = DATASET_DIR / local_path.name
        # copy2 raises SameFileError when the source already lives in
        # DATASET_DIR; that must not prevent the upload.
        if local_path.resolve() != mirror_path.resolve():
            shutil.copy2(local_path, mirror_path)
        api.upload_file(
            path_or_fileobj=str(local_path),
            path_in_repo=local_path.name,
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
        )
        print(f"Uploaded {local_path.name} to {repo_id} and copied to dataset dir")
    except Exception as e:
        # Best-effort: report the problem and let the caller carry on.
        print(f"Failed to upload {local_path.name} to {repo_id}: {e}")
99
-
100
@app.get("/dataset/{filename}")
def download_dataset_file(filename: str):
    """Serve one file from DATASET_DIR as an attachment.

    Args:
        filename: Name of a file located directly inside DATASET_DIR.

    Raises:
        HTTPException: 404 when the name does not resolve to a regular file
            directly inside DATASET_DIR.
    """
    file_path = (DATASET_DIR / filename).resolve()
    # filename is client-controlled: after resolving "..", alternative path
    # separators and symlinks, the target must still sit directly inside
    # DATASET_DIR (which is itself already resolved).
    if file_path.parent != DATASET_DIR or not file_path.is_file():
        raise HTTPException(status_code=404, detail="File not found in dataset")
    return FileResponse(str(file_path), filename=filename)
106
-
107
@app.get("/dataset")
def list_dataset_files():
    # Report the names of the regular files directly inside DATASET_DIR.
    dataset_names = []
    for item in DATASET_DIR.glob("*"):
        if item.is_file():
            dataset_names.append(item.name)
    return {"dataset_files": dataset_names}
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from fastapi.responses import FileResponse
3
+ from pathlib import Path
4
+ import os
5
+ import threading
6
+ import requests
7
+ from huggingface_hub import HfApi
8
+ import random
9
+ import time
10
+
11
# ASGI application object; the @app decorators in this file register handlers on it.
app = FastAPI()
12
+
13
# Storage locations, resolved to absolute paths once at import time.
DOWNLOAD_DIR = Path("downloaded").resolve()
DATASET_DIR = Path("dataset").resolve()
MAX_VIDEOS = 5000  # NOTE(review): not referenced by the code visible in this file.

# Both directories must exist before the listing endpoints glob them.
DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)
DATASET_DIR.mkdir(parents=True, exist_ok=True)

# Video URLs fetched by the background download thread at startup.
DOWNLOAD_URLS = [
    "https://youtu.be/ULCkj_Q5NCc?si=P5fVfGeL9dc47tju",
]

# NOTE(review): not referenced by the code visible in this file.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15",
]

COOKIES_FILE = Path("youtube.com_cookies.txt").resolve()  # Place your exported cookies file here
RAPIDAPI_HOST = "yt-api.p.rapidapi.com"
# Secrets come from the environment only: the key that used to be the default
# here was committed to the repository and should be treated as leaked/rotated.
RAPIDAPI_KEY = os.environ.get("RAPIDAPI_KEY", "")
# Read the proxy from the PROXY environment variable, e.g.
# http://user:pass@host:port. The previous code passed the proxy credentials
# themselves as the variable *name*, so the lookup never found anything.
PROXY = os.environ.get("PROXY")
33
+
34
def _video_id_from_url(url):
    """Return a filesystem-safe file stem derived from *url*."""
    import hashlib
    import re

    if "v=" in url:
        candidate = url.split("v=")[1].split("&")[0]
    elif "youtu.be/" in url:
        candidate = url.split("youtu.be/")[1].split("?")[0]
    else:
        candidate = ""
    # Strip anything (slashes, dots, ...) that could move the file out of the
    # target directory; fall back to a hash when nothing usable remains.
    candidate = re.sub(r"[^A-Za-z0-9_-]", "", candidate)
    return candidate or hashlib.md5(url.encode()).hexdigest()


def batch_download_via_api(download_urls, download_dir=DOWNLOAD_DIR, timeout=(10, 600)):
    """
    Download videos using the public API endpoint and save to download_dir.

    Each URL is POSTed to the API and the streamed response body is written to
    ``<download_dir>/<video id>.mp4``. Failures are printed and the loop moves
    on to the next URL; nothing is raised to the caller.

    Args:
        download_urls: Iterable of video URLs to fetch.
        download_dir: Directory (a ``pathlib.Path``) that receives the files.
        timeout: ``(connect, read)`` timeout in seconds handed to
            ``requests.post`` so a stalled server cannot block the worker
            forever.
    """
    api_url = "https://fred808-data1.hf.space/batch/download"
    for url in download_urls:
        out_path = download_dir / f"{_video_id_from_url(url)}.mp4"
        # Stream into a side file first so an interrupted transfer never
        # leaves a truncated file under the final .mp4 name.
        tmp_path = out_path.with_name(out_path.name + ".part")
        try:
            # The context manager returns the streamed connection to the pool.
            with requests.post(api_url, json={"url": url}, stream=True, timeout=timeout) as resp:
                if resp.status_code != 200:
                    print(f"Failed to download {url}: {resp.status_code} {resp.text}")
                    continue
                with open(tmp_path, "wb") as f:
                    for chunk in resp.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)
            tmp_path.replace(out_path)
            print(f"Downloaded {url} to {out_path}")
        except Exception as e:
            # Best-effort batch: report, drop the partial file, keep going.
            if tmp_path.exists():
                tmp_path.unlink()
            print(f"Error downloading {url}: {e}")
61
+
62
@app.on_event("startup")
def startup_event():
    """Start the batch download of DOWNLOAD_URLS on a background daemon thread."""
    downloader = threading.Thread(
        target=batch_download_via_api,
        args=(DOWNLOAD_URLS,),
        daemon=True,
    )
    downloader.start()
65
+
66
@app.get("/files")
def list_files():
    # Report the names of the regular files directly inside DOWNLOAD_DIR.
    regular_files = filter(lambda entry: entry.is_file(), DOWNLOAD_DIR.glob("*"))
    names = [entry.name for entry in regular_files]
    return {"files": names}
70
+
71
@app.get("/download/{filename}")
def download_file(filename: str):
    """Serve one file from DOWNLOAD_DIR as an attachment.

    Args:
        filename: Name of a file located directly inside DOWNLOAD_DIR.

    Raises:
        HTTPException: 404 when the name does not resolve to a regular file
            directly inside DOWNLOAD_DIR.
    """
    file_path = (DOWNLOAD_DIR / filename).resolve()
    # filename is client-controlled: after resolving "..", alternative path
    # separators and symlinks, the target must still sit directly inside
    # DOWNLOAD_DIR (which is itself already resolved).
    if file_path.parent != DOWNLOAD_DIR or not file_path.is_file():
        raise HTTPException(status_code=404, detail="File not found")
    return FileResponse(str(file_path), filename=filename)
77
+
78
@app.get("/")
def root():
    # Landing endpoint: a usage hint plus the names of the regular files
    # currently present in DOWNLOAD_DIR.
    available = []
    for candidate in DOWNLOAD_DIR.glob("*"):
        if not candidate.is_file():
            continue
        available.append(candidate.name)
    return {
        "message": "Use /download/{filename} to download a file.",
        "available_files": available,
    }
82
+
83
def upload_to_hf_dataset(local_path, repo_id, token):
    """Mirror a file into DATASET_DIR and upload it to a Hugging Face dataset repo.

    Failures are printed, not raised.

    Args:
        local_path: Path (``str`` or ``pathlib.Path``) of the file to publish.
        repo_id: Target dataset repository id.
        token: Access token passed through to ``HfApi.upload_file``.
    """
    import shutil

    # Accept plain strings too: the code below (including the error message)
    # relies on the ``.name`` attribute.
    local_path = Path(local_path)
    api = HfApi()
    try:
        # Also copy to dataset dir for local access
        mirror_path = DATASET_DIR / local_path.name
        # copy2 raises SameFileError when the source already lives in
        # DATASET_DIR; that must not prevent the upload.
        if local_path.resolve() != mirror_path.resolve():
            shutil.copy2(local_path, mirror_path)
        api.upload_file(
            path_or_fileobj=str(local_path),
            path_in_repo=local_path.name,
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
        )
        print(f"Uploaded {local_path.name} to {repo_id} and copied to dataset dir")
    except Exception as e:
        # Best-effort: report the problem and let the caller carry on.
        print(f"Failed to upload {local_path.name} to {repo_id}: {e}")
99
+
100
@app.get("/dataset/{filename}")
def download_dataset_file(filename: str):
    """Serve one file from DATASET_DIR as an attachment.

    Args:
        filename: Name of a file located directly inside DATASET_DIR.

    Raises:
        HTTPException: 404 when the name does not resolve to a regular file
            directly inside DATASET_DIR.
    """
    file_path = (DATASET_DIR / filename).resolve()
    # filename is client-controlled: after resolving "..", alternative path
    # separators and symlinks, the target must still sit directly inside
    # DATASET_DIR (which is itself already resolved).
    if file_path.parent != DATASET_DIR or not file_path.is_file():
        raise HTTPException(status_code=404, detail="File not found in dataset")
    return FileResponse(str(file_path), filename=filename)
106
+
107
@app.get("/dataset")
def list_dataset_files():
    # Report the names of the regular files directly inside DATASET_DIR.
    dataset_names = []
    for item in DATASET_DIR.glob("*"):
        if item.is_file():
            dataset_names.append(item.name)
    return {"dataset_files": dataset_names}