Fred808 committed on
Commit
ba9a039
·
verified ·
1 Parent(s): 34e1a80

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +28 -0
  2. app.py +110 -0
  3. requirements.txt +3 -0
Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# CUDA 11.8 runtime base image on Ubuntu 22.04.
FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04

# Keep apt from prompting during the build.
ENV DEBIAN_FRONTEND=noninteractive

# Python + dependencies. The apt package lists are removed in the same layer
# so they do not end up in the final image.
RUN apt-get update && apt-get install -y python3 python3-pip git && \
    rm -rf /var/lib/apt/lists/* && \
    pip3 install --no-cache-dir --upgrade pip

# Ensure the app runs as the same user as the Space UI (UID 1000).
# The user is created before any files are copied so they can be owned by it.
RUN useradd -m -u 1000 user

# Set working dir
WORKDIR /app

# app.py creates its "downloaded/" and "dataset/" directories under the
# working directory at import time, so the runtime user must own /app.
# Ownership replaces the previous world-writable `chmod -R 777 /app`.
RUN chown user:user /app

# Copy and install requirements (done before copying the code so this layer
# is reused when only the application code changes).
COPY --chown=user:user requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Copy app code, owned by the runtime user
COPY --chown=user:user . .

# Drop root privileges for everything that runs in the container
USER user

# Launch FastAPI download server on container start
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from fastapi.responses import FileResponse
3
+ from pathlib import Path
4
+ import os
5
+ import threading
6
+ import requests
7
+ from huggingface_hub import HfApi
8
+ import random
9
+ import time
10
+
11
# ASGI application object; the route decorators below register on it.
app = FastAPI()

# Storage locations, resolved to absolute paths against the working directory
# at import time.
DOWNLOAD_DIR = Path("downloaded").resolve()
DATASET_DIR = Path("dataset").resolve()
MAX_VIDEOS = 5000

# Create both directories up front so the listing endpoints never hit a
# missing directory.
DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)
DATASET_DIR.mkdir(parents=True, exist_ok=True)

# Source URLs fetched by the background download started at app startup.
DOWNLOAD_URLS = [
    "https://youtu.be/ULCkj_Q5NCc?si=P5fVfGeL9dc47tju",
]

USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15",
]

COOKIES_FILE = Path("youtube.com_cookies.txt").resolve()  # Place your exported cookies file here
RAPIDAPI_HOST = "yt-api.p.rapidapi.com"

# Secrets are read from the environment only; never commit them as defaults.
# NOTE: an API key and proxy credentials were previously embedded in this
# file's history and should be treated as compromised and rotated.
RAPIDAPI_KEY = os.environ.get("RAPIDAPI_KEY", "")

# Set the PROXY environment variable to your proxy,
# e.g. http://user:pass@host:port. None when unset.
PROXY = os.environ.get("PROXY")
33
+
34
def _video_id_from_url(url):
    """Return a file-name-safe identifier derived from *url*.

    Uses the ``v=`` query value or the ``youtu.be/`` path segment when it
    consists only of letters, digits, ``_`` and ``-``; otherwise falls back
    to the MD5 hex digest of the whole URL.
    """
    import hashlib
    import re

    if "v=" in url:
        candidate = url.split("v=")[1].split("&")[0]
    elif "youtu.be/" in url:
        candidate = url.split("youtu.be/")[1].split("?")[0]
    else:
        candidate = ""
    # The identifier becomes part of a path, so anything that could contain a
    # separator or "." is replaced by a hash of the URL.
    if re.fullmatch(r"[A-Za-z0-9_-]+", candidate):
        return candidate
    return hashlib.md5(url.encode()).hexdigest()


def batch_download_via_api(download_urls, download_dir=DOWNLOAD_DIR, timeout=(10, 600)):
    """
    Download videos using the public API endpoint and save to download_dir.

    Args:
        download_urls: Iterable of video URLs, each posted to the API as
            ``{"url": ...}``.
        download_dir: Directory that receives ``<video_id>.mp4`` files.
        timeout: ``(connect, read)`` timeout in seconds passed to
            ``requests.post`` so a stalled server cannot block forever.

    Failures are reported with ``print`` and do not stop the remaining
    downloads; nothing is returned.
    """
    api_url = "https://fred808-data1.hf.space/video/download"
    target_dir = Path(download_dir)
    for url in download_urls:
        out_path = target_dir / f"{_video_id_from_url(url)}.mp4"
        # Stream into a ".part" file and rename on success, so an interrupted
        # transfer never leaves a truncated .mp4 behind.
        part_path = out_path.with_name(out_path.name + ".part")
        try:
            # The context manager releases the streamed connection.
            with requests.post(
                api_url, json={"url": url}, stream=True, timeout=timeout
            ) as resp:
                if resp.status_code != 200:
                    print(f"Failed to download {url}: {resp.status_code} {resp.text}")
                    continue
                with open(part_path, "wb") as f:
                    for chunk in resp.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)
            part_path.replace(out_path)
            print(f"Downloaded {url} to {out_path}")
        except Exception as e:
            # Best-effort batch: report the error and move on to the next URL.
            print(f"Error downloading {url}: {e}")
            if part_path.exists():
                part_path.unlink()
61
+
62
@app.on_event("startup")
def startup_event():
    """Start the batch download of DOWNLOAD_URLS in a background thread."""
    # Daemon thread: it does not keep the process alive at shutdown.
    worker = threading.Thread(
        target=batch_download_via_api,
        args=(DOWNLOAD_URLS,),
        daemon=True,
    )
    worker.start()
65
+
66
@app.get("/files")
def list_files():
    """Return the names of the regular files directly inside DOWNLOAD_DIR."""
    names = []
    for entry in DOWNLOAD_DIR.glob("*"):
        # Sub-directories are skipped; only files can be downloaded.
        if entry.is_file():
            names.append(entry.name)
    return {"files": names}
70
+
71
@app.get("/download/{filename}")
def download_file(filename: str):
    """Serve one file from DOWNLOAD_DIR as an attachment.

    Args:
        filename: Bare name of a file inside DOWNLOAD_DIR.

    Raises:
        HTTPException: 404 when the name is not a bare file name or no such
            regular file exists.
    """
    # Accept bare file names only. Joining an absolute path (or one with
    # directory parts) onto DOWNLOAD_DIR would escape the download directory:
    # `DOWNLOAD_DIR / "/etc/passwd"` evaluates to `/etc/passwd`.
    if Path(filename).name != filename:
        raise HTTPException(status_code=404, detail="File not found")
    file_path = DOWNLOAD_DIR / filename
    # is_file() is already False for paths that do not exist.
    if not file_path.is_file():
        raise HTTPException(status_code=404, detail="File not found")
    return FileResponse(str(file_path), filename=filename)
77
+
78
@app.get("/")
def root():
    """Return a usage hint together with the files currently in DOWNLOAD_DIR."""
    available = []
    for entry in DOWNLOAD_DIR.glob("*"):
        if entry.is_file():
            available.append(entry.name)
    return {
        "message": "Use /download/{filename} to download a file.",
        "available_files": available,
    }
82
+
83
def upload_to_hf_dataset(local_path, repo_id, token):
    """Copy a file into DATASET_DIR and upload it to a Hugging Face dataset repo.

    Args:
        local_path: Path (``str`` or ``Path``) of the file to publish.
        repo_id: Target dataset repository id.
        token: Access token passed to ``HfApi.upload_file``.

    Errors from the copy or the upload are reported with ``print`` and not
    re-raised. The local copy is made first, so it may exist even when the
    upload fails.
    """
    import shutil

    # Normalise up front: the code below (including the error message) relies
    # on `.name`, which a plain string does not have.
    local_path = Path(local_path)
    api = HfApi()
    try:
        # Also copy to dataset dir for local access
        shutil.copy2(local_path, DATASET_DIR / local_path.name)
        api.upload_file(
            path_or_fileobj=str(local_path),
            path_in_repo=local_path.name,
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
        )
        print(f"Uploaded {local_path.name} to {repo_id} and copied to dataset dir")
    except Exception as e:
        # Best-effort: report the failure and let the caller continue.
        print(f"Failed to upload {local_path.name} to {repo_id}: {e}")
99
+
100
@app.get("/dataset/{filename}")
def download_dataset_file(filename: str):
    """Serve one file from DATASET_DIR as an attachment.

    Args:
        filename: Bare name of a file inside DATASET_DIR.

    Raises:
        HTTPException: 404 when the name is not a bare file name or no such
            regular file exists.
    """
    # Accept bare file names only. Joining an absolute path (or one with
    # directory parts) onto DATASET_DIR would escape the dataset directory:
    # `DATASET_DIR / "/etc/passwd"` evaluates to `/etc/passwd`.
    if Path(filename).name != filename:
        raise HTTPException(status_code=404, detail="File not found in dataset")
    file_path = DATASET_DIR / filename
    # is_file() is already False for paths that do not exist.
    if not file_path.is_file():
        raise HTTPException(status_code=404, detail="File not found in dataset")
    return FileResponse(str(file_path), filename=filename)
106
+
107
@app.get("/dataset")
def list_dataset_files():
    """Return the names of the regular files directly inside DATASET_DIR."""
    names = []
    for entry in DATASET_DIR.glob("*"):
        # Sub-directories are skipped; only files are listed.
        if entry.is_file():
            names.append(entry.name)
    return {"dataset_files": names}
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
fastapi
uvicorn
requests
# app.py imports HfApi from this package; without it the app fails at import.
huggingface_hub