Fred808 committed on
Commit
852384a
·
verified ·
1 Parent(s): e844b7b

Upload 3 files

Browse files
Files changed (2) hide show
  1. Dockerfile +5 -4
  2. main.py +238 -153
Dockerfile CHANGED
@@ -10,11 +10,12 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
10
  # Set work directory
11
  WORKDIR /app
12
 
13
- # Install system dependencies
14
  RUN apt-get update && apt-get install -y \
15
  ffmpeg \
16
  curl \
17
  wget \
 
18
  && rm -rf /var/lib/apt/lists/*
19
 
20
  # Copy requirements first for better caching
@@ -27,7 +28,7 @@ RUN pip install --no-cache-dir --upgrade pip && \
27
  # Copy application code
28
  COPY . .
29
 
30
- # Create downloads directory
31
  RUN mkdir -p /tmp/downloads && \
32
  chmod 755 /tmp/downloads
33
 
@@ -42,8 +43,8 @@ USER app
42
  # Expose port
43
  EXPOSE 7860
44
 
45
- # Health check
46
- HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
47
  CMD curl -f http://localhost:7860/health || exit 1
48
 
49
  # Run the application
 
10
  # Set work directory
11
  WORKDIR /app
12
 
13
+ # Install system dependencies including additional tools for anti-detection
14
  RUN apt-get update && apt-get install -y \
15
  ffmpeg \
16
  curl \
17
  wget \
18
+ ca-certificates \
19
  && rm -rf /var/lib/apt/lists/*
20
 
21
  # Copy requirements first for better caching
 
28
  # Copy application code
29
  COPY . .
30
 
31
+ # Create downloads directory with proper permissions
32
  RUN mkdir -p /tmp/downloads && \
33
  chmod 755 /tmp/downloads
34
 
 
43
  # Expose port
44
  EXPOSE 7860
45
 
46
+ # Health check with longer timeout for anti-detection measures
47
+ HEALTHCHECK --interval=60s --timeout=60s --start-period=10s --retries=3 \
48
  CMD curl -f http://localhost:7860/health || exit 1
49
 
50
  # Run the application
main.py CHANGED
@@ -1,7 +1,7 @@
1
  #!/usr/bin/env python3
2
  """
3
- FastAPI YouTube Video Downloader for Hugging Face Spaces
4
- A modern API service for downloading YouTube videos without cookies
5
  """
6
 
7
  import os
@@ -9,17 +9,17 @@ import sys
9
  import subprocess
10
  import json
11
  import tempfile
12
- import shutil
 
 
13
  import logging
14
  from pathlib import Path
15
  from typing import Optional, Dict, Any, List
16
  from datetime import datetime
17
- import asyncio
18
  from concurrent.futures import ThreadPoolExecutor
19
 
20
- from fastapi import FastAPI, HTTPException, BackgroundTasks, UploadFile, File
21
  from fastapi.responses import FileResponse, HTMLResponse
22
- from fastapi.staticfiles import StaticFiles
23
  from fastapi.middleware.cors import CORSMiddleware
24
  from pydantic import BaseModel, HttpUrl
25
  import uvicorn
@@ -28,7 +28,7 @@ import uvicorn
28
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
29
  logger = logging.getLogger(__name__)
30
 
31
- # Pydantic models for request/response
32
  class VideoInfoRequest(BaseModel):
33
  url: HttpUrl
34
 
@@ -61,12 +61,13 @@ class HealthResponse(BaseModel):
61
  status: str
62
  yt_dlp_available: bool
63
  timestamp: str
 
64
 
65
  # Initialize FastAPI app
66
  app = FastAPI(
67
- title="YouTube Video Downloader",
68
- description="Download YouTube videos without cookies using yt-dlp",
69
- version="1.0.0",
70
  docs_url="/docs",
71
  redoc_url="/redoc"
72
  )
@@ -81,22 +82,91 @@ app.add_middleware(
81
  )
82
 
83
  # Thread pool for background tasks
84
- executor = ThreadPoolExecutor(max_workers=3)
85
 
86
- class YouTubeDownloader:
87
- """
88
- A class to download YouTube videos without cookies using yt-dlp
89
- """
90
 
91
- def __init__(self, download_dir: str = None):
92
- """
93
- Initialize the YouTube downloader
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
- Args:
96
- download_dir: Directory to save downloaded videos
97
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  if download_dir is None:
99
- # Use persistent storage if available, otherwise temp
100
  if os.path.exists('/data'):
101
  download_dir = '/data/downloads'
102
  else:
@@ -104,6 +174,7 @@ class YouTubeDownloader:
104
 
105
  self.download_dir = Path(download_dir)
106
  self.download_dir.mkdir(parents=True, exist_ok=True)
 
107
 
108
  # Ensure yt-dlp is available
109
  self._ensure_ytdlp_available()
@@ -124,18 +195,17 @@ class YouTubeDownloader:
124
  logger.error(f"Failed to install yt-dlp: {e}")
125
  raise RuntimeError("Could not install yt-dlp")
126
 
127
- def get_video_info(self, url: str) -> Optional[Dict[str, Any]]:
128
- """
129
- Get video information without downloading
 
 
 
 
 
130
 
131
- Args:
132
- url: YouTube video URL
133
-
134
- Returns:
135
- Dictionary containing video information or None if failed
136
- """
137
  try:
138
- cmd = [
139
  'yt-dlp',
140
  '--dump-json',
141
  '--no-download',
@@ -143,7 +213,12 @@ class YouTubeDownloader:
143
  str(url)
144
  ]
145
 
146
- result = subprocess.run(cmd, capture_output=True, text=True, check=True)
 
 
 
 
 
147
  video_info = json.loads(result.stdout)
148
 
149
  return {
@@ -160,74 +235,108 @@ class YouTubeDownloader:
160
  }
161
 
162
  except subprocess.CalledProcessError as e:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  logger.error(f"Failed to get video info: {e.stderr}")
164
  return None
165
- except json.JSONDecodeError as e:
166
- logger.error(f"Failed to parse video info JSON: {e}")
 
 
 
 
167
  return None
168
 
169
  def download_video(self, url: str, quality: str = "best",
170
- audio_only: bool = False) -> Optional[str]:
171
- """
172
- Download a YouTube video without cookies
 
 
 
 
 
173
 
174
- Args:
175
- url: YouTube video URL
176
- quality: Video quality (best, worst, or specific format)
177
- audio_only: If True, download only audio
178
-
179
- Returns:
180
- Path to downloaded file or None if failed
181
- """
182
  try:
183
- # Base command
184
- cmd = ['yt-dlp']
185
 
186
  # Set output directory and filename template
187
  output_template = str(self.download_dir / "%(title)s.%(ext)s")
188
- cmd.extend(['-o', output_template])
189
 
190
  # Set format/quality
191
  if audio_only:
192
- cmd.extend(['-f', 'bestaudio/best'])
193
  else:
194
  if quality == "best":
195
- cmd.extend(['-f', 'best[height<=720]']) # Limit to 720p for server efficiency
196
  elif quality == "worst":
197
- cmd.extend(['-f', 'worst'])
198
  else:
199
- cmd.extend(['-f', quality])
200
 
201
- # Add other useful options
202
- cmd.extend([
203
- '--no-cookies', # Explicitly no cookies
204
- '--no-check-certificates', # Skip SSL certificate verification if needed
205
- '--extract-flat', 'false', # Extract full metadata
206
- str(url)
207
- ])
208
 
209
- logger.info(f"Downloading video from: {url}")
210
 
211
- # Execute download
212
- result = subprocess.run(cmd, capture_output=True, text=True, check=True)
213
 
214
  logger.info("Download completed successfully")
215
 
216
  # Find the downloaded file
217
  downloaded_files = [f for f in self.download_dir.glob("*") if f.is_file()]
218
  if downloaded_files:
219
- # Return the most recently created file
220
  latest_file = max(downloaded_files, key=os.path.getctime)
221
  return str(latest_file)
222
 
223
  return None
224
 
225
  except subprocess.CalledProcessError as e:
 
 
 
 
 
 
 
 
 
 
226
  logger.error(f"Download failed: {e.stderr}")
227
  return None
 
 
 
 
 
 
228
 
229
  # Global downloader instance
230
- downloader = YouTubeDownloader()
231
 
232
  @app.get("/", response_class=HTMLResponse)
233
  async def read_root():
@@ -238,7 +347,7 @@ async def read_root():
238
  <head>
239
  <meta charset="UTF-8">
240
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
241
- <title>YouTube Video Downloader API</title>
242
  <style>
243
  body {
244
  font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
@@ -271,29 +380,19 @@ async def read_root():
271
  color: #666;
272
  font-size: 1.1em;
273
  }
274
- .api-info {
275
  background: #f8f9fa;
276
  border-radius: 8px;
277
  padding: 20px;
278
  margin: 20px 0;
279
  }
280
- .endpoint {
281
  background: white;
282
  border: 1px solid #dee2e6;
283
  border-radius: 5px;
284
  padding: 15px;
285
  margin: 10px 0;
286
  }
287
- .method {
288
- display: inline-block;
289
- padding: 4px 8px;
290
- border-radius: 4px;
291
- font-weight: bold;
292
- font-size: 0.8em;
293
- margin-right: 10px;
294
- }
295
- .get { background: #d4edda; color: #155724; }
296
- .post { background: #d1ecf1; color: #0c5460; }
297
  .btn {
298
  background: linear-gradient(135deg, #667eea, #764ba2);
299
  color: white;
@@ -308,56 +407,55 @@ async def read_root():
308
  .btn:hover {
309
  transform: translateY(-2px);
310
  }
311
- code {
312
- background: #f8f9fa;
313
- padding: 2px 6px;
314
- border-radius: 3px;
315
- font-family: 'Courier New', monospace;
 
 
316
  }
317
  </style>
318
  </head>
319
  <body>
320
  <div class="container">
321
  <div class="header">
322
- <h1>🎥 YouTube Downloader API</h1>
323
- <p>FastAPI service for downloading YouTube videos without cookies</p>
324
  </div>
325
 
326
- <div class="api-info">
327
- <h3>📋 Available Endpoints</h3>
 
 
 
 
 
 
 
 
 
328
 
329
- <div class="endpoint">
330
- <span class="method get">GET</span>
331
- <code>/health</code> - Check service health and yt-dlp availability
332
  </div>
333
 
334
- <div class="endpoint">
335
- <span class="method post">POST</span>
336
- <code>/video/info</code> - Get video information without downloading
337
  </div>
338
 
339
- <div class="endpoint">
340
- <span class="method post">POST</span>
341
- <code>/video/download</code> - Download a YouTube video
342
  </div>
343
 
344
- <div class="endpoint">
345
- <span class="method get">GET</span>
346
- <code>/video/file/{filename}</code> - Download a previously processed file
347
  </div>
348
  </div>
349
 
350
  <div style="text-align: center;">
351
- <a href="/docs" class="btn">📖 Interactive API Documentation</a>
352
- <a href="/redoc" class="btn">📚 ReDoc Documentation</a>
353
- </div>
354
-
355
- <div class="api-info">
356
- <h3>🚀 Quick Start Example</h3>
357
- <p>Get video information:</p>
358
- <pre><code>curl -X POST "http://localhost:8000/video/info" \\
359
- -H "Content-Type: application/json" \\
360
- -d '{"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ"}'</code></pre>
361
  </div>
362
  </div>
363
  </body>
@@ -367,37 +465,48 @@ async def read_root():
367
 
368
  @app.get("/health", response_model=HealthResponse)
369
  async def health_check():
370
- """Health check endpoint"""
371
  try:
372
- # Check if yt-dlp is available
373
  subprocess.run(['yt-dlp', '--version'], capture_output=True, check=True)
374
  yt_dlp_available = True
375
  except:
376
  yt_dlp_available = False
377
 
 
 
 
 
 
 
 
 
 
378
  return HealthResponse(
379
  status="healthy" if yt_dlp_available else "unhealthy",
380
  yt_dlp_available=yt_dlp_available,
381
- timestamp=datetime.now().isoformat()
 
382
  )
383
 
384
  @app.post("/video/info", response_model=Dict[str, Any])
385
  async def get_video_info(request: VideoInfoRequest):
386
- """Get video information without downloading"""
387
  try:
388
- # Validate URL
389
  url_str = str(request.url)
390
  if not any(domain in url_str for domain in ['youtube.com', 'youtu.be']):
391
  raise HTTPException(status_code=400, detail="Invalid YouTube URL")
392
 
393
- # Get video info in thread pool to avoid blocking
394
  loop = asyncio.get_event_loop()
395
  info = await loop.run_in_executor(executor, downloader.get_video_info, url_str)
396
 
397
  if info:
398
  return {"success": True, "info": info}
399
  else:
400
- raise HTTPException(status_code=500, detail="Failed to get video information")
 
 
 
401
 
402
  except HTTPException:
403
  raise
@@ -407,9 +516,8 @@ async def get_video_info(request: VideoInfoRequest):
407
 
408
  @app.post("/video/download", response_model=DownloadResponse)
409
  async def download_video(request: DownloadRequest, background_tasks: BackgroundTasks):
410
- """Download a YouTube video"""
411
  try:
412
- # Validate URL
413
  url_str = str(request.url)
414
  if not any(domain in url_str for domain in ['youtube.com', 'youtu.be']):
415
  raise HTTPException(status_code=400, detail="Invalid YouTube URL")
@@ -418,7 +526,10 @@ async def download_video(request: DownloadRequest, background_tasks: BackgroundT
418
  loop = asyncio.get_event_loop()
419
  info = await loop.run_in_executor(executor, downloader.get_video_info, url_str)
420
  if not info:
421
- raise HTTPException(status_code=500, detail="Failed to get video information")
 
 
 
422
 
423
  # Download the video
424
  downloaded_file = await loop.run_in_executor(
@@ -433,19 +544,22 @@ async def download_video(request: DownloadRequest, background_tasks: BackgroundT
433
  file_size = os.path.getsize(downloaded_file)
434
  filename = os.path.basename(downloaded_file)
435
 
436
- # Schedule cleanup after 1 hour
437
- background_tasks.add_task(cleanup_file, downloaded_file, delay=3600)
438
 
439
  return DownloadResponse(
440
  success=True,
441
- message="Video downloaded successfully",
442
  filename=filename,
443
  file_size=file_size,
444
  video_info=VideoInfo(**info),
445
  download_path=downloaded_file
446
  )
447
  else:
448
- raise HTTPException(status_code=500, detail="Failed to download video")
 
 
 
449
 
450
  except HTTPException:
451
  raise
@@ -457,10 +571,8 @@ async def download_video(request: DownloadRequest, background_tasks: BackgroundT
457
  async def download_file(filename: str):
458
  """Serve downloaded files"""
459
  try:
460
- # Security: only allow files from download directory
461
  file_path = downloader.download_dir / filename
462
 
463
- # Check if file exists and is within download directory
464
  if not file_path.exists() or not str(file_path.resolve()).startswith(str(downloader.download_dir.resolve())):
465
  raise HTTPException(status_code=404, detail="File not found")
466
 
@@ -476,33 +588,7 @@ async def download_file(filename: str):
476
  logger.error(f"Error serving file: {e}")
477
  raise HTTPException(status_code=500, detail=str(e))
478
 
479
- @app.get("/video/formats")
480
- async def list_formats(url: HttpUrl):
481
- """List available formats for a video"""
482
- try:
483
- url_str = str(url)
484
- if not any(domain in url_str for domain in ['youtube.com', 'youtu.be']):
485
- raise HTTPException(status_code=400, detail="Invalid YouTube URL")
486
-
487
- cmd = ['yt-dlp', '--list-formats', '--no-cookies', url_str]
488
-
489
- loop = asyncio.get_event_loop()
490
- result = await loop.run_in_executor(
491
- executor,
492
- lambda: subprocess.run(cmd, capture_output=True, text=True, check=True)
493
- )
494
-
495
- formats = result.stdout.split('\n')
496
- return {"success": True, "formats": formats}
497
-
498
- except subprocess.CalledProcessError as e:
499
- logger.error(f"Failed to list formats: {e.stderr}")
500
- raise HTTPException(status_code=500, detail="Failed to list formats")
501
- except Exception as e:
502
- logger.error(f"Error listing formats: {e}")
503
- raise HTTPException(status_code=500, detail=str(e))
504
-
505
- async def cleanup_file(file_path: str, delay: int = 3600):
506
  """Clean up downloaded file after delay"""
507
  await asyncio.sleep(delay)
508
  try:
@@ -513,7 +599,6 @@ async def cleanup_file(file_path: str, delay: int = 3600):
513
  logger.error(f"Failed to cleanup file {file_path}: {e}")
514
 
515
  if __name__ == "__main__":
516
- # Get port from environment variable (Hugging Face Spaces uses port 7860)
517
  port = int(os.environ.get("PORT", 7860))
518
 
519
  uvicorn.run(
 
1
  #!/usr/bin/env python3
2
  """
3
+ Enhanced FastAPI YouTube Video Downloader with Anti-Bot Measures
4
+ Implements strategies to bypass YouTube's rate limiting and bot detection
5
  """
6
 
7
  import os
 
9
  import subprocess
10
  import json
11
  import tempfile
12
+ import random
13
+ import time
14
+ import asyncio
15
  import logging
16
  from pathlib import Path
17
  from typing import Optional, Dict, Any, List
18
  from datetime import datetime
 
19
  from concurrent.futures import ThreadPoolExecutor
20
 
21
+ from fastapi import FastAPI, HTTPException, BackgroundTasks, Request
22
  from fastapi.responses import FileResponse, HTMLResponse
 
23
  from fastapi.middleware.cors import CORSMiddleware
24
  from pydantic import BaseModel, HttpUrl
25
  import uvicorn
 
28
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
29
  logger = logging.getLogger(__name__)
30
 
31
+ # Pydantic models
32
  class VideoInfoRequest(BaseModel):
33
  url: HttpUrl
34
 
 
61
  status: str
62
  yt_dlp_available: bool
63
  timestamp: str
64
+ strategies_enabled: List[str]
65
 
66
  # Initialize FastAPI app
67
  app = FastAPI(
68
+ title="Enhanced YouTube Video Downloader",
69
+ description="Download YouTube videos with anti-bot measures and rate limiting bypass",
70
+ version="2.0.0",
71
  docs_url="/docs",
72
  redoc_url="/redoc"
73
  )
 
82
  )
83
 
84
  # Thread pool for background tasks
85
+ executor = ThreadPoolExecutor(max_workers=2) # Reduced to avoid overwhelming YouTube
86
 
87
+ class AntiDetectionManager:
88
+ """Manages anti-detection strategies for YouTube downloading"""
 
 
89
 
90
+ def __init__(self):
91
+ self.user_agents = [
92
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
93
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
94
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
95
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
96
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0',
97
+ 'Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0',
98
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/120.0.0.0 Safari/537.36',
99
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15'
100
+ ]
101
+
102
+ self.sleep_intervals = [1, 2, 3, 5] # Random sleep intervals
103
+ self.last_request_time = 0
104
+ self.request_count = 0
105
+ self.max_requests_per_minute = 10
106
+
107
+ def get_random_user_agent(self) -> str:
108
+ """Get a random user agent"""
109
+ return random.choice(self.user_agents)
110
+
111
+ def get_sleep_interval(self) -> int:
112
+ """Get a random sleep interval"""
113
+ return random.choice(self.sleep_intervals)
114
+
115
+ def should_rate_limit(self) -> bool:
116
+ """Check if we should rate limit requests"""
117
+ current_time = time.time()
118
+
119
+ # Reset counter every minute
120
+ if current_time - self.last_request_time > 60:
121
+ self.request_count = 0
122
+ self.last_request_time = current_time
123
+
124
+ self.request_count += 1
125
+
126
+ # Rate limit if too many requests
127
+ if self.request_count > self.max_requests_per_minute:
128
+ return True
129
+
130
+ return False
131
+
132
+ def get_enhanced_command(self, base_cmd: List[str]) -> List[str]:
133
+ """Enhance yt-dlp command with anti-detection measures"""
134
+ enhanced_cmd = base_cmd.copy()
135
+
136
+ # Add user agent
137
+ enhanced_cmd.extend(['--user-agent', self.get_random_user_agent()])
138
+
139
+ # Add sleep interval
140
+ enhanced_cmd.extend(['--sleep-interval', str(self.get_sleep_interval())])
141
+
142
+ # Add retry options
143
+ enhanced_cmd.extend(['--retries', '3'])
144
+ enhanced_cmd.extend(['--fragment-retries', '3'])
145
+
146
+ # Add socket timeout
147
+ enhanced_cmd.extend(['--socket-timeout', '30'])
148
 
149
+ # Disable certificate checking (sometimes helps)
150
+ enhanced_cmd.extend(['--no-check-certificates'])
151
+
152
+ # Add geo bypass
153
+ enhanced_cmd.extend(['--geo-bypass'])
154
+
155
+ # Add additional headers to look more like a browser
156
+ enhanced_cmd.extend(['--add-header', 'Accept:text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'])
157
+ enhanced_cmd.extend(['--add-header', 'Accept-Language:en-US,en;q=0.5'])
158
+ enhanced_cmd.extend(['--add-header', 'Accept-Encoding:gzip, deflate'])
159
+ enhanced_cmd.extend(['--add-header', 'DNT:1'])
160
+ enhanced_cmd.extend(['--add-header', 'Connection:keep-alive'])
161
+ enhanced_cmd.extend(['--add-header', 'Upgrade-Insecure-Requests:1'])
162
+
163
+ return enhanced_cmd
164
+
165
+ class EnhancedYouTubeDownloader:
166
+ """Enhanced YouTube downloader with anti-detection measures"""
167
+
168
+ def __init__(self, download_dir: str = None):
169
  if download_dir is None:
 
170
  if os.path.exists('/data'):
171
  download_dir = '/data/downloads'
172
  else:
 
174
 
175
  self.download_dir = Path(download_dir)
176
  self.download_dir.mkdir(parents=True, exist_ok=True)
177
+ self.anti_detection = AntiDetectionManager()
178
 
179
  # Ensure yt-dlp is available
180
  self._ensure_ytdlp_available()
 
195
  logger.error(f"Failed to install yt-dlp: {e}")
196
  raise RuntimeError("Could not install yt-dlp")
197
 
198
+ def get_video_info(self, url: str, retry_count: int = 0) -> Optional[Dict[str, Any]]:
199
+ """Get video information with anti-detection measures"""
200
+ max_retries = 3
201
+
202
+ # Check rate limiting
203
+ if self.anti_detection.should_rate_limit():
204
+ logger.warning("Rate limiting applied - waiting before request")
205
+ time.sleep(30) # Wait 30 seconds if rate limited
206
 
 
 
 
 
 
 
207
  try:
208
+ base_cmd = [
209
  'yt-dlp',
210
  '--dump-json',
211
  '--no-download',
 
213
  str(url)
214
  ]
215
 
216
+ # Enhance command with anti-detection measures
217
+ cmd = self.anti_detection.get_enhanced_command(base_cmd)
218
+
219
+ logger.info(f"Executing command (attempt {retry_count + 1}): {' '.join(cmd[:5])}...")
220
+
221
+ result = subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=60)
222
  video_info = json.loads(result.stdout)
223
 
224
  return {
 
235
  }
236
 
237
  except subprocess.CalledProcessError as e:
238
+ error_msg = e.stderr.lower() if e.stderr else ""
239
+
240
+ # Handle specific error cases
241
+ if "429" in error_msg or "too many requests" in error_msg:
242
+ if retry_count < max_retries:
243
+ wait_time = (retry_count + 1) * 30 # Exponential backoff
244
+ logger.warning(f"Rate limited (429), waiting {wait_time}s before retry {retry_count + 1}")
245
+ time.sleep(wait_time)
246
+ return self.get_video_info(url, retry_count + 1)
247
+ else:
248
+ logger.error("Max retries exceeded for rate limiting")
249
+ return None
250
+
251
+ elif "sign in" in error_msg or "bot" in error_msg:
252
+ if retry_count < max_retries:
253
+ wait_time = (retry_count + 1) * 60 # Longer wait for bot detection
254
+ logger.warning(f"Bot detection triggered, waiting {wait_time}s before retry {retry_count + 1}")
255
+ time.sleep(wait_time)
256
+ return self.get_video_info(url, retry_count + 1)
257
+ else:
258
+ logger.error("Max retries exceeded for bot detection")
259
+ return None
260
+
261
  logger.error(f"Failed to get video info: {e.stderr}")
262
  return None
263
+
264
+ except (json.JSONDecodeError, subprocess.TimeoutExpired) as e:
265
+ logger.error(f"Error processing video info: {e}")
266
+ if retry_count < max_retries:
267
+ time.sleep(10)
268
+ return self.get_video_info(url, retry_count + 1)
269
  return None
270
 
271
  def download_video(self, url: str, quality: str = "best",
272
+ audio_only: bool = False, retry_count: int = 0) -> Optional[str]:
273
+ """Download video with anti-detection measures"""
274
+ max_retries = 2
275
+
276
+ # Check rate limiting
277
+ if self.anti_detection.should_rate_limit():
278
+ logger.warning("Rate limiting applied - waiting before download")
279
+ time.sleep(30)
280
 
 
 
 
 
 
 
 
 
281
  try:
282
+ base_cmd = ['yt-dlp']
 
283
 
284
  # Set output directory and filename template
285
  output_template = str(self.download_dir / "%(title)s.%(ext)s")
286
+ base_cmd.extend(['-o', output_template])
287
 
288
  # Set format/quality
289
  if audio_only:
290
+ base_cmd.extend(['-f', 'bestaudio/best'])
291
  else:
292
  if quality == "best":
293
+ base_cmd.extend(['-f', 'best[height<=720]'])
294
  elif quality == "worst":
295
+ base_cmd.extend(['-f', 'worst'])
296
  else:
297
+ base_cmd.extend(['-f', quality])
298
 
299
+ base_cmd.extend(['--no-cookies', str(url)])
300
+
301
+ # Enhance command with anti-detection measures
302
+ cmd = self.anti_detection.get_enhanced_command(base_cmd)
 
 
 
303
 
304
+ logger.info(f"Downloading video (attempt {retry_count + 1}): {url}")
305
 
306
+ result = subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=300)
 
307
 
308
  logger.info("Download completed successfully")
309
 
310
  # Find the downloaded file
311
  downloaded_files = [f for f in self.download_dir.glob("*") if f.is_file()]
312
  if downloaded_files:
 
313
  latest_file = max(downloaded_files, key=os.path.getctime)
314
  return str(latest_file)
315
 
316
  return None
317
 
318
  except subprocess.CalledProcessError as e:
319
+ error_msg = e.stderr.lower() if e.stderr else ""
320
+
321
+ if ("429" in error_msg or "too many requests" in error_msg or
322
+ "sign in" in error_msg or "bot" in error_msg):
323
+ if retry_count < max_retries:
324
+ wait_time = (retry_count + 1) * 60
325
+ logger.warning(f"Download blocked, waiting {wait_time}s before retry {retry_count + 1}")
326
+ time.sleep(wait_time)
327
+ return self.download_video(url, quality, audio_only, retry_count + 1)
328
+
329
  logger.error(f"Download failed: {e.stderr}")
330
  return None
331
+
332
+ except subprocess.TimeoutExpired:
333
+ logger.error("Download timeout")
334
+ if retry_count < max_retries:
335
+ return self.download_video(url, quality, audio_only, retry_count + 1)
336
+ return None
337
 
338
  # Global downloader instance
339
+ downloader = EnhancedYouTubeDownloader()
340
 
341
  @app.get("/", response_class=HTMLResponse)
342
  async def read_root():
 
347
  <head>
348
  <meta charset="UTF-8">
349
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
350
+ <title>Enhanced YouTube Video Downloader</title>
351
  <style>
352
  body {
353
  font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
 
380
  color: #666;
381
  font-size: 1.1em;
382
  }
383
+ .features {
384
  background: #f8f9fa;
385
  border-radius: 8px;
386
  padding: 20px;
387
  margin: 20px 0;
388
  }
389
+ .feature {
390
  background: white;
391
  border: 1px solid #dee2e6;
392
  border-radius: 5px;
393
  padding: 15px;
394
  margin: 10px 0;
395
  }
 
 
 
 
 
 
 
 
 
 
396
  .btn {
397
  background: linear-gradient(135deg, #667eea, #764ba2);
398
  color: white;
 
407
  .btn:hover {
408
  transform: translateY(-2px);
409
  }
410
+ .warning {
411
+ background: #fff3cd;
412
+ border: 1px solid #ffeaa7;
413
+ border-radius: 5px;
414
+ padding: 15px;
415
+ margin: 20px 0;
416
+ color: #856404;
417
  }
418
  </style>
419
  </head>
420
  <body>
421
  <div class="container">
422
  <div class="header">
423
+ <h1>🛡️ Enhanced YouTube Downloader</h1>
424
+ <p>Advanced anti-detection measures for reliable video downloading</p>
425
  </div>
426
 
427
+ <div class="warning">
428
+ <strong>⚠️ Important:</strong> This enhanced version includes anti-bot measures to handle YouTube's restrictions.
429
+ Downloads may take longer due to rate limiting and retry mechanisms.
430
+ </div>
431
+
432
+ <div class="features">
433
+ <h3>🚀 Enhanced Features</h3>
434
+
435
+ <div class="feature">
436
+ <strong>🔄 Smart Retry Logic:</strong> Automatically retries failed requests with exponential backoff
437
+ </div>
438
 
439
+ <div class="feature">
440
+ <strong>🎭 User-Agent Rotation:</strong> Randomizes browser signatures to avoid detection
 
441
  </div>
442
 
443
+ <div class="feature">
444
+ <strong>⏱️ Rate Limiting:</strong> Intelligent request spacing to prevent 429 errors
 
445
  </div>
446
 
447
+ <div class="feature">
448
+ <strong>🌐 Enhanced Headers:</strong> Mimics real browser behavior with proper headers
 
449
  </div>
450
 
451
+ <div class="feature">
452
+ <strong>🔧 Timeout Handling:</strong> Robust timeout and error recovery mechanisms
 
453
  </div>
454
  </div>
455
 
456
  <div style="text-align: center;">
457
+ <a href="/docs" class="btn">📖 API Documentation</a>
458
+ <a href="/health" class="btn">🏥 Health Check</a>
 
 
 
 
 
 
 
 
459
  </div>
460
  </div>
461
  </body>
 
465
 
466
  @app.get("/health", response_model=HealthResponse)
467
  async def health_check():
468
+ """Enhanced health check with strategy information"""
469
  try:
 
470
  subprocess.run(['yt-dlp', '--version'], capture_output=True, check=True)
471
  yt_dlp_available = True
472
  except:
473
  yt_dlp_available = False
474
 
475
+ strategies = [
476
+ "User-Agent Rotation",
477
+ "Rate Limiting",
478
+ "Smart Retry Logic",
479
+ "Enhanced Headers",
480
+ "Timeout Handling",
481
+ "Exponential Backoff"
482
+ ]
483
+
484
  return HealthResponse(
485
  status="healthy" if yt_dlp_available else "unhealthy",
486
  yt_dlp_available=yt_dlp_available,
487
+ timestamp=datetime.now().isoformat(),
488
+ strategies_enabled=strategies
489
  )
490
 
491
  @app.post("/video/info", response_model=Dict[str, Any])
492
  async def get_video_info(request: VideoInfoRequest):
493
+ """Get video information with enhanced anti-detection"""
494
  try:
 
495
  url_str = str(request.url)
496
  if not any(domain in url_str for domain in ['youtube.com', 'youtu.be']):
497
  raise HTTPException(status_code=400, detail="Invalid YouTube URL")
498
 
499
+ # Get video info in thread pool with enhanced measures
500
  loop = asyncio.get_event_loop()
501
  info = await loop.run_in_executor(executor, downloader.get_video_info, url_str)
502
 
503
  if info:
504
  return {"success": True, "info": info}
505
  else:
506
+ raise HTTPException(
507
+ status_code=503,
508
+ detail="Failed to get video information. YouTube may be blocking requests. Please try again later."
509
+ )
510
 
511
  except HTTPException:
512
  raise
 
516
 
517
  @app.post("/video/download", response_model=DownloadResponse)
518
  async def download_video(request: DownloadRequest, background_tasks: BackgroundTasks):
519
+ """Download video with enhanced anti-detection"""
520
  try:
 
521
  url_str = str(request.url)
522
  if not any(domain in url_str for domain in ['youtube.com', 'youtu.be']):
523
  raise HTTPException(status_code=400, detail="Invalid YouTube URL")
 
526
  loop = asyncio.get_event_loop()
527
  info = await loop.run_in_executor(executor, downloader.get_video_info, url_str)
528
  if not info:
529
+ raise HTTPException(
530
+ status_code=503,
531
+ detail="Failed to get video information. YouTube may be blocking requests."
532
+ )
533
 
534
  # Download the video
535
  downloaded_file = await loop.run_in_executor(
 
544
  file_size = os.path.getsize(downloaded_file)
545
  filename = os.path.basename(downloaded_file)
546
 
547
+ # Schedule cleanup after 2 hours (longer due to potential delays)
548
+ background_tasks.add_task(cleanup_file, downloaded_file, delay=7200)
549
 
550
  return DownloadResponse(
551
  success=True,
552
+ message="Video downloaded successfully with anti-detection measures",
553
  filename=filename,
554
  file_size=file_size,
555
  video_info=VideoInfo(**info),
556
  download_path=downloaded_file
557
  )
558
  else:
559
+ raise HTTPException(
560
+ status_code=503,
561
+ detail="Failed to download video. YouTube may be blocking requests. Please try again later."
562
+ )
563
 
564
  except HTTPException:
565
  raise
 
571
  async def download_file(filename: str):
572
  """Serve downloaded files"""
573
  try:
 
574
  file_path = downloader.download_dir / filename
575
 
 
576
  if not file_path.exists() or not str(file_path.resolve()).startswith(str(downloader.download_dir.resolve())):
577
  raise HTTPException(status_code=404, detail="File not found")
578
 
 
588
  logger.error(f"Error serving file: {e}")
589
  raise HTTPException(status_code=500, detail=str(e))
590
 
591
+ async def cleanup_file(file_path: str, delay: int = 7200):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
592
  """Clean up downloaded file after delay"""
593
  await asyncio.sleep(delay)
594
  try:
 
599
  logger.error(f"Failed to cleanup file {file_path}: {e}")
600
 
601
  if __name__ == "__main__":
 
602
  port = int(os.environ.get("PORT", 7860))
603
 
604
  uvicorn.run(