Ahmed Mostafa committed on
Commit
8fd886e
·
1 Parent(s): d842b52

feat: implement multi-provider YouTube transcript downloader with fallback support

Browse files
Files changed (2) hide show
  1. Dockerfile +9 -2
  2. src/api/downloader.py +29 -0
Dockerfile CHANGED
@@ -1,12 +1,19 @@
1
  # 1. اختيار النسخة الأساسية
2
- FROM python:3.10-slim
3
 
4
  # 2. تسطيب برامج النظام (ffmpeg للتعامل مع الصوت)
5
  RUN apt-get update && apt-get install -y --no-install-recommends \
 
6
  ffmpeg \
 
7
  curl \
 
8
  && rm -rf /var/lib/apt/lists/*
9
 
 
 
 
 
10
  # 3. تجهيز فولدر المشروع
11
  WORKDIR /app
12
 
@@ -22,4 +29,4 @@ RUN chown -R 1000:1000 /app
22
  USER 1000
23
 
24
  # 7. أمر تشغيل السيرفر الأساسي
25
- CMD ["uvicorn", "src.api.main:app", "--host", "0.0.0.0", "--port", "7860"]
 
1
  # 1. اختيار النسخة الأساسية
2
+ FROM python:3.11-slim
3
 
4
  # 2. تسطيب برامج النظام (ffmpeg للتعامل مع الصوت)
5
  RUN apt-get update && apt-get install -y --no-install-recommends \
6
+ ca-certificates \
7
  ffmpeg \
8
+ openssl \
9
  curl \
10
+ nodejs \
11
  && rm -rf /var/lib/apt/lists/*
12
 
13
+ ENV SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt \
14
+ REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt \
15
+ CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
16
+
17
  # 3. تجهيز فولدر المشروع
18
  WORKDIR /app
19
 
 
29
  USER 1000
30
 
31
  # 7. أمر تشغيل السيرفر الأساسي
32
+ CMD ["uvicorn", "src.api.main:app", "--host", "0.0.0.0", "--port", "7860"]
src/api/downloader.py CHANGED
@@ -18,6 +18,8 @@ _FAST_FAIL_SSL_MARKERS = (
18
  "SSLEOFError",
19
  "EOF occurred in violation of protocol",
20
  "TLS",
 
 
21
  )
22
 
23
 
@@ -185,6 +187,7 @@ class YouTubeDownloader:
185
  'no_warnings': True,
186
  'extract_flat': False,
187
  }
 
188
  self._apply_cookie_options(ydl_opts)
189
 
190
  try:
@@ -254,6 +257,7 @@ class YouTubeDownloader:
254
  }
255
  ],
256
  }
 
257
  self._apply_cookie_options(ydl_opts)
258
 
259
  failures: List[str] = []
@@ -311,6 +315,31 @@ class YouTubeDownloader:
311
  if cookie_file:
312
  ydl_opts["cookiefile"] = str(cookie_file)
313
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
314
  def _resolve_cookie_file(self) -> Path | None:
315
  if self._youtube_cookies_file:
316
  cookie_path = Path(self._youtube_cookies_file)
 
18
  "SSLEOFError",
19
  "EOF occurred in violation of protocol",
20
  "TLS",
21
+ "TLS connect error",
22
+ "invalid library",
23
  )
24
 
25
 
 
187
  'no_warnings': True,
188
  'extract_flat': False,
189
  }
190
+ self._apply_youtube_network_options(ydl_opts)
191
  self._apply_cookie_options(ydl_opts)
192
 
193
  try:
 
257
  }
258
  ],
259
  }
260
+ self._apply_youtube_network_options(ydl_opts)
261
  self._apply_cookie_options(ydl_opts)
262
 
263
  failures: List[str] = []
 
315
  if cookie_file:
316
  ydl_opts["cookiefile"] = str(cookie_file)
317
 
318
def _apply_youtube_network_options(self, ydl_opts: dict) -> None:
    """Merge the shared YouTube network settings into ``ydl_opts`` in place.

    Existing keys with the same name are overwritten (plain ``dict.update``
    semantics); unrelated keys already present in ``ydl_opts`` are kept.

    Args:
        ydl_opts: The yt-dlp options dictionary to mutate.

    Returns:
        None. The dictionary is modified in place.
    """
    ydl_opts.update(
        {
            # NOTE(review): binding to 0.0.0.0 is presumably meant to force
            # IPv4 (yt-dlp's --force-ipv4 equivalent) — confirm intent.
            "source_address": "0.0.0.0",
            "socket_timeout": 30,
            "retries": 5,
            "fragment_retries": 5,
            # Keep TLS certificate verification ON. Disabling it would let a
            # man-in-the-middle read the session cookies sent with each
            # request, and it does not help with handshake/EOF failures.
            # The container image is expected to provide a CA bundle.
            "nocheckcertificate": False,
            "geo_bypass": True,
            "http_headers": {
                # Desktop Chrome user agent string sent with every request.
                "User-Agent": (
                    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                    "AppleWebKit/537.36 (KHTML, like Gecko) "
                    "Chrome/124.0.0.0 Safari/537.36"
                ),
                "Accept-Language": "en-US,en;q=0.9",
            },
            "extractor_args": {
                "youtube": {
                    # Player clients handed to the YouTube extractor, in
                    # this order.
                    "player_client": ["android", "web_safari", "tv"],
                },
            },
        }
    )
342
+
343
  def _resolve_cookie_file(self) -> Path | None:
344
  if self._youtube_cookies_file:
345
  cookie_path = Path(self._youtube_cookies_file)