Spaces:
Running
Running
Ahmed Mostafa committed on
Commit ·
8fd886e
1
Parent(s): d842b52
feat: implement multi-provider YouTube transcript downloader with fallback support
Browse files
- Dockerfile +9 -2
- src/api/downloader.py +29 -0
Dockerfile
CHANGED
|
@@ -1,12 +1,19 @@
|
|
| 1 |
# 1. اختيار النسخة الأساسية
|
| 2 |
-
FROM python:3.
|
| 3 |
|
| 4 |
# 2. تسطيب برامج النظام (ffmpeg للتعامل مع الصوت)
|
| 5 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
|
|
| 6 |
ffmpeg \
|
|
|
|
| 7 |
curl \
|
|
|
|
| 8 |
&& rm -rf /var/lib/apt/lists/*
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
# 3. تجهيز فولدر المشروع
|
| 11 |
WORKDIR /app
|
| 12 |
|
|
@@ -22,4 +29,4 @@ RUN chown -R 1000:1000 /app
|
|
| 22 |
USER 1000
|
| 23 |
|
| 24 |
# 7. أمر تشغيل السيرفر الأساسي
|
| 25 |
-
CMD ["uvicorn", "src.api.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
|
| 1 |
# 1. اختيار النسخة الأساسية
|
| 2 |
+
FROM python:3.11-slim
|
| 3 |
|
| 4 |
# 2. تسطيب برامج النظام (ffmpeg للتعامل مع الصوت)
|
| 5 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 6 |
+
ca-certificates \
|
| 7 |
ffmpeg \
|
| 8 |
+
openssl \
|
| 9 |
curl \
|
| 10 |
+
nodejs \
|
| 11 |
&& rm -rf /var/lib/apt/lists/*
|
| 12 |
|
| 13 |
+
ENV SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt \
|
| 14 |
+
REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt \
|
| 15 |
+
CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
|
| 16 |
+
|
| 17 |
# 3. تجهيز فولدر المشروع
|
| 18 |
WORKDIR /app
|
| 19 |
|
|
|
|
| 29 |
USER 1000
|
| 30 |
|
| 31 |
# 7. أمر تشغيل السيرفر الأساسي
|
| 32 |
+
CMD ["uvicorn", "src.api.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
src/api/downloader.py
CHANGED
|
@@ -18,6 +18,8 @@ _FAST_FAIL_SSL_MARKERS = (
|
|
| 18 |
"SSLEOFError",
|
| 19 |
"EOF occurred in violation of protocol",
|
| 20 |
"TLS",
|
|
|
|
|
|
|
| 21 |
)
|
| 22 |
|
| 23 |
|
|
@@ -185,6 +187,7 @@ class YouTubeDownloader:
|
|
| 185 |
'no_warnings': True,
|
| 186 |
'extract_flat': False,
|
| 187 |
}
|
|
|
|
| 188 |
self._apply_cookie_options(ydl_opts)
|
| 189 |
|
| 190 |
try:
|
|
@@ -254,6 +257,7 @@ class YouTubeDownloader:
|
|
| 254 |
}
|
| 255 |
],
|
| 256 |
}
|
|
|
|
| 257 |
self._apply_cookie_options(ydl_opts)
|
| 258 |
|
| 259 |
failures: List[str] = []
|
|
@@ -311,6 +315,31 @@ class YouTubeDownloader:
|
|
| 311 |
if cookie_file:
|
| 312 |
ydl_opts["cookiefile"] = str(cookie_file)
|
| 313 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
def _resolve_cookie_file(self) -> Path | None:
|
| 315 |
if self._youtube_cookies_file:
|
| 316 |
cookie_path = Path(self._youtube_cookies_file)
|
|
|
|
| 18 |
"SSLEOFError",
|
| 19 |
"EOF occurred in violation of protocol",
|
| 20 |
"TLS",
|
| 21 |
+
"TLS connect error",
|
| 22 |
+
"invalid library",
|
| 23 |
)
|
| 24 |
|
| 25 |
|
|
|
|
| 187 |
'no_warnings': True,
|
| 188 |
'extract_flat': False,
|
| 189 |
}
|
| 190 |
+
self._apply_youtube_network_options(ydl_opts)
|
| 191 |
self._apply_cookie_options(ydl_opts)
|
| 192 |
|
| 193 |
try:
|
|
|
|
| 257 |
}
|
| 258 |
],
|
| 259 |
}
|
| 260 |
+
self._apply_youtube_network_options(ydl_opts)
|
| 261 |
self._apply_cookie_options(ydl_opts)
|
| 262 |
|
| 263 |
failures: List[str] = []
|
|
|
|
| 315 |
if cookie_file:
|
| 316 |
ydl_opts["cookiefile"] = str(cookie_file)
|
| 317 |
|
| 318 |
+
def _apply_youtube_network_options(self, ydl_opts: dict) -> None:
|
| 319 |
+
ydl_opts.update(
|
| 320 |
+
{
|
| 321 |
+
"source_address": "0.0.0.0",
|
| 322 |
+
"socket_timeout": 30,
|
| 323 |
+
"retries": 5,
|
| 324 |
+
"fragment_retries": 5,
|
| 325 |
+
"nocheckcertificate": True,
|
| 326 |
+
"geo_bypass": True,
|
| 327 |
+
"http_headers": {
|
| 328 |
+
"User-Agent": (
|
| 329 |
+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
| 330 |
+
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
| 331 |
+
"Chrome/124.0.0.0 Safari/537.36"
|
| 332 |
+
),
|
| 333 |
+
"Accept-Language": "en-US,en;q=0.9",
|
| 334 |
+
},
|
| 335 |
+
"extractor_args": {
|
| 336 |
+
"youtube": {
|
| 337 |
+
"player_client": ["android", "web_safari", "tv"],
|
| 338 |
+
},
|
| 339 |
+
},
|
| 340 |
+
}
|
| 341 |
+
)
|
| 342 |
+
|
| 343 |
def _resolve_cookie_file(self) -> Path | None:
|
| 344 |
if self._youtube_cookies_file:
|
| 345 |
cookie_path = Path(self._youtube_cookies_file)
|