Spaces:
Running
Running
Ahmed Mostafa committed on
Commit ·
75182b8
1
Parent(s): 87a52fb
fix youtube downloader v1.1
Browse files- requirements.txt +1 -0
- src/api/main.py +48 -29
- src/transcription/audio_downloader.py +61 -0
requirements.txt
CHANGED
|
@@ -27,3 +27,4 @@ google-api-python-client==2.115.0
|
|
| 27 |
pydantic-core==2.41.5
|
| 28 |
ffmpeg-python
|
| 29 |
firebase-admin==6.5.0
|
|
|
|
|
|
| 27 |
pydantic-core==2.41.5
|
| 28 |
ffmpeg-python
|
| 29 |
firebase-admin==6.5.0
|
| 30 |
+
dnspython
|
src/api/main.py
CHANGED
|
@@ -68,6 +68,54 @@ app.include_router(auth_router)
|
|
| 68 |
app.include_router(notes_router)
|
| 69 |
|
| 70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
@app.post("/generate", response_model=TaskResponse)
|
| 72 |
async def generate(
|
| 73 |
request: GenerateNotesRequest,
|
|
@@ -152,35 +200,6 @@ async def process_video_and_save(
|
|
| 152 |
downloader.cleanup(audio_file)
|
| 153 |
|
| 154 |
|
| 155 |
-
@app.get("/health")
|
| 156 |
-
async def health_check():
|
| 157 |
-
import socket
|
| 158 |
-
import httpx
|
| 159 |
-
|
| 160 |
-
dns_results = {}
|
| 161 |
-
for domain in ["www.youtube.com", "google.com", "huggingface.co"]:
|
| 162 |
-
try:
|
| 163 |
-
dns_results[domain] = socket.gethostbyname(domain)
|
| 164 |
-
except Exception as e:
|
| 165 |
-
dns_results[domain] = f"Error: {e}"
|
| 166 |
-
|
| 167 |
-
connectivity = {}
|
| 168 |
-
async with httpx.AsyncClient(timeout=5.0) as client:
|
| 169 |
-
for url in ["https://www.google.com", "https://www.youtube.com"]:
|
| 170 |
-
try:
|
| 171 |
-
resp = await client.get(url, follow_redirects=True)
|
| 172 |
-
connectivity[url] = f"OK ({resp.status_code})"
|
| 173 |
-
except Exception as e:
|
| 174 |
-
connectivity[url] = f"Failed: {e}"
|
| 175 |
-
|
| 176 |
-
return {
|
| 177 |
-
"status": "online",
|
| 178 |
-
"dns": dns_results,
|
| 179 |
-
"connectivity": connectivity,
|
| 180 |
-
"timestamp": datetime.now()
|
| 181 |
-
}
|
| 182 |
-
|
| 183 |
-
|
| 184 |
@app.get("/status/{task_id}")
|
| 185 |
async def get_task_status(task_id: str):
|
| 186 |
if task_id not in tasks:
|
|
|
|
| 68 |
app.include_router(notes_router)
|
| 69 |
|
| 70 |
|
| 71 |
+
@app.get("/health")
async def health_check():
    """Report liveness plus DNS and HTTPS reachability diagnostics.

    For each probed domain the response contains two DNS entries:
    ``<domain>_sys`` (result of the system resolver) and ``<domain>_ext``
    (A records obtained from 8.8.8.8 through dnspython, or a note that
    dnspython is not installed). It also reports whether an HTTPS GET to
    YouTube succeeds.

    Returns:
        dict with the keys ``status``, ``dnspython``, ``dns``,
        ``connectivity`` and ``timestamp``. Lookup and request failures are
        reported as strings inside the dict rather than raised.
    """
    import asyncio
    import socket
    import httpx
    try:
        import dns.resolver
        has_dnspython = True
    except ImportError:
        has_dnspython = False

    def _external_lookup(domain):
        # Query Google's public resolver directly, bypassing the system one.
        resolver = dns.resolver.Resolver()
        resolver.nameservers = ['8.8.8.8']
        answer = resolver.resolve(domain, 'A')
        return [str(rdata) for rdata in answer]

    # Both DNS probes are blocking calls; run them in the default executor so
    # a slow or broken resolver cannot stall the event loop for other requests.
    loop = asyncio.get_running_loop()

    dns_results = {}
    for domain in ["www.youtube.com", "google.com"]:
        # System DNS
        try:
            dns_results[f"{domain}_sys"] = await loop.run_in_executor(
                None, socket.gethostbyname, domain
            )
        except Exception as e:
            dns_results[f"{domain}_sys"] = f"Error: {e}"

        # dnspython (External DNS 8.8.8.8)
        if has_dnspython:
            try:
                dns_results[f"{domain}_ext"] = await loop.run_in_executor(
                    None, _external_lookup, domain
                )
            except Exception as e:
                dns_results[f"{domain}_ext"] = f"Error: {repr(e)}"
        else:
            dns_results[f"{domain}_ext"] = "dnspython not installed"

    connectivity = {}
    async with httpx.AsyncClient(timeout=5.0) as client:
        for url in ["https://www.youtube.com"]:
            try:
                resp = await client.get(url, follow_redirects=True)
                connectivity[url] = f"OK ({resp.status_code})"
            except Exception as e:
                connectivity[url] = f"Failed: {repr(e)}"

    return {
        "status": "v6-online",
        "dnspython": has_dnspython,
        "dns": dns_results,
        "connectivity": connectivity,
        "timestamp": datetime.now()
    }
|
| 117 |
+
|
| 118 |
+
|
| 119 |
@app.post("/generate", response_model=TaskResponse)
|
| 120 |
async def generate(
|
| 121 |
request: GenerateNotesRequest,
|
|
|
|
| 200 |
downloader.cleanup(audio_file)
|
| 201 |
|
| 202 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
@app.get("/status/{task_id}")
|
| 204 |
async def get_task_status(task_id: str):
|
| 205 |
if task_id not in tasks:
|
src/transcription/audio_downloader.py
CHANGED
|
@@ -4,10 +4,44 @@ Uses yt-dlp for robust YouTube video handling.
|
|
| 4 |
"""
|
| 5 |
|
| 6 |
import re
|
|
|
|
| 7 |
from pathlib import Path
|
| 8 |
from typing import Dict, Optional
|
| 9 |
import yt_dlp
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
from src.utils.logger import setup_logger
|
| 12 |
from src.utils.config import settings
|
| 13 |
|
|
@@ -26,6 +60,33 @@ class YouTubeDownloader:
|
|
| 26 |
"""
|
| 27 |
self.output_dir = output_dir or settings.temp_dir
|
| 28 |
self.output_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
@staticmethod
|
| 31 |
def is_valid_youtube_url(url: str) -> bool:
|
|
|
|
| 4 |
"""
|
| 5 |
|
| 6 |
import re
|
| 7 |
+
import socket
|
| 8 |
from pathlib import Path
|
| 9 |
from typing import Dict, Optional
|
| 10 |
import yt_dlp
|
| 11 |
|
| 12 |
+
try:
|
| 13 |
+
import dns.resolver
|
| 14 |
+
HAS_DNSPYTHON = True
|
| 15 |
+
except ImportError:
|
| 16 |
+
HAS_DNSPYTHON = False
|
| 17 |
+
|
| 18 |
+
# --- DNS Monkeypatch (v6) ---
|
| 19 |
+
# Some cloud hosts (Hugging Face) have broken DNS resolvers.
|
| 20 |
+
# This patch forces a fallback to Google DNS (8.8.8.8) if the system resolver fails.
|
| 21 |
+
_orig_getaddrinfo = socket.getaddrinfo
|
| 22 |
+
|
| 23 |
+
def _custom_getaddrinfo(host, port, family=0, type=0, proto=0, flags=0):
    """Drop-in replacement for ``socket.getaddrinfo`` with a public-DNS fallback.

    The original ``socket.getaddrinfo`` is tried first. If it raises
    ``socket.gaierror`` and dnspython is available, the host's A record is
    looked up via 8.8.8.8 / 8.8.4.4 and the original function is re-run with
    the resulting IP address.

    Parameters mirror ``socket.getaddrinfo`` (hence the ``type`` name, which
    must stay to keep keyword callers working).

    Raises:
        socket.gaierror: the original error, when the fallback is unavailable,
            skipped, or fails as well.
    """
    try:
        return _orig_getaddrinfo(host, port, family, type, proto, flags)
    except socket.gaierror:
        if HAS_DNSPYTHON and host:
            # The module logger is not set up at this point in the file, so
            # the fallback is strictly best-effort and stays silent on failure.
            try:
                # getaddrinfo accepts bytes hosts; normalise to str so the
                # prefix check below cannot raise and mask the gaierror.
                name = (
                    host.decode('ascii')
                    if isinstance(host, (bytes, bytearray))
                    else host
                )
                if not name.startswith('127.'):
                    resolver = dns.resolver.Resolver()
                    resolver.nameservers = ['8.8.8.8', '8.8.4.4']
                    answer = resolver.resolve(name, 'A')
                    if answer:
                        ip = str(answer[0])
                        # Re-run original with the IP
                        return _orig_getaddrinfo(ip, port, family, type, proto, flags)
            except Exception:
                # Any fallback failure falls through to re-raising the
                # original system-resolver error below.
                pass
        # Bare raise re-raises the gaierror with its original traceback.
        raise
|
| 41 |
+
|
| 42 |
+
socket.getaddrinfo = _custom_getaddrinfo
|
| 43 |
+
# ----------------------------
|
| 44 |
+
|
| 45 |
from src.utils.logger import setup_logger
|
| 46 |
from src.utils.config import settings
|
| 47 |
|
|
|
|
| 60 |
"""
|
| 61 |
self.output_dir = output_dir or settings.temp_dir
|
| 62 |
self.output_dir.mkdir(parents=True, exist_ok=True)
|
| 63 |
+
|
| 64 |
+
def _resolve_hostname(self, hostname: str) -> Optional[str]:
    """
    Resolve ``hostname`` to an IPv4 address string.

    The system resolver is consulted first. If it raises and dnspython is
    importable, an A-record query is sent to 8.8.8.8 / 8.8.4.4 instead.

    Returns:
        The resolved address as a string, or ``None`` when every attempt
        fails (failures are logged, not raised).
    """
    # Step 1: system resolver.
    try:
        return socket.gethostbyname(hostname)
    except Exception as sys_err:
        logger.warning(f"[v6] System DNS failed for {hostname}: {sys_err}")

    # Step 2: without dnspython there is no fallback path.
    if not HAS_DNSPYTHON:
        return None

    # Step 3: manual query against Google's public DNS servers.
    try:
        fallback = dns.resolver.Resolver()
        fallback.nameservers = ['8.8.8.8', '8.8.4.4']
        fallback.timeout = 5
        fallback.lifetime = 10
        records = fallback.resolve(hostname, 'A')
        if records:
            address = str(records[0])
            logger.info(f"[v6] Manual DNS resolved {hostname} to {address}")
            return address
    except Exception as dns_err:
        logger.error(f"[v6] Manual DNS also failed for {hostname}: {dns_err}")

    return None
|
| 90 |
|
| 91 |
@staticmethod
|
| 92 |
def is_valid_youtube_url(url: str) -> bool:
|