Update: fix bot detection
Browse files- .env.example +3 -0
- app/core/config.py +3 -0
- app/services/youtube_service.py +60 -37
.env.example
CHANGED
|
@@ -5,6 +5,9 @@ SERVER_PORT=8000
|
|
| 5 |
# API Configuration
|
| 6 |
API_V1_STR=/api
|
| 7 |
|
|
|
|
|
|
|
|
|
|
| 8 |
# Project Configuration
|
| 9 |
PROJECT_NAME="YouTube to MP3 Converter"
|
| 10 |
|
|
|
|
| 5 |
# API Configuration
|
| 6 |
API_V1_STR=/api
|
| 7 |
|
| 8 |
+
# YouTube Configuration
|
| 9 |
+
YOUTUBE_API_KEY=your_youtube_api_key_here
|
| 10 |
+
|
| 11 |
# Project Configuration
|
| 12 |
PROJECT_NAME="YouTube to MP3 Converter"
|
| 13 |
|
app/core/config.py
CHANGED
|
@@ -11,6 +11,9 @@ class Settings(BaseSettings):
|
|
| 11 |
SERVER_HOST: str
|
| 12 |
SERVER_PORT: int
|
| 13 |
|
|
|
|
|
|
|
|
|
|
| 14 |
# Hugging Face configuration
|
| 15 |
HF_SPACE_NAME: str = "bivav/video-mp3"
|
| 16 |
HF_STATIC_DIR: str = "/data/audio"
|
|
|
|
| 11 |
SERVER_HOST: str
|
| 12 |
SERVER_PORT: int
|
| 13 |
|
| 14 |
+
# YouTube API configuration
|
| 15 |
+
YOUTUBE_API_KEY: str
|
| 16 |
+
|
| 17 |
# Hugging Face configuration
|
| 18 |
HF_SPACE_NAME: str = "bivav/video-mp3"
|
| 19 |
HF_STATIC_DIR: str = "/data/audio"
|
app/services/youtube_service.py
CHANGED
|
@@ -22,24 +22,39 @@ class YouTubeService:
|
|
| 22 |
# Add user agent and other headers to avoid restrictions
|
| 23 |
"http_headers": {
|
| 24 |
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
| 25 |
-
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
| 26 |
-
"Accept-Language": "en-
|
|
|
|
| 27 |
"Referer": "https://www.youtube.com/",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
},
|
| 29 |
# Add retries and throttling with better Docker-optimized settings
|
| 30 |
-
"retries":
|
| 31 |
-
"fragment_retries":
|
| 32 |
"skip_unavailable_fragments": True,
|
| 33 |
-
"ignoreerrors": False,
|
| 34 |
-
"no_warnings": False,
|
| 35 |
-
"sleep_interval":
|
| 36 |
-
"max_sleep_interval":
|
| 37 |
# Add geo bypass
|
| 38 |
"geo_bypass": True,
|
| 39 |
"geo_bypass_country": "US",
|
| 40 |
# Add network settings
|
| 41 |
-
"socket_timeout": 30,
|
| 42 |
-
"extractor_retries":
|
| 43 |
# Add progress hooks for better logging
|
| 44 |
"progress_hooks": [self._progress_hook],
|
| 45 |
# Force IPv4 to avoid IPv6 issues in some Docker networks
|
|
@@ -97,7 +112,7 @@ class YouTubeService:
|
|
| 97 |
opts = {**self.ydl_opts, "outtmpl": str(output_path.with_suffix(""))}
|
| 98 |
|
| 99 |
try:
|
| 100 |
-
#
|
| 101 |
await asyncio.get_event_loop().run_in_executor(
|
| 102 |
None, self._download_and_convert, url, opts
|
| 103 |
)
|
|
@@ -107,36 +122,44 @@ class YouTubeService:
|
|
| 107 |
|
| 108 |
return filename
|
| 109 |
except Exception as e:
|
| 110 |
-
print(
|
| 111 |
-
f"Error in convert_to_mp3: {str(e)}"
|
| 112 |
-
) # Better error logging for Docker
|
| 113 |
raise Exception(f"Error converting video: {str(e)}")
|
| 114 |
|
| 115 |
def _download_and_convert(self, url: str, opts: dict):
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
try:
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
|
| 141 |
|
| 142 |
youtube_service = YouTubeService()
|
|
|
|
| 22 |
# Add user agent and other headers to avoid restrictions
|
| 23 |
"http_headers": {
|
| 24 |
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
| 25 |
+
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
|
| 26 |
+
"Accept-Language": "en-US,en;q=0.9",
|
| 27 |
+
"Accept-Encoding": "gzip, deflate, br",
|
| 28 |
"Referer": "https://www.youtube.com/",
|
| 29 |
+
"Sec-Ch-Ua": '"Not_A Brand";v="8", "Chromium";v="120"',
|
| 30 |
+
"Sec-Ch-Ua-Mobile": "?0",
|
| 31 |
+
"Sec-Ch-Ua-Platform": '"Windows"',
|
| 32 |
+
"Sec-Fetch-Dest": "document",
|
| 33 |
+
"Sec-Fetch-Mode": "navigate",
|
| 34 |
+
"Sec-Fetch-Site": "none",
|
| 35 |
+
"Sec-Fetch-User": "?1",
|
| 36 |
+
"X-YouTube-API-Key": settings.YOUTUBE_API_KEY, # Add API key to headers
|
| 37 |
+
},
|
| 38 |
+
# Add API key to extractor args
|
| 39 |
+
"extractor_args": {
|
| 40 |
+
"youtube": {
|
| 41 |
+
"api_key": [settings.YOUTUBE_API_KEY],
|
| 42 |
+
}
|
| 43 |
},
|
| 44 |
# Add retries and throttling with better Docker-optimized settings
|
| 45 |
+
"retries": 5,
|
| 46 |
+
"fragment_retries": 5,
|
| 47 |
"skip_unavailable_fragments": True,
|
| 48 |
+
"ignoreerrors": False,
|
| 49 |
+
"no_warnings": False,
|
| 50 |
+
"sleep_interval": 5,
|
| 51 |
+
"max_sleep_interval": 10,
|
| 52 |
# Add geo bypass
|
| 53 |
"geo_bypass": True,
|
| 54 |
"geo_bypass_country": "US",
|
| 55 |
# Add network settings
|
| 56 |
+
"socket_timeout": 30,
|
| 57 |
+
"extractor_retries": 5,
|
| 58 |
# Add progress hooks for better logging
|
| 59 |
"progress_hooks": [self._progress_hook],
|
| 60 |
# Force IPv4 to avoid IPv6 issues in some Docker networks
|
|
|
|
| 112 |
opts = {**self.ydl_opts, "outtmpl": str(output_path.with_suffix(""))}
|
| 113 |
|
| 114 |
try:
|
| 115 |
+
# First try with default options
|
| 116 |
await asyncio.get_event_loop().run_in_executor(
|
| 117 |
None, self._download_and_convert, url, opts
|
| 118 |
)
|
|
|
|
| 122 |
|
| 123 |
return filename
|
| 124 |
except Exception as e:
|
| 125 |
+
print(f"Error in convert_to_mp3: {str(e)}")
|
|
|
|
|
|
|
| 126 |
raise Exception(f"Error converting video: {str(e)}")
|
| 127 |
|
| 128 |
def _download_and_convert(self, url: str, opts: dict):
    """Download ``url`` with yt-dlp, trying several fallback strategies in order.

    Each strategy overrides the ``format`` option (and optionally
    ``extract_flat``) on top of ``opts`` and runs a fresh ``YoutubeDL``
    instance. The method returns as soon as one strategy reports success.

    This method is blocking: ``convert_to_mp3`` dispatches it through
    ``run_in_executor``, so it must not touch the asyncio event loop.

    Args:
        url: The video URL handed to ``YoutubeDL.download``.
        opts: Base yt-dlp options; copied per attempt, never mutated.

    Raises:
        Exception: The error from the last failed attempt, or a generic
            "All download attempts failed" error if none was recorded.
    """
    # Local import keeps this block self-contained; the module's import
    # section is not guaranteed to provide ``time``.
    import time

    # Pause between attempts so we do not hammer the remote immediately.
    retry_delay_seconds = 2

    attempts = [
        {"format": "bestaudio/best", "msg": "Trying with best audio quality..."},
        {
            "format": "worstaudio/worst",
            "msg": "Trying with lowest quality as fallback...",
        },
        {
            # NOTE(review): extract_flat is documented as skipping resolution
            # of URL results; confirm this fallback can actually produce a
            # downloaded file rather than a "successful" no-op.
            "format": "bestaudio/best",
            "extract_flat": True,
            "msg": "Trying with flat extraction...",
        },
    ]

    last_error = None
    for index, attempt in enumerate(attempts):
        if index:
            # Blocking sleep is correct here: we are in a worker thread, not
            # in a coroutine. The previous code scheduled ``asyncio.sleep``
            # on an executor, which never slept and could raise from inside
            # the ``except`` handler, aborting the remaining fallbacks.
            time.sleep(retry_delay_seconds)

        print(attempt["msg"])
        current_opts = {**opts, "format": attempt["format"]}
        if "extract_flat" in attempt:
            current_opts["extract_flat"] = attempt["extract_flat"]

        try:
            with yt_dlp.YoutubeDL(current_opts) as ydl:
                error = ydl.download([url])
            if not error:  # yt-dlp returns 0 on success
                return  # Success!
            raise Exception(f"yt-dlp returned error code: {error}")
        except Exception as e:
            # Boundary catch: any failure simply moves on to the next strategy.
            print(f"Download attempt failed: {str(e)}")
            last_error = e

    # If we get here, all attempts failed
    raise last_error or Exception("All download attempts failed")
|
| 163 |
|
| 164 |
|
| 165 |
youtube_service = YouTubeService()
|