bivav committed on
Commit
e5f492e
·
1 Parent(s): 326c1b1

Update Dockerfile, fix downloading issue

Browse files
Files changed (2) hide show
  1. Dockerfile +1 -5
  2. app/services/youtube_service.py +57 -102
Dockerfile CHANGED
@@ -25,11 +25,7 @@ COPY . .
25
  # Create directory for static files and set proper permissions
26
  RUN mkdir -p /data/audio && \
27
  chown -R nobody:nogroup /data/audio && \
28
- chmod -R 777 /data/audio && \
29
- # Create cookie file with proper permissions
30
- touch /tmp/youtube.com_cookies.txt && \
31
- chown nobody:nogroup /tmp/youtube.com_cookies.txt && \
32
- chmod 666 /tmp/youtube.com_cookies.txt
33
 
34
  # Switch to non-root user
35
  USER nobody
 
25
  # Create directory for static files and set proper permissions
26
  RUN mkdir -p /data/audio && \
27
  chown -R nobody:nogroup /data/audio && \
28
+ chmod -R 777 /data/audio
 
 
 
 
29
 
30
  # Switch to non-root user
31
  USER nobody
app/services/youtube_service.py CHANGED
@@ -8,9 +8,9 @@ from ..core.config import settings
8
  class YouTubeService:
9
  def __init__(self):
10
  self.output_dir = settings.AUDIO_DIR
11
- # Configure yt-dlp with Docker-compatible defaults
12
  self.ydl_opts = {
13
- "format": "bestaudio/best",
14
  "postprocessors": [
15
  {
16
  "key": "FFmpegExtractAudio",
@@ -18,125 +18,80 @@ class YouTubeService:
18
  "preferredquality": "192",
19
  }
20
  ],
21
- "quiet": False, # Enable output for debugging
22
- # Add user agent and other headers to avoid restrictions
23
- "http_headers": {
24
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
25
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
26
- "Accept-Language": "en-US,en;q=0.9",
27
- "Accept-Encoding": "gzip, deflate, br",
28
- "Referer": "https://www.youtube.com/",
29
- },
30
- # Add retries and throttling with better Docker-optimized settings
31
  "retries": 3,
32
  "fragment_retries": 3,
33
- "skip_unavailable_fragments": True,
34
- "ignoreerrors": False,
35
- "no_warnings": False,
36
- "sleep_interval": 1,
37
- "max_sleep_interval": 5,
38
- # Add network settings
39
  "socket_timeout": 30,
40
- "extractor_retries": 3,
41
- # Add progress hooks for better logging
42
- "progress_hooks": [self._progress_hook],
43
- # Use cookies file if available
44
- "cookiefile": "/tmp/youtube.com_cookies.txt",
 
 
 
 
 
 
45
  }
46
 
47
- def _progress_hook(self, d):
48
- if d["status"] == "downloading":
49
- try:
50
- progress = (
51
- float(d.get("downloaded_bytes", 0))
52
- / float(d.get("total_bytes", 1))
53
- * 100
54
- )
55
- print(f"Downloading: {progress:.1f}%")
56
- except:
57
- pass
58
- elif d["status"] == "error":
59
- print(f"Error during download: {d.get('error')}")
60
- elif d["status"] == "finished":
61
- print(f"Download finished, converting to MP3...")
62
-
63
  def _extract_video_id(self, url: str) -> str:
64
- query = urlparse(url)
65
- if query.hostname == "youtu.be":
66
- return query.path[1:]
67
- if query.hostname in {"www.youtube.com", "youtube.com", "m.youtube.com"}:
68
- if query.path == "/watch":
69
- return parse_qs(query.query)["v"][0]
70
- if query.path[:7] == "/embed/":
71
- return query.path.split("/")[2]
72
- if query.path[:3] == "/v/":
73
- return query.path.split("/")[2]
74
- raise ValueError("Invalid YouTube URL")
 
 
 
 
75
 
76
  async def convert_to_mp3(self, url: str) -> str:
77
- video_id = self._extract_video_id(url)
78
- filename = f"{video_id}.mp3"
79
- output_path = self.output_dir / filename
80
-
81
- # Skip if file already exists
82
- if output_path.exists():
83
- return filename
84
 
85
- # Update output template for this specific download
86
- opts = {**self.ydl_opts, "outtmpl": str(output_path.with_suffix(""))}
 
87
 
88
- try:
89
- # Create a new event loop for the executor
90
- loop = asyncio.new_event_loop()
91
- asyncio.set_event_loop(loop)
92
 
93
- # Run the download in the executor
94
- await loop.run_in_executor(None, self._download_and_convert, url, opts)
 
 
 
 
 
 
 
95
 
 
96
  if not output_path.exists():
97
  raise Exception("Failed to download and convert the video")
98
 
99
  return filename
 
 
 
 
 
100
  except Exception as e:
 
101
  print(f"Error in convert_to_mp3: {str(e)}")
102
  raise Exception(f"Error converting video: {str(e)}")
103
- finally:
104
- try:
105
- loop.close()
106
- except:
107
- pass
108
-
109
- def _download_and_convert(self, url: str, opts: dict):
110
- attempts = [
111
- {
112
- "format": "bestaudio[ext=m4a]/bestaudio/best",
113
- "msg": "Trying with best audio quality...",
114
- },
115
- {
116
- "format": "worstaudio[ext=m4a]/worstaudio/worst",
117
- "msg": "Trying with lowest quality as fallback...",
118
- },
119
- ]
120
-
121
- last_error = None
122
- for attempt in attempts:
123
- try:
124
- print(attempt["msg"])
125
- current_opts = {**opts, "format": attempt["format"]}
126
-
127
- with yt_dlp.YoutubeDL(current_opts) as ydl:
128
- error = ydl.download([url])
129
- if not error: # yt-dlp returns 0 on success
130
- return # Success!
131
- raise Exception(f"yt-dlp returned error code: {error}")
132
- except Exception as e:
133
- print(f"Download attempt failed: {str(e)}")
134
- last_error = e
135
- # Wait before next attempt
136
- asyncio.get_event_loop().run_in_executor(None, asyncio.sleep, 1)
137
-
138
- # If we get here, all attempts failed
139
- raise last_error or Exception("All download attempts failed")
140
 
141
 
142
  youtube_service = YouTubeService()
 
8
  class YouTubeService:
9
  def __init__(self):
10
  self.output_dir = settings.AUDIO_DIR
11
+ # Configure yt-dlp with robust but simple defaults
12
  self.ydl_opts = {
13
+ "format": "bestaudio[ext=m4a]/bestaudio/best", # Prefer m4a format
14
  "postprocessors": [
15
  {
16
  "key": "FFmpegExtractAudio",
 
18
  "preferredquality": "192",
19
  }
20
  ],
21
+ # Basic retry and timeout settings
 
 
 
 
 
 
 
 
 
22
  "retries": 3,
23
  "fragment_retries": 3,
 
 
 
 
 
 
24
  "socket_timeout": 30,
25
+ # Output settings
26
+ "outtmpl": "%(id)s.%(ext)s",
27
+ "quiet": True,
28
+ "no_warnings": True,
29
+ # Error handling
30
+ "ignoreerrors": False,
31
+ "no_color": True,
32
+ # Basic request headers
33
+ "http_headers": {
34
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
35
+ },
36
  }
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  def _extract_video_id(self, url: str) -> str:
39
+ """Extract video ID from YouTube URL"""
40
+ try:
41
+ query = urlparse(url)
42
+ if query.hostname == "youtu.be":
43
+ return query.path[1:]
44
+ if query.hostname in {"www.youtube.com", "youtube.com", "m.youtube.com"}:
45
+ if query.path == "/watch":
46
+ return parse_qs(query.query)["v"][0]
47
+ if query.path[:7] == "/embed/":
48
+ return query.path.split("/")[2]
49
+ if query.path[:3] == "/v/":
50
+ return query.path.split("/")[2]
51
+ raise ValueError()
52
+ except:
53
+ raise ValueError("Invalid or unsupported YouTube URL format")
54
 
55
  async def convert_to_mp3(self, url: str) -> str:
56
+ """Convert YouTube video to MP3 and return the filename"""
57
+ try:
58
+ # Extract video ID and validate URL
59
+ video_id = self._extract_video_id(url)
60
+ filename = f"{video_id}.mp3"
61
+ output_path = self.output_dir / filename
 
62
 
63
+ # Skip if file already exists
64
+ if output_path.exists():
65
+ return filename
66
 
67
+ # Set output path for this specific download
68
+ opts = dict(self.ydl_opts)
69
+ opts["outtmpl"] = str(output_path.with_suffix(""))
 
70
 
71
+ # Try download with fallback
72
+ try:
73
+ with yt_dlp.YoutubeDL(opts) as ydl:
74
+ ydl.download([url])
75
+ except Exception as e:
76
+ # If first attempt fails, try with lower quality
77
+ opts["format"] = "worstaudio/worst"
78
+ with yt_dlp.YoutubeDL(opts) as ydl:
79
+ ydl.download([url])
80
 
81
+ # Verify the file was created
82
  if not output_path.exists():
83
  raise Exception("Failed to download and convert the video")
84
 
85
  return filename
86
+
87
+ except ValueError as e:
88
+ # URL validation errors
89
+ print(f"URL validation error: {str(e)}")
90
+ raise
91
  except Exception as e:
92
+ # All other errors
93
  print(f"Error in convert_to_mp3: {str(e)}")
94
  raise Exception(f"Error converting video: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
 
97
  youtube_service = YouTubeService()