Khelendramee committed on
Commit
ee67785
·
verified ·
1 Parent(s): dd4501c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +184 -25
app.py CHANGED
@@ -1,6 +1,7 @@
1
- from fastapi import FastAPI, HTTPException, BackgroundTasks
2
- from fastapi.responses import StreamingResponse
3
  from fastapi.middleware.cors import CORSMiddleware
 
4
  from pydantic import BaseModel
5
  import subprocess
6
  import os
@@ -8,12 +9,15 @@ import tempfile
8
  import uuid
9
  import time
10
  import asyncio
11
- from typing import Optional
12
  import whisper
13
  from googletrans import Translator
14
  from gtts import gTTS
15
  import yt_dlp
16
  import logging
 
 
 
17
 
18
  # Set up logging
19
  logging.basicConfig(level=logging.INFO)
@@ -30,7 +34,17 @@ app.add_middleware(
30
  allow_headers=["*"],
31
  )
32
 
33
- # Load whisper model (small version for speed)
 
 
 
 
 
 
 
 
 
 
34
  try:
35
  model = whisper.load_model("tiny")
36
  logger.info("Whisper model loaded successfully")
@@ -41,15 +55,77 @@ except Exception as e:
41
  # Initialize translator
42
  translator = Translator()
43
 
44
- # Temporary directory for storing audio chunks
45
- TEMP_DIR = tempfile.gettempdir()
46
- os.makedirs(os.path.join(TEMP_DIR, "youtube_translator"), exist_ok=True)
47
-
48
  class VideoRequest(BaseModel):
49
  url: str
50
  timestamp: Optional[int] = 0 # Start time in seconds
51
  chunk_size: Optional[int] = 15 # Size of each chunk in seconds
52
  target_language: str = "en" # Default target language
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  @app.post("/process-chunk/")
55
  async def process_chunk(request: VideoRequest, background_tasks: BackgroundTasks):
@@ -57,7 +133,7 @@ async def process_chunk(request: VideoRequest, background_tasks: BackgroundTasks
57
  try:
58
  # Generate a unique ID for this request
59
  request_id = str(uuid.uuid4())
60
- chunk_path = os.path.join(TEMP_DIR, "youtube_translator", f"{request_id}.mp3")
61
 
62
  # Extract audio chunk from YouTube video
63
  start_time = request.timestamp
@@ -85,9 +161,54 @@ async def process_chunk(request: VideoRequest, background_tasks: BackgroundTasks
85
  'no_warnings': True
86
  }
87
 
88
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
89
- ydl.download([request.url])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
 
 
 
 
91
  # Process the audio chunk in background
92
  background_tasks.add_task(
93
  process_audio_chunk,
@@ -103,6 +224,26 @@ async def process_chunk(request: VideoRequest, background_tasks: BackgroundTasks
103
  logger.error(f"Error processing chunk: {e}")
104
  raise HTTPException(status_code=500, detail=f"Error processing chunk: {str(e)}")
105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  async def process_audio_chunk(chunk_path, target_language, request_id):
107
  """Process an audio chunk: transcribe, translate, and convert to speech"""
108
  try:
@@ -117,22 +258,28 @@ async def process_audio_chunk(chunk_path, target_language, request_id):
117
 
118
  # Step 3: Convert translation to speech
119
  logger.info(f"Converting translation to speech: {translation[:50]}...")
120
- tts_output_path = os.path.join(TEMP_DIR, "youtube_translator", f"{request_id}_tts.mp3")
121
  tts = gTTS(text=translation, lang=target_language)
122
  tts.save(tts_output_path)
123
 
 
 
 
 
 
124
  logger.info(f"Audio processing completed for request {request_id}")
125
 
126
  except Exception as e:
127
  logger.error(f"Error processing audio chunk: {e}")
128
- # Cleanup
 
129
  if os.path.exists(chunk_path):
130
  os.remove(chunk_path)
131
 
132
  @app.get("/get-audio/{request_id}")
133
  async def get_audio(request_id: str):
134
  """Get the processed audio for a specific request"""
135
- tts_output_path = os.path.join(TEMP_DIR, "youtube_translator", f"{request_id}_tts.mp3")
136
 
137
  # Check if the file exists
138
  if not os.path.exists(tts_output_path):
@@ -143,14 +290,11 @@ async def get_audio(request_id: str):
143
  with open(tts_output_path, "rb") as f:
144
  yield from f
145
 
146
- # Clean up the files after streaming
147
- try:
148
- os.remove(tts_output_path)
149
- chunk_path = os.path.join(TEMP_DIR, "youtube_translator", f"{request_id}.mp3")
150
- if os.path.exists(chunk_path):
151
- os.remove(chunk_path)
152
- except Exception as e:
153
- logger.error(f"Error cleaning up files: {e}")
154
 
155
  return StreamingResponse(
156
  iterfile(),
@@ -158,16 +302,31 @@ async def get_audio(request_id: str):
158
  headers={"Content-Disposition": f"attachment; filename={request_id}.mp3"}
159
  )
160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  @app.get("/status/{request_id}")
162
  async def check_status(request_id: str):
163
  """Check the status of a processing request"""
164
- tts_output_path = os.path.join(TEMP_DIR, "youtube_translator", f"{request_id}_tts.mp3")
165
 
166
  if os.path.exists(tts_output_path):
167
  return {"status": "completed", "request_id": request_id}
168
  else:
169
  # Check if the original chunk exists (meaning processing is in progress)
170
- chunk_path = os.path.join(TEMP_DIR, "youtube_translator", f"{request_id}.mp3")
171
  if os.path.exists(chunk_path):
172
  return {"status": "processing", "request_id": request_id}
173
  else:
@@ -180,7 +339,7 @@ async def root():
180
  # Simple health check endpoint
181
  @app.get("/health")
182
  async def health_check():
183
- return {"status": "healthy"}
184
 
185
  if __name__ == "__main__":
186
  import uvicorn
 
1
+ from fastapi import FastAPI, HTTPException, BackgroundTasks, File, UploadFile
2
+ from fastapi.responses import StreamingResponse, FileResponse
3
  from fastapi.middleware.cors import CORSMiddleware
4
+ from fastapi.staticfiles import StaticFiles
5
  from pydantic import BaseModel
6
  import subprocess
7
  import os
 
9
  import uuid
10
  import time
11
  import asyncio
12
+ from typing import Optional, List
13
  import whisper
14
  from googletrans import Translator
15
  from gtts import gTTS
16
  import yt_dlp
17
  import logging
18
+ import json
19
+ import platform
20
+ import pathlib
21
 
22
  # Set up logging
23
  logging.basicConfig(level=logging.INFO)
 
34
  allow_headers=["*"],
35
  )
36
 
37
# Working directories live under the system temp directory: one for the
# audio/text artifacts of each request, one for YouTube cookie files.
TEMP_DIR = tempfile.gettempdir()
YOUTUBE_DIR = os.path.join(TEMP_DIR, "youtube_translator")
COOKIE_DIR = os.path.join(TEMP_DIR, "youtube_cookies")
for _work_dir in (YOUTUBE_DIR, COOKIE_DIR):
    os.makedirs(_work_dir, exist_ok=True)
del _work_dir  # keep the module namespace identical to the unrolled form

# Cookie file that is looked up first when choosing cookies for yt-dlp.
DEFAULT_COOKIE_FILE = os.path.join(COOKIE_DIR, "youtube_cookies.txt")
46
+
47
+ # Load whisper model (tiny version for speed)
48
  try:
49
  model = whisper.load_model("tiny")
50
  logger.info("Whisper model loaded successfully")
 
55
  # Initialize translator
56
  translator = Translator()
57
 
 
 
 
 
58
  class VideoRequest(BaseModel):
59
  url: str
60
  timestamp: Optional[int] = 0 # Start time in seconds
61
  chunk_size: Optional[int] = 15 # Size of each chunk in seconds
62
  target_language: str = "en" # Default target language
63
+ use_cookies: Optional[bool] = True # Whether to use cookies
64
+
65
class CookieUploadResponse(BaseModel):
    """Response payload returned by the cookie upload endpoint."""

    # Outcome marker; the upload endpoint sets it to "success".
    status: str
    # Human-readable description of the outcome.
    message: str
68
+
69
def get_browser_name(system: Optional[str] = None) -> str:
    """Return the browser whose cookies should be read for a given OS.

    Args:
        system: OS name in the form returned by ``platform.system()``
            (for example ``"Windows"``, ``"Darwin"``, ``"Linux"``).
            Matching is case-insensitive. When omitted, the current
            platform is used, which is the original behavior.

    Returns:
        ``"chrome"`` on Windows, ``"safari"`` on macOS, and ``"firefox"``
        on Linux and every other system.
    """
    if system is None:
        system = platform.system()

    # Anything not listed here (Linux, BSDs, ...) falls back to Firefox.
    browser_by_os = {
        "windows": "chrome",
        "darwin": "safari",  # macOS
    }
    return browser_by_os.get(system.lower(), "firefox")
78
+
79
def get_cookies_from_browser():
    """Try to export the default browser's cookies to a file using yt-dlp.

    Returns:
        Path of the cookie file when it exists and is non-empty after the
        yt-dlp run, otherwise ``None``. Failures are logged, never raised.
    """
    browser = get_browser_name()
    cookie_file = os.path.join(COOKIE_DIR, f"{browser}_cookies.txt")

    # yt-dlp is run in JSON-dump mode (-j) against a fixed video ID purely
    # for its side effect: with --cookies-from-browser and --cookies given
    # together it is expected to write the browser's cookies to cookie_file.
    cmd = ["yt-dlp", "--cookies-from-browser", browser, "--cookies", cookie_file, "-j", "dQw4w9WgXcQ"]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
    except (OSError, subprocess.SubprocessError) as e:
        # Covers a missing yt-dlp binary and the 30 second timeout.
        logger.error(f"Error extracting cookies from browser: {e}")
        return None

    if result.returncode != 0:
        # The cookie file may still have been written, so this is only a
        # diagnostic; the file check below stays the deciding factor.
        logger.warning(
            f"yt-dlp exited with code {result.returncode} while extracting cookies: "
            f"{result.stderr.strip()[:200]}"
        )

    try:
        has_cookies = os.path.getsize(cookie_file) > 0
    except OSError:
        # File missing or unreadable.
        has_cookies = False

    if has_cookies:
        logger.info(f"Successfully extracted cookies from {browser}")
        return cookie_file

    logger.warning(f"Failed to extract cookies from {browser}")
    return None
98
+
99
@app.post("/upload-cookies/")
async def upload_cookies(file: UploadFile = File(...)):
    """Store an uploaded cookies file for later YouTube downloads.

    The upload is written to ``DEFAULT_COOKIE_FILE``, the same path the
    cookie lookup and the health check read.

    Raises:
        HTTPException: 400 when the uploaded file is empty, 500 when the
            file cannot be read or saved.
    """
    try:
        # Read the upload before opening the destination: opening in "wb"
        # truncates it, so a failed read must not wipe existing cookies.
        content = await file.read()
        if not content:
            # An empty file would replace usable cookies with one that the
            # cookie lookup treats as absent (it requires size > 0).
            raise HTTPException(status_code=400, detail="Uploaded cookie file is empty")

        with open(DEFAULT_COOKIE_FILE, "wb") as buffer:
            buffer.write(content)

        return CookieUploadResponse(
            status="success",
            message="Cookie file uploaded successfully"
        )
    except HTTPException:
        # Let deliberate HTTP errors through instead of rewrapping as 500.
        raise
    except Exception as e:
        logger.error(f"Error uploading cookies: {e}")
        raise HTTPException(status_code=500, detail=f"Error uploading cookies: {str(e)}")
116
+
117
def get_cookie_file():
    """Pick the cookie file to hand to yt-dlp, or None when there is none.

    An uploaded, non-empty cookie file takes priority; otherwise cookies
    are extracted from the browser on demand.
    """
    uploaded_is_usable = (
        os.path.exists(DEFAULT_COOKIE_FILE)
        and os.path.getsize(DEFAULT_COOKIE_FILE) > 0
    )
    if uploaded_is_usable:
        return DEFAULT_COOKIE_FILE

    # No usable upload: fall back to browser extraction, normalising any
    # falsy result to None.
    return get_cookies_from_browser() or None
129
 
130
  @app.post("/process-chunk/")
131
  async def process_chunk(request: VideoRequest, background_tasks: BackgroundTasks):
 
133
  try:
134
  # Generate a unique ID for this request
135
  request_id = str(uuid.uuid4())
136
+ chunk_path = os.path.join(YOUTUBE_DIR, f"{request_id}.mp3")
137
 
138
  # Extract audio chunk from YouTube video
139
  start_time = request.timestamp
 
161
  'no_warnings': True
162
  }
163
 
164
+ # Add cookies if available and requested
165
+ if request.use_cookies:
166
+ cookie_file = get_cookie_file()
167
+ if cookie_file:
168
+ logger.info(f"Using cookie file: {cookie_file}")
169
+ ydl_opts['cookiefile'] = cookie_file
170
+
171
+ # Try alternative download method if needed
172
+ try:
173
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
174
+ ydl.download([request.url])
175
+ except Exception as e:
176
+ logger.warning(f"Initial download failed: {e}")
177
+
178
+ # Try an alternative approach - download directly with ffmpeg
179
+ video_id = extract_video_id(request.url)
180
+ if video_id:
181
+ try:
182
+ # Use ffmpeg directly
183
+ audio_url = f"https://www.youtube.com/watch?v={video_id}"
184
+ cmd = [
185
+ "ffmpeg", "-y",
186
+ "-ss", str(start_time),
187
+ "-t", str(request.chunk_size),
188
+ "-i", audio_url,
189
+ "-q:a", "0",
190
+ "-map", "a",
191
+ chunk_path
192
+ ]
193
+
194
+ # Execute ffmpeg command
195
+ subprocess.run(cmd, check=True, capture_output=True)
196
+
197
+ if os.path.exists(chunk_path) and os.path.getsize(chunk_path) > 0:
198
+ logger.info("Successfully downloaded using ffmpeg")
199
+ else:
200
+ raise Exception("ffmpeg download resulted in empty file")
201
+
202
+ except Exception as ffmpeg_error:
203
+ logger.error(f"ffmpeg download failed: {ffmpeg_error}")
204
+ raise e # Re-raise the original error
205
+ else:
206
+ raise e
207
 
208
+ # Check if file was downloaded successfully
209
+ if not os.path.exists(chunk_path) or os.path.getsize(chunk_path) == 0:
210
+ raise Exception("Failed to download audio chunk - file is empty or missing")
211
+
212
  # Process the audio chunk in background
213
  background_tasks.add_task(
214
  process_audio_chunk,
 
224
  logger.error(f"Error processing chunk: {e}")
225
  raise HTTPException(status_code=500, detail=f"Error processing chunk: {str(e)}")
226
 
227
def extract_video_id(url):
    """Extract the YouTube video ID from a URL.

    Recognised forms (scheme optional):
      * ``youtu.be/<id>``
      * ``youtube.com/watch?v=<id>`` (any subdomain)
      * ``youtube.com/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``, ``/v/<id>``

    The decision is made on the parsed host, not on a substring of the
    whole URL, so a watch link carrying ``feature=youtu.be`` in its query
    string is still handled as a watch link.

    Args:
        url: The URL string to inspect.

    Returns:
        The video ID, or ``None`` when the input is not a string, is
        empty, cannot be parsed, or is not a recognised YouTube URL.
    """
    from urllib.parse import urlparse, parse_qs

    if not isinstance(url, str) or not url.strip():
        return None
    candidate = url.strip()

    try:
        parsed_url = urlparse(candidate)
        if not parsed_url.netloc:
            # urlparse only fills in the host when a scheme or a leading
            # "//" is present; retry so "youtu.be/<id>" keeps working.
            parsed_url = urlparse("//" + candidate.lstrip("/"))
        host = (parsed_url.hostname or "").lower()
    except ValueError as e:
        logger.error(f"Error extracting video ID: {e}")
        return None

    # Non-empty path segments; this also ignores a trailing slash.
    segments = [part for part in parsed_url.path.split("/") if part]

    if host == "youtu.be" or host.endswith(".youtu.be"):
        return segments[0] if segments else None

    if host == "youtube.com" or host.endswith(".youtube.com"):
        video_ids = parse_qs(parsed_url.query).get("v")
        if video_ids:
            return video_ids[0]
        if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            return segments[1]

    return None
246
+
247
  async def process_audio_chunk(chunk_path, target_language, request_id):
248
  """Process an audio chunk: transcribe, translate, and convert to speech"""
249
  try:
 
258
 
259
  # Step 3: Convert translation to speech
260
  logger.info(f"Converting translation to speech: {translation[:50]}...")
261
+ tts_output_path = os.path.join(YOUTUBE_DIR, f"{request_id}_tts.mp3")
262
  tts = gTTS(text=translation, lang=target_language)
263
  tts.save(tts_output_path)
264
 
265
+ # Save translation text for retrieval
266
+ text_output_path = os.path.join(YOUTUBE_DIR, f"{request_id}_text.txt")
267
+ with open(text_output_path, "w", encoding="utf-8") as f:
268
+ f.write(translation)
269
+
270
  logger.info(f"Audio processing completed for request {request_id}")
271
 
272
  except Exception as e:
273
  logger.error(f"Error processing audio chunk: {e}")
274
+ finally:
275
+ # Cleanup original audio file
276
  if os.path.exists(chunk_path):
277
  os.remove(chunk_path)
278
 
279
  @app.get("/get-audio/{request_id}")
280
  async def get_audio(request_id: str):
281
  """Get the processed audio for a specific request"""
282
+ tts_output_path = os.path.join(YOUTUBE_DIR, f"{request_id}_tts.mp3")
283
 
284
  # Check if the file exists
285
  if not os.path.exists(tts_output_path):
 
290
  with open(tts_output_path, "rb") as f:
291
  yield from f
292
 
293
+ # Clean up the files after streaming (optional)
294
+ # try:
295
+ # os.remove(tts_output_path)
296
+ # except Exception as e:
297
+ # logger.error(f"Error cleaning up files: {e}")
 
 
 
298
 
299
  return StreamingResponse(
300
  iterfile(),
 
302
  headers={"Content-Disposition": f"attachment; filename={request_id}.mp3"}
303
  )
304
 
305
@app.get("/get-translation/{request_id}")
async def get_translation(request_id: str):
    """Return the translated text saved for a processing request.

    Args:
        request_id: ID returned when the chunk was submitted.

    Returns:
        A dict with the ``request_id`` and its ``translation`` text.

    Raises:
        HTTPException: 404 when the ID is not a UUID or no translation
            file exists for it (unknown ID or processing not finished).
    """
    not_found = HTTPException(
        status_code=404,
        detail="Translation text not found or processing not completed",
    )

    # request_id comes straight from the URL and is interpolated into a
    # filesystem path. IDs are generated with uuid4, so anything that does
    # not parse as a UUID cannot name a real result and is rejected here.
    try:
        uuid.UUID(request_id)
    except ValueError:
        raise not_found from None

    text_output_path = os.path.join(YOUTUBE_DIR, f"{request_id}_text.txt")

    # Open directly instead of checking existence first, so a file that
    # disappears between the check and the read still yields a clean 404.
    try:
        with open(text_output_path, "r", encoding="utf-8") as f:
            translation = f.read()
    except FileNotFoundError:
        raise not_found from None

    return {"request_id": request_id, "translation": translation}
319
+
320
  @app.get("/status/{request_id}")
321
  async def check_status(request_id: str):
322
  """Check the status of a processing request"""
323
+ tts_output_path = os.path.join(YOUTUBE_DIR, f"{request_id}_tts.mp3")
324
 
325
  if os.path.exists(tts_output_path):
326
  return {"status": "completed", "request_id": request_id}
327
  else:
328
  # Check if the original chunk exists (meaning processing is in progress)
329
+ chunk_path = os.path.join(YOUTUBE_DIR, f"{request_id}.mp3")
330
  if os.path.exists(chunk_path):
331
  return {"status": "processing", "request_id": request_id}
332
  else:
 
339
  # Simple health check endpoint
340
  @app.get("/health")
341
  async def health_check():
342
+ return {"status": "healthy", "youtube_cookies": os.path.exists(DEFAULT_COOKIE_FILE)}
343
 
344
  if __name__ == "__main__":
345
  import uvicorn