Spaces:

Khelendramee
/

stocker

Sleeping

App Files Files Community

Khelendramee committed on Apr 29, 2025

Commit

ee67785

verified ·

1 Parent(s): dd4501c

Update app.py

Browse files

Files changed (1) hide show

app.py +184 -25

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
-from fastapi import FastAPI, HTTPException, BackgroundTasks
-from fastapi.responses import StreamingResponse
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 import subprocess
 import os
@@ -8,12 +9,15 @@ import tempfile
 import uuid
 import time
 import asyncio
-from typing import Optional
 import whisper
 from googletrans import Translator
 from gtts import gTTS
 import yt_dlp
 import logging
 # Set up logging
 logging.basicConfig(level=logging.INFO)
@@ -30,7 +34,17 @@ app.add_middleware(
     allow_headers=["*"],
 )
-# Load whisper model (small version for speed)
 try:
     model = whisper.load_model("tiny")
     logger.info("Whisper model loaded successfully")
@@ -41,15 +55,77 @@ except Exception as e:
 # Initialize translator
 translator = Translator()
-# Temporary directory for storing audio chunks
-TEMP_DIR = tempfile.gettempdir()
-os.makedirs(os.path.join(TEMP_DIR, "youtube_translator"), exist_ok=True)
 class VideoRequest(BaseModel):
     url: str
     timestamp: Optional[int] = 0  # Start time in seconds
     chunk_size: Optional[int] = 15  # Size of each chunk in seconds
     target_language: str = "en"  # Default target language
 @app.post("/process-chunk/")
 async def process_chunk(request: VideoRequest, background_tasks: BackgroundTasks):
@@ -57,7 +133,7 @@ async def process_chunk(request: VideoRequest, background_tasks: BackgroundTasks
     try:
         # Generate a unique ID for this request
         request_id = str(uuid.uuid4())
-        chunk_path = os.path.join(TEMP_DIR, "youtube_translator", f"{request_id}.mp3")
         # Extract audio chunk from YouTube video
         start_time = request.timestamp
@@ -85,9 +161,54 @@ async def process_chunk(request: VideoRequest, background_tasks: BackgroundTasks
             'no_warnings': True
         }
-        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            ydl.download([request.url])
         # Process the audio chunk in background
         background_tasks.add_task(
             process_audio_chunk,
@@ -103,6 +224,26 @@ async def process_chunk(request: VideoRequest, background_tasks: BackgroundTasks
         logger.error(f"Error processing chunk: {e}")
         raise HTTPException(status_code=500, detail=f"Error processing chunk: {str(e)}")
 async def process_audio_chunk(chunk_path, target_language, request_id):
     """Process an audio chunk: transcribe, translate, and convert to speech"""
     try:
@@ -117,22 +258,28 @@ async def process_audio_chunk(chunk_path, target_language, request_id):
         # Step 3: Convert translation to speech
         logger.info(f"Converting translation to speech: {translation[:50]}...")
-        tts_output_path = os.path.join(TEMP_DIR, "youtube_translator", f"{request_id}_tts.mp3")
         tts = gTTS(text=translation, lang=target_language)
         tts.save(tts_output_path)
         logger.info(f"Audio processing completed for request {request_id}")
     except Exception as e:
         logger.error(f"Error processing audio chunk: {e}")
-        # Cleanup
         if os.path.exists(chunk_path):
             os.remove(chunk_path)
 @app.get("/get-audio/{request_id}")
 async def get_audio(request_id: str):
     """Get the processed audio for a specific request"""
-    tts_output_path = os.path.join(TEMP_DIR, "youtube_translator", f"{request_id}_tts.mp3")
     # Check if the file exists
     if not os.path.exists(tts_output_path):
@@ -143,14 +290,11 @@ async def get_audio(request_id: str):
         with open(tts_output_path, "rb") as f:
             yield from f
-        # Clean up the files after streaming
-        try:
-            os.remove(tts_output_path)
-            chunk_path = os.path.join(TEMP_DIR, "youtube_translator", f"{request_id}.mp3")
-            if os.path.exists(chunk_path):
-                os.remove(chunk_path)
-        except Exception as e:
-            logger.error(f"Error cleaning up files: {e}")
     return StreamingResponse(
         iterfile(),
@@ -158,16 +302,31 @@ async def get_audio(request_id: str):
         headers={"Content-Disposition": f"attachment; filename={request_id}.mp3"}
     )
 @app.get("/status/{request_id}")
 async def check_status(request_id: str):
     """Check the status of a processing request"""
-    tts_output_path = os.path.join(TEMP_DIR, "youtube_translator", f"{request_id}_tts.mp3")
     if os.path.exists(tts_output_path):
         return {"status": "completed", "request_id": request_id}
     else:
         # Check if the original chunk exists (meaning processing is in progress)
-        chunk_path = os.path.join(TEMP_DIR, "youtube_translator", f"{request_id}.mp3")
         if os.path.exists(chunk_path):
             return {"status": "processing", "request_id": request_id}
         else:
@@ -180,7 +339,7 @@ async def root():
 # Simple health check endpoint
 @app.get("/health")
 async def health_check():
-    return {"status": "healthy"}
 if __name__ == "__main__":
     import uvicorn

+from fastapi import FastAPI, HTTPException, BackgroundTasks, File, UploadFile
+from fastapi.responses import StreamingResponse, FileResponse
 from fastapi.middleware.cors import CORSMiddleware
+from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel
 import subprocess
 import os
 import uuid
 import time
 import asyncio
+from typing import Optional, List
 import whisper
 from googletrans import Translator
 from gtts import gTTS
 import yt_dlp
 import logging
+import json
+import platform
+import pathlib
 # Set up logging
 logging.basicConfig(level=logging.INFO)
     allow_headers=["*"],
 )
+# Create directories
+# All working files live under the system temp directory.
+TEMP_DIR = tempfile.gettempdir()
+# YOUTUBE_DIR holds the downloaded audio chunks plus the generated TTS audio
+# and translation text, all keyed by request id.
+YOUTUBE_DIR = os.path.join(TEMP_DIR, "youtube_translator")
+# COOKIE_DIR holds uploaded or browser-extracted cookie files for yt-dlp.
+COOKIE_DIR = os.path.join(TEMP_DIR, "youtube_cookies")
+# exist_ok=True makes both calls safe when the directories already exist.
+os.makedirs(YOUTUBE_DIR, exist_ok=True)
+os.makedirs(COOKIE_DIR, exist_ok=True)
+# Default cookie file path
+# This is the file written by the /upload-cookies/ endpoint.
+DEFAULT_COOKIE_FILE = os.path.join(COOKIE_DIR, "youtube_cookies.txt")
+# Load whisper model (tiny version for speed)
 try:
     model = whisper.load_model("tiny")
     logger.info("Whisper model loaded successfully")
 # Initialize translator
 translator = Translator()
 class VideoRequest(BaseModel):
+    """Request body accepted by the POST /process-chunk/ endpoint."""
     url: str
     timestamp: Optional[int] = 0  # Start time in seconds
     chunk_size: Optional[int] = 15  # Size of each chunk in seconds
     target_language: str = "en"  # Default target language
+    use_cookies: Optional[bool] = True  # Whether to use cookies
+class CookieUploadResponse(BaseModel):
+    """Response body returned by the POST /upload-cookies/ endpoint."""
+    # Outcome marker; upload_cookies sets it to "success".
+    status: str
+    # Human-readable description of the outcome.
+    message: str
+def get_browser_name():
+    """Return the browser whose cookies should be tried on this OS.
+
+    Windows maps to Chrome and macOS ("darwin") to Safari; every other
+    platform, Linux included, falls back to Firefox.
+    """
+    browser_by_os = {"windows": "chrome", "darwin": "safari"}
+    return browser_by_os.get(platform.system().lower(), "firefox")
+def get_cookies_from_browser():
+    """Try to export the default browser's cookies to a file using yt-dlp.
+
+    Returns:
+        The path of the exported cookie file when it exists and is
+        non-empty, otherwise None. Failures are logged, never raised.
+    """
+    try:
+        browser = get_browser_name()
+        cookie_file = os.path.join(COOKIE_DIR, f"{browser}_cookies.txt")
+        # Use yt-dlp's cookie extraction feature: read the browser's cookie
+        # store and dump it to cookie_file; -j prints metadata as JSON
+        # instead of downloading the video.
+        cmd = ["yt-dlp", "--cookies-from-browser", browser, "--cookies", cookie_file, "-j", "dQw4w9WgXcQ"]
+        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
+        if result.returncode != 0:
+            # Keep the reason for the failure; without it the only trace is
+            # the generic "Failed to extract" warning below.
+            stderr_tail = (result.stderr or "").strip()[-500:]
+            logger.warning(f"yt-dlp exited with code {result.returncode} while extracting cookies: {stderr_tail}")
+        if os.path.exists(cookie_file) and os.path.getsize(cookie_file) > 0:
+            logger.info(f"Successfully extracted cookies from {browser}")
+            return cookie_file
+        logger.warning(f"Failed to extract cookies from {browser}")
+        return None
+    except Exception as e:
+        # Covers a missing yt-dlp binary and subprocess timeouts alike.
+        logger.error(f"Error extracting cookies from browser: {e}")
+        return None
+@app.post("/upload-cookies/")
+async def upload_cookies(file: UploadFile = File(...)):
+    """Store an uploaded cookies file for YouTube authentication.
+
+    The upload replaces DEFAULT_COOKIE_FILE, which get_cookie_file() prefers
+    over browser-extracted cookies.
+
+    Raises:
+        HTTPException: 400 when the upload is empty, 500 when reading the
+            upload or writing the file fails.
+    """
+    try:
+        # Read the upload before opening the destination: opening with "wb"
+        # truncates, so a failed read must not wipe the stored cookies.
+        content = await file.read()
+        if not content:
+            # A zero-byte file would be ignored by get_cookie_file(), so
+            # reporting success for it would be misleading.
+            raise HTTPException(status_code=400, detail="Uploaded cookie file is empty")
+        # Save the uploaded file
+        with open(DEFAULT_COOKIE_FILE, "wb") as buffer:
+            buffer.write(content)
+        return CookieUploadResponse(
+            status="success",
+            message="Cookie file uploaded successfully"
+        )
+    except HTTPException:
+        # Let the deliberate 400 above through instead of turning it into a 500.
+        raise
+    except Exception as e:
+        logger.error(f"Error uploading cookies: {e}")
+        raise HTTPException(status_code=500, detail=f"Error uploading cookies: {str(e)}")
+def get_cookie_file():
+    """Pick the cookie file yt-dlp should use, or None when there is none.
+
+    A non-empty uploaded file wins; otherwise cookies are extracted from
+    the browser on demand.
+    """
+    uploaded = DEFAULT_COOKIE_FILE
+    has_upload = os.path.exists(uploaded) and os.path.getsize(uploaded) > 0
+    if has_upload:
+        return uploaded
+    # No usable upload: fall back to browser extraction (falsy -> None).
+    return get_cookies_from_browser() or None
 @app.post("/process-chunk/")
 async def process_chunk(request: VideoRequest, background_tasks: BackgroundTasks):
     try:
         # Generate a unique ID for this request
         request_id = str(uuid.uuid4())
+        chunk_path = os.path.join(YOUTUBE_DIR, f"{request_id}.mp3")
         # Extract audio chunk from YouTube video
         start_time = request.timestamp
             'no_warnings': True
         }
+        # Add cookies if available and requested
+        if request.use_cookies:
+            cookie_file = get_cookie_file()
+            if cookie_file:
+                logger.info(f"Using cookie file: {cookie_file}")
+                ydl_opts['cookiefile'] = cookie_file
+        # Try alternative download method if needed
+        try:
+            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                ydl.download([request.url])
+        except Exception as e:
+            logger.warning(f"Initial download failed: {e}")
+            # Try an alternative approach - download directly with ffmpeg
+            video_id = extract_video_id(request.url)
+            if video_id:
+                try:
+                    # Use ffmpeg directly
+                    audio_url = f"https://www.youtube.com/watch?v={video_id}"
+                    cmd = [
+                        "ffmpeg", "-y",
+                        "-ss", str(start_time),
+                        "-t", str(request.chunk_size),
+                        "-i", audio_url,
+                        "-q:a", "0",
+                        "-map", "a",
+                        chunk_path
+                    ]
+                    # Execute ffmpeg command
+                    subprocess.run(cmd, check=True, capture_output=True)
+                    if os.path.exists(chunk_path) and os.path.getsize(chunk_path) > 0:
+                        logger.info("Successfully downloaded using ffmpeg")
+                    else:
+                        raise Exception("ffmpeg download resulted in empty file")
+                except Exception as ffmpeg_error:
+                    logger.error(f"ffmpeg download failed: {ffmpeg_error}")
+                    raise e  # Re-raise the original error
+            else:
+                raise e
+        # Check if file was downloaded successfully
+        if not os.path.exists(chunk_path) or os.path.getsize(chunk_path) == 0:
+            raise Exception("Failed to download audio chunk - file is empty or missing")
         # Process the audio chunk in background
         background_tasks.add_task(
             process_audio_chunk,
         logger.error(f"Error processing chunk: {e}")
         raise HTTPException(status_code=500, detail=f"Error processing chunk: {str(e)}")
+def extract_video_id(url):
+    """Extract the YouTube video ID from a URL.
+
+    Supports youtu.be short links and youtube.com watch, embed, shorts,
+    live and /v/ URLs, with or without a scheme.
+
+    Returns:
+        The video ID string, or None when the URL is not a recognised
+        YouTube URL or cannot be parsed.
+    """
+    from urllib.parse import urlparse, parse_qs
+    try:
+        candidate = url.strip()
+        # urlparse only fills in the host when a scheme is present.
+        if "://" not in candidate:
+            candidate = "https://" + candidate
+        parsed_url = urlparse(candidate)
+        # Match on the real host, not a substring of the whole URL, so a
+        # query such as "&feature=youtu.be" cannot pick the wrong branch.
+        host = (parsed_url.hostname or "").lower()
+        segments = [part for part in parsed_url.path.split("/") if part]
+        if host == "youtu.be" or host.endswith(".youtu.be"):
+            return segments[0] if segments else None
+        if host == "youtube.com" or host.endswith(".youtube.com"):
+            ids = parse_qs(parsed_url.query).get("v")
+            if ids:
+                return ids[0]
+            # Path-style URLs carry the ID right after the marker segment.
+            if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
+                return segments[1]
+        return None
+    except Exception as e:
+        logger.error(f"Error extracting video ID: {e}")
+        return None
 async def process_audio_chunk(chunk_path, target_language, request_id):
     """Process an audio chunk: transcribe, translate, and convert to speech"""
     try:
         # Step 3: Convert translation to speech
         logger.info(f"Converting translation to speech: {translation[:50]}...")
+        tts_output_path = os.path.join(YOUTUBE_DIR, f"{request_id}_tts.mp3")
         tts = gTTS(text=translation, lang=target_language)
         tts.save(tts_output_path)
+        # Save translation text for retrieval
+        text_output_path = os.path.join(YOUTUBE_DIR, f"{request_id}_text.txt")
+        with open(text_output_path, "w", encoding="utf-8") as f:
+            f.write(translation)
         logger.info(f"Audio processing completed for request {request_id}")
     except Exception as e:
         logger.error(f"Error processing audio chunk: {e}")
+    finally:
+        # Cleanup original audio file
         if os.path.exists(chunk_path):
             os.remove(chunk_path)
 @app.get("/get-audio/{request_id}")
 async def get_audio(request_id: str):
     """Get the processed audio for a specific request"""
+    tts_output_path = os.path.join(YOUTUBE_DIR, f"{request_id}_tts.mp3")
     # Check if the file exists
     if not os.path.exists(tts_output_path):
         with open(tts_output_path, "rb") as f:
             yield from f
+        # Clean up the files after streaming (optional)
+        # try:
+        #     os.remove(tts_output_path)
+        # except Exception as e:
+        #     logger.error(f"Error cleaning up files: {e}")
     return StreamingResponse(
         iterfile(),
         headers={"Content-Disposition": f"attachment; filename={request_id}.mp3"}
     )
+@app.get("/get-translation/{request_id}")
+async def get_translation(request_id: str):
+    """Return the translated text saved for a specific request.
+
+    Raises:
+        HTTPException: 404 when no translation file exists for request_id,
+            i.e. the id is unknown or processing has not completed.
+    """
+    text_output_path = os.path.join(YOUTUBE_DIR, f"{request_id}_text.txt")
+    # Open directly instead of checking existence first: the file could
+    # disappear between the check and the open and surface as a 500.
+    try:
+        with open(text_output_path, "r", encoding="utf-8") as f:
+            translation = f.read()
+    except FileNotFoundError:
+        raise HTTPException(status_code=404, detail="Translation text not found or processing not completed") from None
+    # Return the translated text
+    return {"request_id": request_id, "translation": translation}
 @app.get("/status/{request_id}")
 async def check_status(request_id: str):
     """Check the status of a processing request"""
+    tts_output_path = os.path.join(YOUTUBE_DIR, f"{request_id}_tts.mp3")
     if os.path.exists(tts_output_path):
         return {"status": "completed", "request_id": request_id}
     else:
         # Check if the original chunk exists (meaning processing is in progress)
+        chunk_path = os.path.join(YOUTUBE_DIR, f"{request_id}.mp3")
         if os.path.exists(chunk_path):
             return {"status": "processing", "request_id": request_id}
         else:
 # Simple health check endpoint
 @app.get("/health")
 async def health_check():
+    """Report liveness and whether an uploaded cookie file is present."""
+    cookies_present = os.path.exists(DEFAULT_COOKIE_FILE)
+    return {"status": "healthy", "youtube_cookies": cookies_present}
 if __name__ == "__main__":
     import uvicorn