Khelendramee committed on
Commit
eb6f602
·
verified ·
1 Parent(s): 4514c12

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +177 -46
app.py CHANGED
@@ -1,56 +1,187 @@
1
- from fastapi import FastAPI
2
- import os
 
 
3
  import subprocess
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
- app = FastAPI()
 
6
 
7
- # Head node address
8
- RAY_HEAD_ADDRESS = "ray://172.28.0.12:6379" # <-- Put your correct head node address here
 
9
 
10
- # Worker connect status
11
- connected = False
 
 
 
12
 
13
- def get_ray_status():
14
- """Check if ray is running or stopped."""
 
15
  try:
16
- output = subprocess.check_output(["ray", "status"], stderr=subprocess.STDOUT)
17
- if b"cluster" in output or b"Connected" in output:
18
- return "running"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  else:
20
- return "unknown"
21
- except subprocess.CalledProcessError:
22
- return "stopped"
23
- except FileNotFoundError:
24
- return "ray-not-installed"
25
-
26
- @app.post("/worker")
27
- async def connect_worker():
28
- global connected
29
- if not connected:
30
- os.system(f"ray start --address='{RAY_HEAD_ADDRESS}'")
31
- connected = True
32
- return {
33
- "message": "Worker connection attempt finished",
34
- "connection": 1 if connected else 0,
35
- "ray_status": get_ray_status()
36
- }
37
-
38
- @app.post("/noworker")
39
- async def disconnect_worker():
40
- global connected
41
- if connected:
42
- os.system("ray stop")
43
- connected = False
44
- return {
45
- "message": "Worker disconnection attempt finished",
46
- "connection": 1 if connected else 0,
47
- "ray_status": get_ray_status()
48
- }
49
 
50
  @app.get("/")
51
  async def root():
52
- return {
53
- "message": "Worker Node Ready",
54
- "connection": 1 if connected else 0,
55
- "ray_status": get_ray_status()
56
- }
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException, BackgroundTasks
2
+ from fastapi.responses import StreamingResponse
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from pydantic import BaseModel
5
  import subprocess
6
+ import os
7
+ import tempfile
8
+ import uuid
9
+ import time
10
+ import asyncio
11
+ from typing import Optional
12
+ import whisper
13
+ from googletrans import Translator
14
+ from gtts import gTTS
15
+ import yt_dlp
16
+ import logging
17
+
18
# Route INFO-and-above records to the root handler and grab a module logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="YouTube Streaming Translator API")

# CORS: accept any origin, method and header, with credentials allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Load the "tiny" Whisper checkpoint once at import time; ``model`` is left as
# None when loading fails so the application can still start.
try:
    model = whisper.load_model("tiny")
except Exception as e:
    logger.error(f"Failed to load whisper model: {e}")
    model = None
else:
    logger.info("Whisper model loaded successfully")

# Shared translation client.
translator = Translator()

# Audio chunks and synthesized speech live in a "youtube_translator" folder
# under the system temporary directory; create it up front.
TEMP_DIR = tempfile.gettempdir()
os.makedirs(os.path.join(TEMP_DIR, "youtube_translator"), exist_ok=True)
47
 
48
class VideoRequest(BaseModel):
    """Request body accepted by the ``/process-chunk/`` endpoint."""

    # Video URL handed to the downloader.
    url: str
    # Start time in seconds.
    timestamp: Optional[int] = 0
    # Size of each chunk in seconds.
    chunk_size: Optional[int] = 15
    # Target language passed to the translator and the speech synthesizer.
    target_language: str = "en"
53
 
54
def _download_audio_chunk(url, chunk_path, start_time, end_time):
    """Download the ``start_time``..``end_time`` slice of *url* as MP3 (blocking)."""
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': chunk_path,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        # yt-dlp calls this option as ``download_ranges(info_dict, ydl)``; a
        # plain dict is not callable, so build the callback with the helper.
        'download_ranges': yt_dlp.utils.download_range_func(
            None, [(start_time, end_time)]
        ),
        'quiet': True,
        'no_warnings': True,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])


@app.post("/process-chunk/")
async def process_chunk(request: VideoRequest, background_tasks: BackgroundTasks):
    """Download one audio chunk of a video and queue it for translation.

    Args:
        request: Video URL, start offset, chunk length and target language.
        background_tasks: FastAPI task queue; the transcribe/translate/speak
            pipeline is scheduled on it once the chunk has been downloaded.

    Returns:
        ``{"request_id": <uuid>, "status": "processing"}``. The id is what
        the status and audio endpoints expect.

    Raises:
        HTTPException: 400 for a negative timestamp or non-positive chunk
            size, 500 when downloading or scheduling fails.
    """
    # Both fields are Optional, so an explicit JSON null reaches us as None;
    # fall back to the model defaults instead of failing on ``None + int``.
    start_time = 0 if request.timestamp is None else request.timestamp
    chunk_size = 15 if request.chunk_size is None else request.chunk_size
    # Validated outside the try block so the 400 is not rewrapped as a 500.
    if start_time < 0 or chunk_size <= 0:
        raise HTTPException(
            status_code=400,
            detail="timestamp must be >= 0 and chunk_size must be > 0",
        )
    end_time = start_time + chunk_size

    try:
        # Generate a unique ID for this request
        request_id = str(uuid.uuid4())
        chunk_path = os.path.join(TEMP_DIR, "youtube_translator", f"{request_id}.mp3")

        logger.info(f"Extracting audio chunk from {request.url} from {start_time}s to {end_time}s")

        # The download is blocking network + ffmpeg work: run it on a worker
        # thread so other requests keep being served meanwhile.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(
            None, _download_audio_chunk, request.url, chunk_path, start_time, end_time
        )

        # Process the audio chunk in background
        background_tasks.add_task(
            process_audio_chunk,
            chunk_path,
            request.target_language,
            request_id,
        )

        # Return a response with the request ID
        return {"request_id": request_id, "status": "processing"}

    except Exception as e:
        logger.error(f"Error processing chunk: {e}")
        raise HTTPException(status_code=500, detail=f"Error processing chunk: {str(e)}") from e
105
+
106
import threading

# Before this pipeline moved to worker threads it blocked the event loop, so
# only one chunk at a time ever used the shared model/translator. The lock
# keeps that one-at-a-time guarantee.
_PIPELINE_LOCK = threading.Lock()


def _translate_chunk_to_speech(chunk_path, target_language, request_id):
    """Transcribe, translate and synthesize one chunk (blocking); return the output path."""
    if model is None:
        raise RuntimeError("Whisper model is not loaded")

    tts_output_path = os.path.join(TEMP_DIR, "youtube_translator", f"{request_id}_tts.mp3")
    partial_path = tts_output_path + ".part"

    with _PIPELINE_LOCK:
        # Step 1: Transcribe the audio chunk
        logger.info(f"Transcribing audio chunk: {chunk_path}")
        result = model.transcribe(chunk_path)
        transcription = result["text"]
        if not transcription.strip():
            raise ValueError("No speech detected in audio chunk")

        # Step 2: Translate the transcription
        logger.info(f"Translating text to {target_language}: {transcription[:50]}...")
        translation = translator.translate(transcription, dest=target_language).text

        # Step 3: Convert translation to speech
        logger.info(f"Converting translation to speech: {translation[:50]}...")
        tts = gTTS(text=translation, lang=target_language)
        try:
            # Write under a temporary name and rename at the end: the status
            # and download endpoints treat the final file's existence as
            # "completed", so it must never be visible half-written.
            tts.save(partial_path)
            os.replace(partial_path, tts_output_path)
        finally:
            if os.path.exists(partial_path):
                os.remove(partial_path)

    return tts_output_path


async def process_audio_chunk(chunk_path, target_language, request_id):
    """Process an audio chunk: transcribe, translate, and convert to speech.

    The blocking work runs on an executor thread so the event loop stays
    responsive while a chunk is being processed.

    Args:
        chunk_path: Path of the downloaded MP3 chunk.
        target_language: Language code used for translation and speech.
        request_id: Identifier used to name the synthesized output file.

    Returns:
        None. Failures are logged and the source chunk is removed; nothing is
        raised to the caller.
    """
    try:
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(
            None, _translate_chunk_to_speech, chunk_path, target_language, request_id
        )
        logger.info(f"Audio processing completed for request {request_id}")

    except Exception as e:
        logger.error(f"Error processing audio chunk: {e}")
        # Cleanup
        if os.path.exists(chunk_path):
            os.remove(chunk_path)
131
+
132
@app.get("/get-audio/{request_id}")
async def get_audio(request_id: str):
    """Stream the synthesized audio for a request, then delete its files.

    Args:
        request_id: Identifier returned by ``/process-chunk/``.

    Returns:
        A ``StreamingResponse`` with the MP3 data as an attachment.

    Raises:
        HTTPException: 404 when the id is malformed or the audio is not
            available.
    """
    not_ready = HTTPException(
        status_code=404,
        detail="Audio processing not completed yet or request ID invalid",
    )

    # Ids are generated with uuid4; reject anything else before it is used to
    # build a file path or a response header.
    try:
        uuid.UUID(request_id)
    except ValueError:
        raise not_ready

    work_dir = os.path.join(TEMP_DIR, "youtube_translator")
    tts_output_path = os.path.join(work_dir, f"{request_id}_tts.mp3")
    chunk_path = os.path.join(work_dir, f"{request_id}.mp3")

    # Check if the file exists
    if not os.path.exists(tts_output_path):
        raise not_ready

    def iterfile():
        """Yield the file in fixed-size blocks; remove both files afterwards."""
        try:
            with open(tts_output_path, "rb") as f:
                for block in iter(lambda: f.read(64 * 1024), b""):
                    yield block
        finally:
            # The generator body only runs while the response is being sent,
            # so cleanup has to live here: deleting the files before
            # returning the response would leave nothing to stream.
            for path in (tts_output_path, chunk_path):
                try:
                    if os.path.exists(path):
                        os.remove(path)
                except Exception as e:
                    logger.error(f"Error cleaning up files: {e}")

    return StreamingResponse(
        iterfile(),
        media_type="audio/mpeg",
        headers={"Content-Disposition": f"attachment; filename={request_id}.mp3"},
    )
160
+
161
@app.get("/status/{request_id}")
async def check_status(request_id: str):
    """Report whether a request is finished, still running, or unknown.

    Returns ``completed`` when the synthesized audio file exists,
    ``processing`` when only the downloaded chunk exists, and raises a 404
    ``HTTPException`` when neither file is present.
    """
    work_dir = os.path.join(TEMP_DIR, "youtube_translator")

    # Synthesized speech present: the pipeline has finished.
    if os.path.exists(os.path.join(work_dir, f"{request_id}_tts.mp3")):
        return {"status": "completed", "request_id": request_id}

    # Only the source chunk present: processing is in progress.
    if os.path.exists(os.path.join(work_dir, f"{request_id}.mp3")):
        return {"status": "processing", "request_id": request_id}

    raise HTTPException(status_code=404, detail="Request ID not found")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
 
176
@app.get("/")
async def root():
    """Return a short banner identifying the service."""
    banner = {"message": "YouTube Streaming Translator API"}
    return banner
179
+
180
# Liveness probe: always answers with a fixed payload.
@app.get("/health")
async def health_check():
    """Return a constant "healthy" status."""
    payload = {"status": "healthy"}
    return payload
184
+
185
if __name__ == "__main__":
    # Executed as a script: serve the app on all interfaces, port 8000.
    import uvicorn

    uvicorn.run(app, port=8000, host="0.0.0.0")