"""FastAPI app that relays browser audio through a VoiceTranslator over a WebSocket.

The browser sends binary audio messages on ``/ws``; each message is converted
to raw PCM with ffmpeg and pushed onto the translator's input queue. Audio
chunks appearing on the translator's output queue are sent back to the browser.
"""
import asyncio
import os

import ffmpeg
from dotenv import load_dotenv
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles

from translator import VoiceTranslator

load_dotenv()

app = FastAPI()
#app.mount("/static", StaticFiles(directory="static"), name="static")

# Load environment variables for API keys
google_creds = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
deepl_key = os.getenv("DEEPL_API_KEY")
eleven_key = os.getenv("ELEVENLABS_API_KEY")
voice_id = os.getenv("ELEVENLABS_VOICE_ID")

# --- Start Debug Prints ---
# Only the presence of each value is printed, never the value itself.
print("--- API Key Status ---")
print(f"GOOGLE_APPLICATION_CREDENTIALS loaded: {bool(google_creds)}")
print(f"DEEPL_API_KEY loaded: {bool(deepl_key)}")
print(f"ELEVENLABS_API_KEY loaded: {bool(eleven_key)}")
print(f"ELEVENLABS_VOICE_ID loaded: {bool(voice_id)}")
print("----------------------")

# Fail fast at import time if any credential is missing.
if not all([google_creds, deepl_key, eleven_key, voice_id]):
    raise ValueError("CRITICAL: Missing one or more required API keys. Please check your Hugging Face secrets.")

translator = VoiceTranslator(deepl_key, eleven_key, voice_id)


@app.get("/")
async def get():
    """Serve ``index.html`` from the current working directory."""
    # Context manager so the file handle is closed on every request.
    with open("index.html", "r", encoding="utf-8") as page:
        return HTMLResponse(page.read())


async def audio_output_sender(ws: WebSocket, output_queue: asyncio.Queue):
    """Forward audio chunks from ``output_queue`` to the client.

    Runs until a ``None`` sentinel is dequeued or the task is cancelled.
    Errors raised by ``ws.send_bytes`` propagate to the caller.
    """
    print("Audio output sender started.")
    while True:
        try:
            audio_chunk = await output_queue.get()
            if audio_chunk is None:
                break
            await ws.send_bytes(audio_chunk)
        except asyncio.CancelledError:
            break
    print("Audio output sender stopped.")


def _transcode_to_pcm(data: bytes) -> bytes:
    """Convert one audio blob to 16 kHz mono signed 16-bit little-endian PCM.

    Blocking helper; call it from a worker thread, not from the event loop.
    """
    process = (
        ffmpeg
        .input('pipe:0')
        .output('pipe:1', format='s16le', acodec='pcm_s16le', ac=1, ar='16k')
        .run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True)
    )
    # communicate() feeds stdin, drains stdout AND stderr concurrently, and
    # waits for exit. This avoids a pipe deadlock when ffmpeg fills the
    # (otherwise unread) stderr pipe, and reaps the child process.
    pcm_data, _stderr = process.communicate(input=data)
    if process.returncode != 0:
        print(f"Audio input warning: ffmpeg exited with code {process.returncode}")
    return pcm_data


async def handle_audio_input(websocket: WebSocket, input_queue: asyncio.Queue):
    """Receive binary audio messages, transcode them, and enqueue the PCM.

    Stops on ``WebSocketDisconnect`` or on any other exception (which is
    logged, not re-raised).
    """
    print("Audio input handler started.")
    loop = asyncio.get_running_loop()
    while True:
        try:
            data = await websocket.receive_bytes()
            # Use ffmpeg to convert webm/opus audio from browser to raw pcm.
            # Run it in the default executor so the subprocess I/O does not
            # block the event loop (and with it the output sender).
            pcm_data = await loop.run_in_executor(None, _transcode_to_pcm, data)
            await input_queue.put(pcm_data)
        except WebSocketDisconnect:
            break
        except Exception as e:
            print(f"Audio input error: {e}")
            break
    print("Audio input handler stopped.")


@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """Run one translation session for a connected client.

    Accepts the socket, starts the translator, then runs the input handler
    and output sender concurrently until either of them finishes. Cleanup
    (task cancellation, translator stop, socket close) always runs.
    """
    print("[SERVER] WebSocket endpoint called. Awaiting connection...")
    await websocket.accept()
    print("[SERVER] WebSocket connection accepted.")

    output_sender_task = None
    input_handler_task = None
    try:
        # Start translation and audio processing tasks
        print("[SERVER] Awaiting translator.start_translation()...")
        await translator.start_translation()
        print("[SERVER] translator.start_translation() returned. Creating tasks...")

        output_sender_task = asyncio.create_task(
            audio_output_sender(websocket, translator.output_queue)
        )
        input_handler_task = asyncio.create_task(
            handle_audio_input(websocket, translator.input_queue)
        )

        # Log text kept unchanged for anyone grepping logs.
        print("[SERVER] Awaiting asyncio.gather for I/O tasks...")
        # Wait for the FIRST task to finish rather than both: once the client
        # disconnects the input handler returns, but the output sender would
        # stay parked on queue.get() forever and cleanup would never run.
        done, _pending = await asyncio.wait(
            {input_handler_task, output_sender_task},
            return_when=asyncio.FIRST_COMPLETED,
        )
        for finished in done:
            # Re-raise any exception from the finished task so it is logged
            # by the handlers below.
            finished.result()

    except WebSocketDisconnect:
        print("[SERVER] Client disconnected via WebSocketDisconnect.")
    except Exception as e:
        print(f"[SERVER] An error occurred in websocket_endpoint: {e}")
    finally:
        print("[SERVER] Cleaning up tasks and stopping translation...")
        tasks = [
            task
            for task in (output_sender_task, input_handler_task)
            if task is not None
        ]
        for task in tasks:
            task.cancel()
        if tasks:
            # Await the cancelled tasks so they actually unwind before the
            # translator is stopped; their outcomes are deliberately ignored.
            await asyncio.gather(*tasks, return_exceptions=True)
        try:
            # NOTE(review): called without await, as in the original; confirm
            # VoiceTranslator.stop_translation is not a coroutine.
            translator.stop_translation()
        finally:
            try:
                await websocket.close()
            except RuntimeError:
                # The socket was already closed (e.g. client disconnected).
                pass
            print("[SERVER] WebSocket connection closed.")