RealtimeTranslator / server.py
Mike W
High risk async refactor
046ebfc
import asyncio
import os
import ffmpeg
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from dotenv import load_dotenv
from translator import VoiceTranslator
# Load variables from a .env file (when present) before the environment is read.
load_dotenv()

app = FastAPI()
# Static file serving is currently disabled:
#app.mount("/static", StaticFiles(directory="static"), name="static")

# Credentials and settings for the external services, taken from the environment.
google_creds = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
deepl_key = os.getenv("DEEPL_API_KEY")
eleven_key = os.getenv("ELEVENLABS_API_KEY")
voice_id = os.getenv("ELEVENLABS_VOICE_ID")

# Startup diagnostics: report only whether each value is set, never the value.
_status_report = (
    "--- API Key Status ---",
    f"GOOGLE_APPLICATION_CREDENTIALS loaded: {bool(google_creds)}",
    f"DEEPL_API_KEY loaded: {bool(deepl_key)}",
    f"ELEVENLABS_API_KEY loaded: {bool(eleven_key)}",
    f"ELEVENLABS_VOICE_ID loaded: {bool(voice_id)}",
    "----------------------",
)
print(*_status_report, sep="\n")

# Refuse to start when any required setting is missing or empty.
_required_settings = (google_creds, deepl_key, eleven_key, voice_id)
if any(not setting for setting in _required_settings):
    raise ValueError("CRITICAL: Missing one or more required API keys. Please check your Hugging Face secrets.")

# Single module-level translator instance used by the WebSocket endpoint.
translator = VoiceTranslator(deepl_key, eleven_key, voice_id)
@app.get("/")
async def get():
    """Serve the client page.

    Reads ``index.html`` (resolved against the process's current working
    directory) on every request and returns its contents as an HTML response.

    Returns:
        HTMLResponse: The contents of ``index.html``.

    Raises:
        OSError: If ``index.html`` cannot be opened or read.
    """
    # The context manager closes the file handle deterministically instead of
    # leaving it to the garbage collector; the explicit encoding avoids
    # depending on the platform's default locale.
    with open("index.html", "r", encoding="utf-8") as page:
        return HTMLResponse(page.read())
async def audio_output_sender(ws: WebSocket, output_queue: asyncio.Queue):
    """Forward queued audio chunks to the client until told to stop.

    Items are taken from ``output_queue`` one at a time and sent over ``ws``
    as binary frames. The loop ends when a ``None`` item is dequeued or when
    the coroutine is cancelled; in both cases it returns normally.

    Args:
        ws: Connection exposing an awaitable ``send_bytes`` method.
        output_queue: Queue of audio chunks, with ``None`` as the stop marker.
    """
    print("Audio output sender started.")
    try:
        while True:
            outgoing = await output_queue.get()
            if outgoing is None:
                # Stop marker: nothing more will be sent.
                break
            await ws.send_bytes(outgoing)
    except asyncio.CancelledError:
        # Cancellation is treated as a regular stop request and is not re-raised.
        pass
    print("Audio output sender stopped.")
def _transcode_to_pcm(data: bytes):
    """Pipe one encoded audio chunk through ffmpeg and collect the raw PCM output.

    This function blocks until ffmpeg exits, so asynchronous callers should run
    it in a worker thread.

    Args:
        data: Encoded audio bytes written to ffmpeg's stdin.

    Returns:
        tuple: ``(pcm_data, returncode, stderr_data)`` where ``pcm_data`` is
        everything ffmpeg wrote to stdout (mono, 16 kHz, signed 16-bit
        little-endian per the output arguments below), ``returncode`` is the
        process exit status and ``stderr_data`` is ffmpeg's captured stderr.
    """
    process = (
        ffmpeg
        .input('pipe:0')
        .output('pipe:1', format='s16le', acodec='pcm_s16le', ac=1, ar='16k')
        .run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True)
    )
    try:
        # communicate() writes stdin while draining stdout and stderr, then
        # closes the pipes and waits for the process. Writing everything first
        # and reading afterwards can deadlock once a pipe buffer fills, and
        # never waiting leaves a zombie process behind for every chunk.
        pcm_data, stderr_data = process.communicate(input=data)
    except BaseException:
        # Do not leave a half-fed ffmpeg process running if we are interrupted.
        process.kill()
        process.wait()
        raise
    return pcm_data, process.returncode, stderr_data


async def handle_audio_input(websocket: WebSocket, input_queue: asyncio.Queue):
    """Receive audio from the client, convert it to PCM and queue it.

    Each binary WebSocket message is transcoded by a fresh ffmpeg process
    (the original comment describes the input as webm/opus from the browser)
    and the resulting PCM bytes are put on ``input_queue``. The loop ends when
    the client disconnects or when any other error occurs; errors are logged,
    not raised.

    Args:
        websocket: Accepted WebSocket connection to read binary frames from.
        input_queue: Queue that receives the converted PCM chunks.
    """
    print("Audio input handler started.")
    loop = asyncio.get_running_loop()
    while True:
        try:
            data = await websocket.receive_bytes()
            # The ffmpeg round trip is blocking subprocess I/O; run it in the
            # default executor so the event loop keeps serving other tasks.
            pcm_data, returncode, stderr_data = await loop.run_in_executor(
                None, _transcode_to_pcm, data
            )
            if returncode != 0:
                # Surface conversion failures instead of silently forwarding
                # whatever (possibly empty) output ffmpeg produced.
                detail = stderr_data.decode(errors="replace").strip()[-500:]
                print(f"Audio input warning: ffmpeg exited with code {returncode}: {detail}")
            await input_queue.put(pcm_data)
        except WebSocketDisconnect:
            break
        except Exception as e:
            print(f"Audio input error: {e}")
            break
    print("Audio input handler stopped.")
@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """Run one translation session over a WebSocket connection.

    Accepts the connection, starts the shared ``translator``, then runs two
    tasks concurrently: one feeding client audio into ``translator.input_queue``
    and one sending ``translator.output_queue`` items back to the client. As
    soon as either task finishes, the session is torn down: both tasks are
    cancelled and awaited, the translator is stopped and the socket is closed.

    Args:
        websocket: The incoming WebSocket connection.
    """
    print("[SERVER] WebSocket endpoint called. Awaiting connection...")
    await websocket.accept()
    print("[SERVER] WebSocket connection accepted.")

    output_sender_task = None
    input_handler_task = None
    try:
        # Start translation and audio processing tasks
        print("[SERVER] Awaiting translator.start_translation()...")
        await translator.start_translation()
        print("[SERVER] translator.start_translation() returned. Creating tasks...")
        output_sender_task = asyncio.create_task(
            audio_output_sender(websocket, translator.output_queue)
        )
        input_handler_task = asyncio.create_task(
            handle_audio_input(websocket, translator.input_queue)
        )
        # Log text kept unchanged for continuity with existing log output.
        print("[SERVER] Awaiting asyncio.gather for I/O tasks...")
        # Wait for the FIRST task to finish rather than both: when the client
        # disconnects the input handler returns, but the output sender would
        # otherwise stay blocked on its queue and cleanup would never run.
        finished, _ = await asyncio.wait(
            {input_handler_task, output_sender_task},
            return_when=asyncio.FIRST_COMPLETED,
        )
        for task in finished:
            # Re-raise any exception from a finished task so it is logged below.
            task.result()
    except WebSocketDisconnect:
        print("[SERVER] Client disconnected via WebSocketDisconnect.")
    except Exception as e:
        print(f"[SERVER] An error occurred in websocket_endpoint: {e}")
    finally:
        print("[SERVER] Cleaning up tasks and stopping translation...")
        session_tasks = [
            task
            for task in (output_sender_task, input_handler_task)
            if task is not None
        ]
        for task in session_tasks:
            task.cancel()
        if session_tasks:
            # Let the cancelled tasks actually unwind before tearing down the
            # translator; return_exceptions keeps their errors from escaping.
            await asyncio.gather(*session_tasks, return_exceptions=True)
        # Called without await, as in the original code.
        # NOTE(review): confirm stop_translation() is not a coroutine.
        translator.stop_translation()
        try:
            await websocket.close()
        except (RuntimeError, WebSocketDisconnect):
            # Closing can fail when the connection is already closed (for
            # example after a client disconnect); nothing is left to do then.
            pass
        print("[SERVER] WebSocket connection closed.")