File size: 7,778 Bytes
d01de5d ef69efc d01de5d ef69efc d01de5d 130ce6d d01de5d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 |
# main.py - Final Production-Ready Backend
from fastapi import FastAPI, UploadFile, File, HTTPException
from pydantic import BaseModel, Field
from starlette.responses import StreamingResponse
import logging
import io
# Optional engine imports.
# Each engine lives in its own module; when a module cannot be imported the
# corresponding class name is bound to None so the rest of the file can test
# for availability instead of crashing at import time.
try:
    from asr_engine import ASREngine
except ImportError:
    ASREngine = None

# The numba-related environment variables must be in place before tts_engine
# is imported (they exist to prevent numba caching issues).
import os

try:
    os.environ.update(
        {
            'NUMBA_DISABLE_JIT': '1',
            'NUMBA_CACHE_DIR': '/tmp/numba_cache',
        }
    )
    from tts_engine import TTSEngine
except ImportError as import_error:
    print(f"⚠️ TTS engine not available: {import_error}")
    TTSEngine = None
    TTS_AVAILABLE = False
else:
    # Only reached when the TTS import succeeded.
    TTS_AVAILABLE = True

try:
    from translation_engine import TranslationEngine
except ImportError:
    TranslationEngine = None
# Configure logging
# basicConfig sets up the root logger at INFO level; `logger` is the
# module-level logger used by the startup hook and every endpoint below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# --- Data Models ---
# Request body accepted by the /translate endpoint.
# `text` is required and limited to 1..1000 characters; `target_lang` is a
# required language identifier that is passed on to the translation engine.
# (Documented with comments rather than a class docstring so the generated
# schema description stays exactly as it was.)
class TranslationRequest(BaseModel):
    text: str = Field(
        ...,
        min_length=1,
        max_length=1000,
        description="Text to translate",
    )
    target_lang: str = Field(
        ...,
        description="Target language code",
    )
# Request body accepted by the /text-to-speech endpoint.
# `text` is required and limited to 1..1000 characters; `speaker` is optional
# and falls back to "p225" when the client does not send it.
# (Documented with comments rather than a class docstring so the generated
# schema description stays exactly as it was.)
class TTSRequest(BaseModel):
    text: str = Field(
        ...,
        min_length=1,
        max_length=1000,
        description="Text to convert to speech",
    )
    speaker: str = Field(
        default="p225",
        description="Speaker ID for TTS",
    )
# --- App and AI Engine Initialization ---
# The ASGI application object; all routes below are registered on it.
app = FastAPI(
    title="Carsa AI API",
    description="Complete AI-powered translation and speech synthesis API",
    version="1.0.0",
)

# Engine singletons. They start out as None and are populated by the startup
# hook; endpoints treat a falsy value as "service unavailable".
asr_engine = tts_engine = translation_engine = None
# NOTE(review): FastAPI documents `on_event` as deprecated in favour of
# lifespan handlers - consider migrating when the app construction is touched.
@app.on_event("startup")
def load_models():
    """Load all AI models into memory when the server starts.

    Instantiates each engine whose class was importable and stores it in the
    matching module-level global (``translation_engine``, ``asr_engine``,
    ``tts_engine``). Engines whose import failed are skipped with a warning
    and their global stays ``None``.

    Raises:
        Exception: Any error raised while constructing an engine is logged
            (with traceback) and re-raised unchanged.
    """
    global asr_engine, tts_engine, translation_engine
    try:
        if TranslationEngine:
            logger.info("Loading Translation Engine...")
            translation_engine = TranslationEngine()
            logger.info("✅ Translation Engine loaded")
        else:
            logger.warning("⚠️ Translation Engine not available")

        if ASREngine:
            logger.info("Loading ASR Engine...")
            asr_engine = ASREngine()
            logger.info("✅ ASR Engine loaded")
        else:
            logger.warning("⚠️ ASR Engine not available")

        # TTS needs both the import flag and the class itself.
        if TTS_AVAILABLE and TTSEngine:
            logger.info("Loading TTS Engine...")
            tts_engine = TTSEngine()
            logger.info("✅ TTS Engine loaded")
        else:
            logger.warning("⚠️ TTS Engine not available")

        logger.info("--- All available models loaded. API is ready. ---")
    except Exception as e:
        # Include the traceback, matching the endpoint error handlers in this
        # file, so a failed startup can be diagnosed from the log alone.
        logger.error(f"Failed to load models: {e}", exc_info=True)
        # Bare raise re-raises the active exception with its original traceback.
        raise
# --- API Endpoints ---
@app.get("/")
def read_root():
    # Static banner describing the API: a status string, the version and the
    # names of the three services it exposes.
    service_names = ["translation", "speech-to-text", "text-to-speech"]
    banner = {
        "status": "Carsa AI API is running",
        "version": "1.0.0",
        "services": service_names,
    }
    return banner
@app.get("/health")
def health_check():
    """Health check endpoint to verify all services are running."""
    # "status" is always "healthy"; the per-engine flags report whether each
    # module-level engine global has been populated.
    report = {"status": "healthy"}
    report["translation_engine"] = translation_engine is not None
    report["asr_engine"] = asr_engine is not None
    report["tts_engine"] = tts_engine is not None
    return report
@app.post("/translate")
def translate_text(request: TranslationRequest):
    # Translate request.text into request.target_lang.
    # Responds 503 when no translation engine is loaded, 400 for the
    # discontinued Hausa target, and 500 when the engine itself raises.
    if not translation_engine:
        raise HTTPException(status_code=503, detail="Translation Engine not available.")

    # Block Hausa requests (discontinued); the comparison ignores case.
    hausa_requested = request.target_lang.lower() == "hausa"
    if hausa_requested:
        raise HTTPException(
            status_code=400,
            detail="Hausa translation has been discontinued. Please use Twi, Ga, or Ewe instead.",
        )

    try:
        translated = translation_engine.translate(request.text, request.target_lang)
    except Exception as err:
        logger.error(f"Translation Error: {err}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Translation failed: {str(err)}")

    return {"translated_text": translated}
# Largest upload accepted by /speech-to-text (10MB).
_MAX_AUDIO_BYTES = 10 * 1024 * 1024


@app.post("/speech-to-text")
async def speech_to_text(audio_file: UploadFile = File(...)):
    """
    Convert speech audio to text using ASR.
    Accepts audio file uploads in various formats (wav, mp3, m4a, etc.)
    """
    # Responses: 503 when no ASR engine is loaded; 422 for a missing, empty or
    # oversized upload; 500 when reading or transcribing fails.
    if not asr_engine:
        raise HTTPException(status_code=503, detail="ASR Engine not available.")

    # Validate file
    if not audio_file:
        raise HTTPException(status_code=422, detail="No audio file provided.")

    # UploadFile.size is typed as optional (and absent on older Starlette
    # releases), so only trust it when it is actually an int; comparing None
    # with an int would raise TypeError before the size limit is applied.
    declared_size = getattr(audio_file, "size", None)
    if declared_size is not None:
        if declared_size == 0:
            raise HTTPException(status_code=422, detail="Audio file is empty.")
        # Check file size (max 10MB)
        if declared_size > _MAX_AUDIO_BYTES:
            raise HTTPException(status_code=422, detail="Audio file too large. Maximum size is 10MB.")

    try:
        logger.info(f"Processing audio file: {audio_file.filename}, size: {declared_size} bytes, content_type: {audio_file.content_type}")
        audio_bytes = await audio_file.read()

        if len(audio_bytes) == 0:
            raise HTTPException(status_code=422, detail="Audio file contains no data.")
        # Enforce the limit on the bytes actually received, which also covers
        # uploads whose declared size was unavailable.
        if len(audio_bytes) > _MAX_AUDIO_BYTES:
            raise HTTPException(status_code=422, detail="Audio file too large. Maximum size is 10MB.")

        # NOTE(review): transcribe() is called synchronously inside an async
        # handler, so it blocks the event loop while it runs - confirm whether
        # the engine is safe to move to a worker thread before changing this.
        transcribed_text = asr_engine.transcribe(audio_bytes)
        logger.info(f"ASR transcription successful: {transcribed_text[:50]}...")
        return {"transcribed_text": transcribed_text}
    except HTTPException:
        # Validation errors raised above must keep their own status code.
        raise
    except Exception as e:
        logger.error(f"ASR Error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to process audio: {str(e)}")
@app.get("/supported-languages")
def get_supported_languages():
    """Get list of supported translation languages."""
    # With an engine loaded, the language list is the key set of its
    # `language_models` mapping; otherwise an empty list is reported.
    if translation_engine:
        models = translation_engine.language_models
        return {
            "supported_languages": list(models.keys()),
            "total_count": len(models),
            "message": "These are the currently supported languages for translation",
        }

    return {
        "supported_languages": [],
        "message": "Translation engine not available",
    }
@app.get("/tts/status")
def get_tts_status():
    """Get TTS engine status and information."""
    # "unavailable" when there is no engine or the engine has no model,
    # "available" with the engine's model info otherwise, and "error" when
    # querying the model info raises.
    if not (tts_engine and tts_engine.model):
        return {
            "status": "unavailable",
            "message": "TTS engine is not loaded",
        }

    try:
        model_info = tts_engine.get_model_info()
    except Exception as err:
        logger.error(f"Error getting TTS status: {err}")
        return {
            "status": "error",
            "message": f"Error getting TTS status: {str(err)}",
        }

    return {
        "status": "available",
        "model_info": model_info,
    }
@app.post("/text-to-speech")
async def text_to_speech(request: TTSRequest):
    # Synthesize request.text with the requested speaker and return the audio
    # as a WAV attachment. Responds 503 when TTS could not be imported or is
    # not loaded, and 500 when synthesis or response construction fails.
    if not TTS_AVAILABLE:
        raise HTTPException(
            status_code=503,
            detail="TTS Engine not available. Requires Python 3.11 or lower to install TTS library.",
        )

    if not (tts_engine and tts_engine.model):
        raise HTTPException(status_code=503, detail="TTS Engine not loaded.")

    try:
        # Use the new synthesize_to_bytes method
        wav_data = tts_engine.synthesize_to_bytes(text=request.text, speaker=request.speaker)

        # Return audio as streaming response
        response_headers = {
            "Content-Disposition": "attachment; filename=speech.wav",
            "Content-Length": str(len(wav_data)),
        }
        return StreamingResponse(
            io.BytesIO(wav_data),
            media_type="audio/wav",
            headers=response_headers,
        )
    except Exception as err:
        logger.error(f"TTS Error: {err}", exc_info=True)
        raise HTTPException(status_code=500, detail="Failed to generate speech.")