Spaces:

CarsaAI
/

carsa_api

Running

File size: 7,778 Bytes

# main.py - Final Production-Ready Backend

from fastapi import FastAPI, UploadFile, File, HTTPException
from pydantic import BaseModel, Field
from starlette.responses import StreamingResponse
import logging
import io

# Import our engine blueprints
# Every engine is optional: a failed import binds the class name to None
# instead of stopping this module from loading. Only ImportError is handled;
# any other error raised while importing an engine module propagates.
try:
    from asr_engine import ASREngine
except ImportError:
    ASREngine = None

try:
    # Set environment variables to prevent numba caching issues
    # The variables are assigned before tts_engine is imported, presumably so
    # numba sees them when it is first loaded — confirm against tts_engine.
    # NOTE(review): NUMBA_DISABLE_JIT=1 switches numba's JIT compilation off
    # entirely, not only its cache — confirm the slowdown is acceptable.
    import os
    os.environ['NUMBA_DISABLE_JIT'] = '1'
    os.environ['NUMBA_CACHE_DIR'] = '/tmp/numba_cache'
    from tts_engine import TTSEngine
    TTS_AVAILABLE = True  # checked by load_models() and the /text-to-speech endpoint
except ImportError as e:
    # Reported with print(): the module-level logger is created further down.
    print(f"⚠️ TTS engine not available: {e}")
    TTSEngine = None
    TTS_AVAILABLE = False

try:
    from translation_engine import TranslationEngine
except ImportError:
    TranslationEngine = None

# Configure logging
# basicConfig() sets the root logger level to INFO; `logger` is the
# module-level logger used by the startup hook and the endpoints below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- Data Models ---
class TranslationRequest(BaseModel):
    # Request body accepted by the /translate endpoint.
    # (Documented with comments rather than a class docstring so the
    # generated schema description stays exactly as it was.)

    # Required; between 1 and 1000 characters.
    text: str = Field(
        ...,
        min_length=1,
        max_length=1000,
        description="Text to translate",
    )

    # Required; no length constraints are declared for this field.
    target_lang: str = Field(
        ...,
        description="Target language code",
    )

class TTSRequest(BaseModel):
    # Request body accepted by the /text-to-speech endpoint.
    # (Documented with comments rather than a class docstring so the
    # generated schema description stays exactly as it was.)

    # Required; between 1 and 1000 characters.
    text: str = Field(
        ...,
        min_length=1,
        max_length=1000,
        description="Text to convert to speech",
    )

    # Optional; falls back to "p225" when the client omits it.
    speaker: str = Field(
        default="p225",
        description="Speaker ID for TTS",
    )
    
# --- App and AI Engine Initialization ---
# The FastAPI application object that every route below is registered on.
app = FastAPI(
    version="1.0.0",
    title="Carsa AI API",
    description="Complete AI-powered translation and speech synthesis API",
)

# Engine handles: all start as None and are rebound by load_models() when the
# server starts; endpoints treat a None engine as "not available".
asr_engine = tts_engine = translation_engine = None

def _load_engine(label, factory):
    """Instantiate one engine, logging progress along the way.

    Args:
        label: Human-readable engine name used in the log messages.
        factory: Zero-argument callable (the engine class), or a falsy value
            when the engine is not available.

    Returns:
        The new engine instance, or None when ``factory`` is falsy.

    Raises:
        Exception: Whatever ``factory()`` raises is propagated to the caller.
    """
    if not factory:
        logger.warning(f"⚠️ {label} not available")
        return None

    logger.info(f"Loading {label}...")
    engine = factory()
    logger.info(f"✅ {label} loaded")
    return engine


# NOTE(review): FastAPI documents on_event("startup") as deprecated in favour
# of lifespan handlers; migrating also requires changing how `app` is built.
@app.on_event("startup")
def load_models():
    """Load all AI models into memory when the server starts.

    Rebinds the module-level ``translation_engine``, ``asr_engine`` and
    ``tts_engine`` names. An engine whose class is unavailable is left as
    None and a warning is logged.

    Raises:
        Exception: Any error raised while constructing an engine is logged
            with its traceback and re-raised.
    """
    global asr_engine, tts_engine, translation_engine

    try:
        translation_engine = _load_engine("Translation Engine", TranslationEngine)
        asr_engine = _load_engine("ASR Engine", ASREngine)
        # The TTS engine additionally requires the TTS_AVAILABLE flag.
        tts_engine = _load_engine("TTS Engine", TTSEngine if TTS_AVAILABLE else None)

        logger.info("--- All available models loaded. API is ready. ---")
    except Exception as e:
        # exc_info=True keeps the traceback, matching the endpoint handlers.
        logger.error(f"Failed to load models: {e}", exc_info=True)
        raise

# --- API Endpoints ---
@app.get("/")
def read_root():
    # Static banner for the API root: status text, version and service names.
    # (Kept as a comment, not a docstring, so the generated API docs are unchanged.)
    banner = {"status": "Carsa AI API is running"}
    banner["version"] = "1.0.0"
    banner["services"] = ["translation", "speech-to-text", "text-to-speech"]
    return banner

@app.get("/health")
def health_check():
    """Health check endpoint to verify all services are running."""
    # Each engine is reported as True when its module-level handle is not None.
    # The "status" field is always "healthy", whatever the engine flags say.
    engines = {
        "translation_engine": translation_engine,
        "asr_engine": asr_engine,
        "tts_engine": tts_engine,
    }
    report = {"status": "healthy"}
    for name, engine in engines.items():
        report[name] = engine is not None
    return report

@app.post("/translate")
def translate_text(request: TranslationRequest):
    # Translates request.text into request.target_lang with the translation engine.
    # Responses: 503 when the engine is not loaded, 400 for the discontinued
    # Hausa target, 500 when the engine raises.
    # (Kept as comments, not a docstring, so the generated API docs are unchanged.)
    if not translation_engine:
        raise HTTPException(status_code=503, detail="Translation Engine not available.")

    target = request.target_lang

    # Block Hausa requests (discontinued); the comparison ignores case.
    if target.lower() == "hausa":
        discontinued_msg = "Hausa translation has been discontinued. Please use Twi, Ga, or Ewe instead."
        raise HTTPException(status_code=400, detail=discontinued_msg)

    try:
        translated = translation_engine.translate(request.text, target)
    except Exception as e:
        logger.error(f"Translation Error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}")
    else:
        return {"translated_text": translated}

@app.post("/speech-to-text")
async def speech_to_text(audio_file: UploadFile = File(...)):
    """
    Convert speech audio to text using ASR.
    Accepts audio file uploads in various formats (wav, mp3, m4a, etc.)
    Uploads must be non-empty and no larger than 10MB.

    Returns a JSON object with the transcription under "transcribed_text".
    Responds with 503 when the ASR engine is not loaded, 422 for a missing,
    empty or oversized upload, and 500 when transcription fails.
    """
    if not asr_engine:
        raise HTTPException(status_code=503, detail="ASR Engine not available.")

    # Validate file
    if not audio_file:
        raise HTTPException(status_code=422, detail="No audio file provided.")

    max_bytes = 10 * 1024 * 1024  # 10MB upload limit

    # UploadFile.size is optional and may be None. Comparing None with an int
    # raises TypeError, so the declared size is only checked when it is known;
    # the bytes actually read are checked again below.
    declared_size = getattr(audio_file, "size", None)
    if declared_size is not None:
        if declared_size == 0:
            raise HTTPException(status_code=422, detail="Audio file is empty.")

        # Check file size (max 10MB)
        if declared_size > max_bytes:
            raise HTTPException(status_code=422, detail="Audio file too large. Maximum size is 10MB.")

    try:
        logger.info(f"Processing audio file: {audio_file.filename}, size: {declared_size} bytes, content_type: {audio_file.content_type}")

        # Read at most one byte past the limit: enough to detect an oversized
        # upload without pulling an arbitrarily large file into memory.
        audio_bytes = await audio_file.read(max_bytes + 1)

        if len(audio_bytes) == 0:
            raise HTTPException(status_code=422, detail="Audio file contains no data.")

        if len(audio_bytes) > max_bytes:
            raise HTTPException(status_code=422, detail="Audio file too large. Maximum size is 10MB.")

        # NOTE(review): transcribe() is called directly inside this coroutine;
        # if it blocks for long, other requests wait on it — confirm whether
        # it should be moved to a worker thread.
        transcribed_text = asr_engine.transcribe(audio_bytes)
        logger.info(f"ASR transcription successful: {transcribed_text[:50]}...")
        return {"transcribed_text": transcribed_text}

    except HTTPException:
        # Validation errors raised above must reach the client unchanged.
        raise
    except Exception as e:
        logger.error(f"ASR Error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to process audio: {str(e)}") from e

@app.get("/supported-languages")
def get_supported_languages():
    """Get list of supported translation languages.

    The response always carries "supported_languages", "total_count" and
    "message"; when the translation engine is not loaded the list is empty
    and the count is 0.
    """
    if not translation_engine:
        return {
            "supported_languages": [],
            # Included so both responses expose the same set of keys.
            "total_count": 0,
            "message": "Translation engine not available"
        }

    language_models = translation_engine.language_models
    return {
        "supported_languages": list(language_models.keys()),
        "total_count": len(language_models),
        "message": "These are the currently supported languages for translation"
    }

@app.get("/tts/status")
def get_tts_status():
    """Get TTS engine status and information."""
    # "unavailable" covers both a missing engine and an engine whose `model`
    # attribute is falsy.
    if not (tts_engine and tts_engine.model):
        return {
            "status": "unavailable",
            "message": "TTS engine is not loaded"
        }

    # A failure while collecting the model info is reported in the payload
    # ("status": "error") rather than raised as an HTTP error.
    try:
        details = tts_engine.get_model_info()
    except Exception as e:
        logger.error(f"Error getting TTS status: {e}")
        return {
            "status": "error",
            "message": f"Error getting TTS status: {str(e)}"
        }

    return {"status": "available", "model_info": details}

@app.post("/text-to-speech")
async def text_to_speech(request: TTSRequest):
    # Synthesizes request.text with the requested speaker and returns the
    # audio as a WAV attachment.
    # Responses: 503 when TTS is unavailable or not loaded, 500 when synthesis fails.
    # (Kept as comments, not a docstring, so the generated API docs are unchanged.)
    if not TTS_AVAILABLE:
        unavailable_msg = "TTS Engine not available. Requires Python 3.11 or lower to install TTS library."
        raise HTTPException(status_code=503, detail=unavailable_msg)

    if not (tts_engine and tts_engine.model):
        raise HTTPException(status_code=503, detail="TTS Engine not loaded.")

    try:
        # Use the new synthesize_to_bytes method
        wav_bytes = tts_engine.synthesize_to_bytes(text=request.text, speaker=request.speaker)

        # Return audio as streaming response
        response_headers = {
            "Content-Disposition": "attachment; filename=speech.wav",
            "Content-Length": str(len(wav_bytes)),
        }
        return StreamingResponse(
            io.BytesIO(wav_bytes),
            media_type="audio/wav",
            headers=response_headers,
        )
    except Exception as e:
        # The client gets a generic message; the details go to the log only.
        logger.error(f"TTS Error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Failed to generate speech.")