File size: 7,778 Bytes
d01de5d
 
 
 
 
 
 
 
 
 
 
 
 
 
ef69efc
 
 
 
 
 
 
 
 
 
 
d01de5d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef69efc
 
 
 
 
 
d01de5d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130ce6d
 
 
 
 
 
 
d01de5d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
# main.py - Final Production-Ready Backend

from fastapi import FastAPI, UploadFile, File, HTTPException
from pydantic import BaseModel, Field
from starlette.responses import StreamingResponse
import logging
import io

# Import our engine blueprints
# Each engine module is treated as optional: if its import fails, the class
# name is bound to None so that load_models() can skip that engine instead of
# the whole application failing at import time.
try:
    from asr_engine import ASREngine
except ImportError:
    ASREngine = None
    
try:
    # Set environment variables to prevent numba caching issues
    # NOTE(review): these are set before `tts_engine` is imported, presumably
    # because that import (or one of its dependencies) loads numba and reads
    # them at import time -- confirm. Per the numba documentation,
    # NUMBA_DISABLE_JIT=1 turns JIT compilation off entirely, which goes
    # further than redirecting the cache directory; verify that is intended.
    import os
    os.environ['NUMBA_DISABLE_JIT'] = '1'
    os.environ['NUMBA_CACHE_DIR'] = '/tmp/numba_cache'
    from tts_engine import TTSEngine
    # TTS_AVAILABLE records whether the import succeeded; the startup hook and
    # the /text-to-speech endpoint both check it.
    TTS_AVAILABLE = True
except ImportError as e:
    # Reported with print() because logging.basicConfig() has not run yet at
    # this point in the module.
    print(f"⚠️ TTS engine not available: {e}")
    TTSEngine = None
    TTS_AVAILABLE = False

try:
    from translation_engine import TranslationEngine
except ImportError:
    TranslationEngine = None

# Configure logging
# INFO level on the root logger; `logger` is the module-level logger used by
# the startup hook and every endpoint below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- Data Models ---
class TranslationRequest(BaseModel):
    # Request body accepted by POST /translate.
    # (Documented with comments rather than a docstring so the generated
    # schema description stays exactly as it was.)

    # Required; between 1 and 1000 characters.
    text: str = Field(
        ...,
        min_length=1,
        max_length=1000,
        description="Text to translate",
    )
    # Required; forwarded to the translation engine by translate_text().
    target_lang: str = Field(
        ...,
        description="Target language code",
    )

class TTSRequest(BaseModel):
    # Request body accepted by POST /text-to-speech.
    # (Documented with comments rather than a docstring so the generated
    # schema description stays exactly as it was.)

    # Required; between 1 and 1000 characters.
    text: str = Field(
        ...,
        min_length=1,
        max_length=1000,
        description="Text to convert to speech",
    )
    # Optional; falls back to "p225" when the client omits it.
    speaker: str = Field(
        default="p225",
        description="Speaker ID for TTS",
    )
    
# --- App and AI Engine Initialization ---
# The FastAPI application object; the keyword arguments are the API metadata.
app = FastAPI(
    version="1.0.0",
    title="Carsa AI API",
    description="Complete AI-powered translation and speech synthesis API",
)

# Module-level engine handles. They stay None until load_models() assigns
# real instances at startup; endpoints treat None as "engine unavailable".
translation_engine = None
tts_engine = None
asr_engine = None

@app.on_event("startup")
def load_models():
    """Instantiate every importable AI engine when the server starts.

    Engines are built in the order translation, ASR, TTS and stored in the
    module-level globals. An engine whose class could not be imported is
    skipped with a warning. Any exception raised while constructing an engine
    is logged and re-raised.
    """
    global asr_engine, tts_engine, translation_engine

    try:
        # --- Translation ---
        if not TranslationEngine:
            logger.warning("⚠️ Translation Engine not available")
        else:
            logger.info("Loading Translation Engine...")
            translation_engine = TranslationEngine()
            logger.info("✅ Translation Engine loaded")

        # --- Speech recognition ---
        if not ASREngine:
            logger.warning("⚠️ ASR Engine not available")
        else:
            logger.info("Loading ASR Engine...")
            asr_engine = ASREngine()
            logger.info("✅ ASR Engine loaded")

        # --- Speech synthesis (needs both the flag and the class) ---
        if not TTS_AVAILABLE or not TTSEngine:
            logger.warning("⚠️ TTS Engine not available")
        else:
            logger.info("Loading TTS Engine...")
            tts_engine = TTSEngine()
            logger.info("✅ TTS Engine loaded")

        logger.info("--- All available models loaded. API is ready. ---")
    except Exception as load_error:
        logger.error(f"Failed to load models: {load_error}")
        raise load_error

# --- API Endpoints ---
@app.get("/")
def read_root():
    # Static banner: status text, API version and the advertised service names.
    # (No docstring added: FastAPI would publish it as the endpoint description.)
    advertised_services = ["translation", "speech-to-text", "text-to-speech"]
    banner = {
        "status": "Carsa AI API is running",
        "version": "1.0.0",
        "services": advertised_services,
    }
    return banner

@app.get("/health")
def health_check():
    """Health check endpoint to verify all services are running."""
    # "status" is always "healthy"; the per-engine booleans report whether
    # each module-level engine handle has been populated.
    engine_handles = {
        "translation_engine": translation_engine,
        "asr_engine": asr_engine,
        "tts_engine": tts_engine,
    }
    report = {"status": "healthy"}
    for engine_name, handle in engine_handles.items():
        report[engine_name] = handle is not None
    return report

@app.post("/translate")
def translate_text(request: TranslationRequest):
    # Translate request.text into request.target_lang.
    #   200 -> {"translated_text": <engine result>}
    #   503 -> translation engine not loaded
    #   400 -> target language is "hausa" (any letter case)
    #   500 -> the engine raised; the error text is included in the detail
    if not translation_engine:
        raise HTTPException(status_code=503, detail="Translation Engine not available.")

    requested_lang = request.target_lang

    # Block Hausa requests (discontinued)
    hausa_requested = requested_lang.lower() == "hausa"
    if hausa_requested:
        raise HTTPException(
            status_code=400,
            detail="Hausa translation has been discontinued. Please use Twi, Ga, or Ewe instead.",
        )

    try:
        translated = translation_engine.translate(request.text, requested_lang)
    except Exception as err:
        logger.error(f"Translation Error: {err}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Translation failed: {str(err)}")
    else:
        return {"translated_text": translated}

@app.post("/speech-to-text")
async def speech_to_text(audio_file: UploadFile = File(...)):
    """
    Convert speech audio to text using ASR.
    Accepts audio file uploads in various formats (wav, mp3, m4a, etc.)
    """
    # Contract:
    #   200 -> {"transcribed_text": <engine output>}
    #   503 -> ASR engine not loaded
    #   422 -> upload missing, empty, or larger than 10MB
    #   500 -> any other failure while reading or transcribing
    max_audio_bytes = 10 * 1024 * 1024  # 10MB upload limit

    if not asr_engine:
        raise HTTPException(status_code=503, detail="ASR Engine not available.")

    # Validate file
    if not audio_file:
        raise HTTPException(status_code=422, detail="No audio file provided.")

    # UploadFile.size is optional: it can be None, and older Starlette
    # releases do not define the attribute at all. Comparing it directly
    # would raise outside the try block below and surface as an unhandled
    # 500, so only use it as an early rejection when it is actually known.
    declared_size = getattr(audio_file, "size", None)
    if declared_size is not None:
        if declared_size == 0:
            raise HTTPException(status_code=422, detail="Audio file is empty.")

        # Check file size (max 10MB)
        if declared_size > max_audio_bytes:
            raise HTTPException(status_code=422, detail="Audio file too large. Maximum size is 10MB.")

    try:
        logger.info(f"Processing audio file: {audio_file.filename}, size: {declared_size} bytes, content_type: {audio_file.content_type}")

        # Read at most one byte past the limit: enough to detect an oversized
        # upload without pulling an arbitrarily large body into memory.
        audio_bytes = await audio_file.read(max_audio_bytes + 1)

        if not audio_bytes:
            raise HTTPException(status_code=422, detail="Audio file contains no data.")

        # Enforce the limit on the bytes actually received, so it still holds
        # when the declared size was unavailable.
        if len(audio_bytes) > max_audio_bytes:
            raise HTTPException(status_code=422, detail="Audio file too large. Maximum size is 10MB.")

        # NOTE(review): transcribe() is called synchronously inside this async
        # handler; if it is slow it will hold up the event loop. Offloading it
        # to a worker thread is only safe if the engine tolerates concurrent
        # calls -- confirm before changing.
        transcribed_text = asr_engine.transcribe(audio_bytes)
        logger.info(f"ASR transcription successful: {transcribed_text[:50]}...")
        return {"transcribed_text": transcribed_text}

    except HTTPException:
        # Validation errors raised above must reach the client unchanged.
        raise
    except Exception as e:
        logger.error(f"ASR Error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to process audio: {str(e)}")

@app.get("/supported-languages")
def get_supported_languages():
    """Get list of supported translation languages."""
    # Without an engine there is nothing to list: answer with an empty list
    # and an explanatory message (no "total_count" key in that case).
    if not translation_engine:
        return {
            "supported_languages": [],
            "message": "Translation engine not available",
        }

    # The language names are the keys of the engine's `language_models` mapping.
    language_names = list(translation_engine.language_models.keys())
    language_total = len(translation_engine.language_models)
    return {
        "supported_languages": language_names,
        "total_count": language_total,
        "message": "These are the currently supported languages for translation",
    }

@app.get("/tts/status")
def get_tts_status():
    """Get TTS engine status and information."""
    # Three possible answers, all returned with a normal response:
    #   "unavailable" - no engine, or the engine has no model
    #   "available"   - includes whatever get_model_info() returns
    #   "error"       - get_model_info() raised; the message carries the error
    if not (tts_engine and tts_engine.model):
        return {
            "status": "unavailable",
            "message": "TTS engine is not loaded",
        }

    try:
        info = tts_engine.get_model_info()
    except Exception as err:
        logger.error(f"Error getting TTS status: {err}")
        return {
            "status": "error",
            "message": f"Error getting TTS status: {str(err)}",
        }

    return {
        "status": "available",
        "model_info": info,
    }

@app.post("/text-to-speech")
async def text_to_speech(request: TTSRequest):
    # Synthesize request.text with the requested speaker and send back WAV audio.
    #   200 -> audio/wav attachment named speech.wav
    #   503 -> TTS library could not be imported, or the engine/model is not loaded
    #   500 -> synthesis or response construction failed (details are only logged)
    if not TTS_AVAILABLE:
        raise HTTPException(
            status_code=503,
            detail="TTS Engine not available. Requires Python 3.11 or lower to install TTS library.",
        )

    if not (tts_engine and tts_engine.model):
        raise HTTPException(status_code=503, detail="TTS Engine not loaded.")

    try:
        # Use the new synthesize_to_bytes method
        wav_bytes = tts_engine.synthesize_to_bytes(
            text=request.text,
            speaker=request.speaker,
        )

        response_headers = {
            "Content-Disposition": "attachment; filename=speech.wav",
            "Content-Length": str(len(wav_bytes)),
        }

        # Return audio as streaming response
        wav_stream = io.BytesIO(wav_bytes)
        return StreamingResponse(
            wav_stream,
            media_type="audio/wav",
            headers=response_headers,
        )
    except Exception as err:
        logger.error(f"TTS Error: {err}", exc_info=True)
        raise HTTPException(status_code=500, detail="Failed to generate speech.")