Spaces:

Jerich
/

TalklasApp

Paused

App Files Files Community

Jerich committed on Apr 9, 2025

Commit

ec27d4d

verified ·

1 Parent(s): c10c930

Add STT functionality with openai/whisper-tiny

Browse files

Files changed (1) hide show

app.py +193 -15

app.py CHANGED Viewed

@@ -1,12 +1,104 @@
 import logging
-from fastapi import FastAPI, HTTPException, Form
 from fastapi.responses import JSONResponse
 # Configure logging
 logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("minimal-api")
-app = FastAPI(title="Minimal API Test")
 @app.get("/")
 async def root():
@@ -16,23 +108,109 @@ async def root():
 @app.get("/health")
 async def health_check():
-    """Health check endpoint to confirm the app is running"""
     logger.info("Health check requested")
-    return {"status": "healthy"}
-@app.get("/ping")
-async def ping():
-    """Simple ping endpoint to test GET requests"""
-    logger.info("Ping requested")
-    return {"message": "pong"}
-@app.post("/echo")
-async def echo(text: str = Form(...)):
-    """Echo endpoint to test POST requests with form data"""
     if not text:
         raise HTTPException(status_code=400, detail="No text provided")
-    logger.info(f"Echo requested with text: {text}")
-    return {"received_text": text}
 if __name__ == "__main__":
     import uvicorn

+import os
+os.environ["HOME"] = "/root"
+os.environ["HF_HOME"] = "/tmp/hf_cache"
 import logging
+import threading
+import tempfile
+import uuid
+import numpy as np
+import soundfile as sf
+from fastapi import FastAPI, HTTPException, UploadFile, File, Form
 from fastapi.responses import JSONResponse
+from typing import Dict, Any, Optional
 # Configure logging
 logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("talklas-api")
+app = FastAPI(title="Talklas API")
+# Global variables to track application state
+# Set to True at the end of load_models_task() when loading finished without error.
+models_loaded = False
+# True while the background loader is running; reset in load_models_task()'s finally block.
+loading_in_progress = False
+# The threading.Thread created by start_model_loading(), or None before it has been started.
+loading_thread = None
+# Per-component load state. Values written in this file:
+# "not_loaded", "loading", "loaded", "failed", "skipped".
+model_status = {
+    "stt": "not_loaded",
+    "mt": "not_loaded",
+    "tts": "not_loaded"
+}
+# Text of the most recent loading error, or None if no error has been recorded.
+error_message = None
+# STT model and processor (will be loaded in background)
+stt_processor = None
+stt_model = None
+# Define the valid languages
+# Keys are the language names accepted for the source_lang/target_lang form fields.
+# NOTE(review): values look like ISO 639-3 codes, but nothing visible here reads them - confirm intended use.
+LANGUAGE_MAPPING = {
+    "English": "eng",
+    "Tagalog": "tgl",
+    "Cebuano": "ceb",
+    "Ilocano": "ilo",
+    "Waray": "war",
+    "Pangasinan": "pag"
+}
+# Function to load models in background
+def load_models_task():
+    global models_loaded, loading_in_progress, model_status, error_message, stt_processor, stt_model
+    try:
+        loading_in_progress = True
+        # Import heavy libraries only when needed
+        logger.info("Starting to load STT model...")
+        import torch
+        from transformers import WhisperProcessor, WhisperForConditionalGeneration
+        # Load STT model
+        try:
+            logger.info("Loading Whisper model...")
+            model_status["stt"] = "loading"
+            stt_processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
+            stt_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            stt_model.to(device)
+            logger.info("STT model loaded successfully")
+            model_status["stt"] = "loaded"
+        except Exception as e:
+            logger.error(f"Failed to load STT model: {str(e)}")
+            model_status["stt"] = "failed"
+            error_message = f"STT model loading failed: {str(e)}"
+            return
+        # Skip MT and TTS models for now to save memory
+        model_status["mt"] = "skipped"
+        model_status["tts"] = "skipped"
+        logger.info("MT and TTS models skipped to save memory")
+        models_loaded = True
+        logger.info("Model loading completed successfully")
+    except Exception as e:
+        error_message = str(e)
+        logger.error(f"Error in model loading task: {str(e)}")
+    finally:
+        loading_in_progress = False
+# Start loading models in background
+def start_model_loading():
+    global loading_thread, loading_in_progress
+    if not loading_in_progress and not models_loaded:
+        loading_in_progress = True
+        loading_thread = threading.Thread(target=load_models_task)
+        loading_thread.daemon = True
+        loading_thread.start()
+# Start the background process when the app starts
+@app.on_event("startup")
+async def startup_event():
+    """Log application startup and kick off model loading via start_model_loading()."""
+    logger.info("Application starting up...")
+    start_model_loading()
 @app.get("/")
 async def root():
 @app.get("/health")
 async def health_check():
+    """Health check endpoint that always returns successfully"""
+    global models_loaded, loading_in_progress, model_status, error_message
     logger.info("Health check requested")
+    return {
+        "status": "healthy",
+        "models_loaded": models_loaded,
+        "loading_in_progress": loading_in_progress,
+        "model_status": model_status,
+        "error": error_message
+    }
+@app.post("/update-languages")
+async def update_languages(source_lang: str = Form(...), target_lang: str = Form(...)):
+    if source_lang not in LANGUAGE_MAPPING or target_lang not in LANGUAGE_MAPPING:
+        raise HTTPException(status_code=400, detail="Invalid language selected")
+    logger.info(f"Updating languages: {source_lang} → {target_lang}")
+    return {"status": f"Languages updated to {source_lang} → {target_lang}"}
+@app.post("/translate-text")
+async def translate_text(text: str = Form(...), source_lang: str = Form(...), target_lang: str = Form(...)):
+    """Endpoint that creates a placeholder for text translation"""
     if not text:
         raise HTTPException(status_code=400, detail="No text provided")
+    if source_lang not in LANGUAGE_MAPPING or target_lang not in LANGUAGE_MAPPING:
+        raise HTTPException(status_code=400, detail="Invalid language selected")
+    logger.info(f"Translate-text requested: {text} from {source_lang} to {target_lang}")
+    request_id = str(uuid.uuid4())
+    return {
+        "request_id": request_id,
+        "status": "processing",
+        "message": "Translation not implemented yet (MT model not loaded).",
+        "source_text": text,
+        "translated_text": "Translation not available",
+        "output_audio": None
+    }
+@app.post("/translate-audio")
+async def translate_audio(audio: UploadFile = File(...), source_lang: str = Form(...), target_lang: str = Form(...)):
+    """Endpoint to transcribe audio using STT"""
+    global stt_processor, stt_model
+    if not audio:
+        raise HTTPException(status_code=400, detail="No audio file provided")
+    if source_lang not in LANGUAGE_MAPPING or target_lang not in LANGUAGE_MAPPING:
+        raise HTTPException(status_code=400, detail="Invalid language selected")
+    logger.info(f"Translate-audio requested: {audio.filename} from {source_lang} to {target_lang}")
+    request_id = str(uuid.uuid4())
+    # Check if STT model is loaded
+    if model_status["stt"] != "loaded" or stt_processor is None or stt_model is None:
+        logger.warning("STT model not loaded, returning placeholder response")
+        return {
+            "request_id": request_id,
+            "status": "processing",
+            "message": "STT model not loaded yet. Please try again later.",
+            "source_text": "Transcription not available",
+            "translated_text": "Translation not available",
+            "output_audio": None
+        }
+    # Save the uploaded audio to a temporary file
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
+        temp_file.write(await audio.read())
+        temp_path = temp_file.name
+    try:
+        # Read and preprocess the audio
+        waveform, sample_rate = sf.read(temp_path)
+        if sample_rate != 16000:
+            logger.info(f"Resampling audio from {sample_rate} Hz to 16000 Hz")
+            import librosa
+            waveform = librosa.resample(waveform, orig_sr=sample_rate, target_sr=16000)
+        # Process the audio with Whisper
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        inputs = stt_processor(waveform, sampling_rate=16000, return_tensors="pt").to(device)
+        with torch.no_grad():
+            generated_ids = stt_model.generate(**inputs)
+            transcription = stt_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+        logger.info(f"Transcription completed: {transcription}")
+        return {
+            "request_id": request_id,
+            "status": "completed",
+            "message": "Transcription completed successfully. Translation and TTS not implemented yet.",
+            "source_text": transcription,
+            "translated_text": "Translation not available",
+            "output_audio": None
+        }
+    except Exception as e:
+        logger.error(f"Error during transcription: {str(e)}")
+        return {
+            "request_id": request_id,
+            "status": "failed",
+            "message": f"Transcription failed: {str(e)}",
+            "source_text": "Transcription not available",
+            "translated_text": "Translation not available",
+            "output_audio": None
+        }
+    finally:
+        os.unlink(temp_path)
 if __name__ == "__main__":
     import uvicorn