Update app.py

app.py CHANGED
@@ -31,20 +31,24 @@ os.makedirs(AUDIO_DIR, exist_ok=True)
 app.mount("/audio_output", StaticFiles(directory=AUDIO_DIR), name="audio_output")
 
 # Global variables to track application state
-
-
-
-
-"
+models_loaded = False
+loading_in_progress = False
+loading_thread = None
+model_status = {
+    "stt": "not_loaded",
+    "mt": "not_loaded",
+    "tts": "not_loaded"
 }
+error_message = None
+current_tts_language = "tgl"  # Track the current TTS language
 
-#
-
-
-
-
-
-
+# Model instances
+stt_processor = None
+stt_model = None
+mt_model = None
+mt_tokenizer = None
+tts_model = None
+tts_tokenizer = None
 
 # Define the valid languages and mappings
 LANGUAGE_MAPPING = {
@@ -65,26 +69,28 @@ NLLB_LANGUAGE_CODES = {
     "pag": "pag_Latn"
 }
 
-#
-# In a production environment, you would use a more comprehensive solution
+# Define a list of inappropriate words for content filtering
 INAPPROPRIATE_WORDS = [
-    "
-
+    "profanity", "obscenity", "obscene", "offensive", "vulgar", "explicit",
+    # Add more words as needed or load from a separate file
 ]
 
-
-
-def detect_inappropriate_content(text: str) -> bool:
+# Function to check if text contains inappropriate content
+def check_inappropriate_content(text: str) -> bool:
     """
-
+    Check if the given text contains inappropriate words.
+    Returns True if inappropriate content is detected, False otherwise.
     """
+    if not text:
+        return False
+
     text_lower = text.lower()
     for word in INAPPROPRIATE_WORDS:
-        if word
+        if re.search(r'\b' + re.escape(word) + r'\b', text_lower):
+            logger.warning(f"Inappropriate content detected: '{word}' in text")
             return True
     return False
 
-
 # Function to save PCM data as a WAV file
 def save_pcm_to_wav(pcm_data: list, sample_rate: int, output_path: str):
     # Convert pcm_data to a NumPy array of 16-bit integers
@@ -98,7 +104,6 @@ def save_pcm_to_wav(pcm_data: list, sample_rate: int, output_path: str):
         # Write the 16-bit PCM data as bytes (little-endian)
         wav_file.writeframes(pcm_array.tobytes())
 
-
 # Function to detect speech using an energy-based approach
 def detect_speech(waveform: torch.Tensor, sample_rate: int, threshold: float = 0.01, min_speech_duration: float = 0.5) -> bool:
     """
@@ -123,7 +128,6 @@ def detect_speech(waveform: torch.Tensor, sample_rate: int, threshold: float = 0.01, min_speech_duration: float = 0.5) -> bool:
     # For now, we assume if RMS is above threshold, there is speech
     return True
 
-
# Function to clean up old audio files
 def cleanup_old_audio_files():
     logger.info("Starting cleanup of old audio files...")
@@ -139,157 +143,112 @@ def cleanup_old_audio_files():
     except Exception as e:
         logger.error(f"Error deleting file {file_path}: {str(e)}")
 
-
 # Background task to periodically clean up audio files
 def schedule_cleanup():
     while True:
         cleanup_old_audio_files()
         time.sleep(300)  # Run every 5 minutes (300 seconds)
 
-
-
-
-
-        return True
-
-    # Use lock to prevent multiple threads from loading the model simultaneously
-    if not loading_locks["stt_whisper"].acquire(blocking=False):
-        logger.info("Whisper model loading already in progress")
-        return False
-
-    try:
-        logger.info("Loading Whisper small model...")
-        model_cache["stt_whisper"]["status"] = "loading"
-
-        from transformers import WhisperProcessor, WhisperForConditionalGeneration
-        device = "cuda" if torch.cuda.is_available() else "cpu"
-
-        model_cache["stt_whisper"]["processor"] = WhisperProcessor.from_pretrained("openai/whisper-small")
-        model_cache["stt_whisper"]["model"] = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
-        model_cache["stt_whisper"]["model"].to(device)
-
-        model_cache["stt_whisper"]["status"] = "loaded"
-        logger.info("Whisper small model loaded successfully")
-        return True
-    except Exception as e:
-        model_cache["stt_whisper"]["status"] = "failed"
-        logger.error(f"Failed to load Whisper model: {str(e)}")
-        return False
-    finally:
-        loading_locks["stt_whisper"].release()
-
-
-# Function to load the MMS STT model on demand
-def load_mms_stt_model():
-    if model_cache["stt_mms"]["status"] == "loaded":
-        return True
-
-    if not loading_locks["stt_mms"].acquire(blocking=False):
-        logger.info("MMS STT model loading already in progress")
-        return False
+# Function to load models in background
+def load_models_task():
+    global models_loaded, loading_in_progress, model_status, error_message
+    global stt_processor, stt_model, mt_model, mt_tokenizer, tts_model, tts_tokenizer
 
     try:
-
-        model_cache["stt_mms"]["status"] = "loading"
+        loading_in_progress = True
 
-
-
-
-        model_cache["stt_mms"]["processor"] = AutoProcessor.from_pretrained("facebook/mms-1b-all")
-        model_cache["stt_mms"]["model"] = AutoModelForCTC.from_pretrained("facebook/mms-1b-all")
-        model_cache["stt_mms"]["model"].to(device)
-
-        model_cache["stt_mms"]["status"] = "loaded"
-        logger.info("MMS STT model loaded successfully")
-        return True
-    except Exception as e:
-        model_cache["stt_mms"]["status"] = "failed"
-        logger.error(f"Failed to load MMS STT model: {str(e)}")
-        return False
-    finally:
-        loading_locks["stt_mms"].release()
-
-
-# Function to load the MT model on demand
-def load_mt_model():
-    if model_cache["mt"]["status"] == "loaded":
-        return True
-
-    if not loading_locks["mt"].acquire(blocking=False):
-        logger.info("MT model loading already in progress")
-        return False
-
-    try:
-        logger.info("Loading NLLB-200-distilled-600M model...")
-        model_cache["mt"]["status"] = "loading"
+        # Load STT model (MMS with fallback to Whisper)
+        logger.info("Starting to load STT model...")
+        from transformers import AutoProcessor, AutoModelForCTC, WhisperProcessor, WhisperForConditionalGeneration
 
+        try:
+            logger.info("Loading MMS STT model...")
+            model_status["stt"] = "loading"
+            stt_processor = AutoProcessor.from_pretrained("facebook/mms-1b-all")
+            stt_model = AutoModelForCTC.from_pretrained("facebook/mms-1b-all")
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            stt_model.to(device)
+            logger.info("MMS STT model loaded successfully")
+            model_status["stt"] = "loaded_mms"
+        except Exception as mms_error:
+            logger.error(f"Failed to load MMS STT model: {str(mms_error)}")
+            logger.info("Falling back to Whisper STT model...")
+            try:
+                stt_processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
+                stt_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
+                stt_model.to(device)
+                logger.info("Whisper STT model loaded successfully as fallback")
+                model_status["stt"] = "loaded_whisper"
+            except Exception as whisper_error:
+                logger.error(f"Failed to load Whisper STT model: {str(whisper_error)}")
+                model_status["stt"] = "failed"
+                error_message = f"STT model loading failed: MMS error: {str(mms_error)}, Whisper error: {str(whisper_error)}"
+                return
+
+        # Load MT model
+        logger.info("Starting to load MT model...")
         from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
-        device = "cuda" if torch.cuda.is_available() else "cpu"
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    #
-
-    # If the model is already loaded for this language, return immediately
-    if (model_cache["tts"]["status"] == "loaded" and
-        model_cache["tts"]["language"] == language_code):
-        return True
-
-    if not loading_locks["tts"].acquire(blocking=False):
-        logger.info("TTS model loading already in progress")
-        return False
-
-    try:
-        logger.info(f"Loading MMS-TTS model for {language_code}...")
-        model_cache["tts"]["status"] = "loading"
-
+        try:
+            logger.info("Loading NLLB-200-distilled-600M model...")
+            model_status["mt"] = "loading"
+            mt_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
+            mt_tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
+            mt_model.to(device)
+            logger.info("MT model loaded successfully")
+            model_status["mt"] = "loaded"
+        except Exception as e:
+            logger.error(f"Failed to load MT model: {str(e)}")
+            model_status["mt"] = "failed"
+            error_message = f"MT model loading failed: {str(e)}"
+            return
+
+        # Load TTS model (default to Tagalog, will be updated dynamically)
+        logger.info("Starting to load TTS model...")
         from transformers import VitsModel, AutoTokenizer
-        device = "cuda" if torch.cuda.is_available() else "cpu"
 
         try:
-
-
-
-
-
-            logger.info(
-
+            logger.info("Loading MMS-TTS model for Tagalog...")
+            model_status["tts"] = "loading"
+            tts_model = VitsModel.from_pretrained("facebook/mms-tts-tgl")
+            tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-tgl")
+            tts_model.to(device)
+            logger.info("TTS model loaded successfully")
+            model_status["tts"] = "loaded"
         except Exception as e:
-            logger.error(f"Failed to load TTS model for
+            logger.error(f"Failed to load TTS model for Tagalog: {str(e)}")
             # Fallback to English TTS if the target language fails
             try:
                 logger.info("Falling back to MMS-TTS English model...")
-
-
-
-                model_cache["tts"]["language"] = "eng"
-                model_cache["tts"]["status"] = "loaded (fallback)"
+                tts_model = VitsModel.from_pretrained("facebook/mms-tts-eng")
+                tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
+                tts_model.to(device)
                 logger.info("Fallback TTS model loaded successfully")
-
+                model_status["tts"] = "loaded (fallback)"
+                current_tts_language = "eng"
             except Exception as e2:
-                model_cache["tts"]["status"] = "failed"
                 logger.error(f"Failed to load fallback TTS model: {str(e2)}")
-
+                model_status["tts"] = "failed"
+                error_message = f"TTS model loading failed: {str(e)} (fallback also failed: {str(e2)})"
+                return
+
+        models_loaded = True
+        logger.info("Model loading completed successfully")
+
     except Exception as e:
-
-        logger.error(f"
-        return False
+        error_message = str(e)
+        logger.error(f"Error in model loading task: {str(e)}")
     finally:
-
+        loading_in_progress = False
 
+# Start loading models in background
+def start_model_loading():
+    global loading_thread, loading_in_progress
+    if not loading_in_progress and not models_loaded:
+        loading_in_progress = True
+        loading_thread = threading.Thread(target=load_models_task)
+        loading_thread.daemon = True
+        loading_thread.start()
 
 # Start the background cleanup task
 def start_cleanup_task():
@@ -297,130 +256,116 @@ def start_cleanup_task():
     cleanup_thread.daemon = True
     cleanup_thread.start()
 
-
 # Start the background processes when the app starts
 @app.on_event("startup")
 async def startup_event():
     logger.info("Application starting up...")
+    start_model_loading()
     start_cleanup_task()
 
-
 @app.get("/")
 async def root():
     """Root endpoint for default health check"""
     logger.info("Root endpoint requested")
     return {"status": "healthy"}
 
-
 @app.get("/health")
 async def health_check():
     """Health check endpoint that always returns successfully"""
+    global models_loaded, loading_in_progress, model_status, error_message
     logger.info("Health check requested")
     return {
         "status": "healthy",
-        "
-
-
-
-            "tts": model_cache["tts"]["status"],
-            "tts_language": model_cache["tts"]["language"]
-        }
+        "models_loaded": models_loaded,
+        "loading_in_progress": loading_in_progress,
+        "model_status": model_status,
+        "error": error_message
     }
 
-
 @app.post("/update-languages")
 async def update_languages(source_lang: str = Form(...), target_lang: str = Form(...)):
-
-
-    Will trigger loading of necessary models if not already loaded
-    """
+    global stt_processor, stt_model, tts_model, tts_tokenizer, current_tts_language
+
     if source_lang not in LANGUAGE_MAPPING or target_lang not in LANGUAGE_MAPPING:
         raise HTTPException(status_code=400, detail="Invalid language selected")
 
     source_code = LANGUAGE_MAPPING[source_lang]
     target_code = LANGUAGE_MAPPING[target_lang]
 
-    #
-    if source_code in ["eng", "tgl"]:
-        # Load Whisper for English or Tagalog
-        if not load_whisper_model():
-            return {"status": "pending", "message": "Whisper model loading in progress"}
-    else:
-        # Load MMS for other Philippine languages
-        if not load_mms_stt_model():
-            return {"status": "pending", "message": "MMS STT model loading in progress"}
-
-    # Load the MT model if not already loaded
-    if not load_mt_model():
-        return {"status": "pending", "message": "MT model loading in progress"}
-
-    # Load the appropriate TTS model for the target language
-    if not load_tts_model(target_code):
-        return {"status": "pending", "message": "TTS model loading in progress"}
-
-    logger.info(f"Languages updated to {source_lang} → {target_lang}")
-    return {"status": "success", "message": f"Languages updated to {source_lang} → {target_lang}"}
-
-
-@app.post("/synthesize-speech")
-async def synthesize_speech(text: str = Form(...), language: str = Form(...)):
-    """Endpoint to synthesize speech from text without translation"""
-    if language not in LANGUAGE_MAPPING:
-        raise HTTPException(status_code=400, detail="Invalid language selected")
-
-    language_code = LANGUAGE_MAPPING[language]
-    request_id = str(uuid.uuid4())
-
-    # Load the TTS model for the requested language
-    if not load_tts_model(language_code):
-        return {
-            "request_id": request_id,
-            "status": "pending",
-            "message": "TTS model loading in progress. Please try again in a moment."
-        }
-
+    # Update the STT model based on the source language (MMS or Whisper)
     try:
+        logger.info("Updating STT model for source language...")
+        from transformers import AutoProcessor, AutoModelForCTC, WhisperProcessor, WhisperForConditionalGeneration
         device = "cuda" if torch.cuda.is_available() else "cpu"
-        inputs = model_cache["tts"]["tokenizer"](text, return_tensors="pt").to(device)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        try:
+            logger.info(f"Loading MMS STT model for {source_code}...")
+            stt_processor = AutoProcessor.from_pretrained("facebook/mms-1b-all")
+            stt_model = AutoModelForCTC.from_pretrained("facebook/mms-1b-all")
+            stt_model.to(device)
+            # Set the target language for MMS
+            if source_code in stt_processor.tokenizer.vocab.keys():
+                stt_processor.tokenizer.set_target_lang(source_code)
+                stt_model.load_adapter(source_code)
+                logger.info(f"MMS STT model updated to {source_code}")
+                model_status["stt"] = "loaded_mms"
+            else:
+                logger.warning(f"Language {source_code} not supported by MMS, using default")
+                model_status["stt"] = "loaded_mms_default"
+        except Exception as mms_error:
+            logger.error(f"Failed to load MMS STT model for {source_code}: {str(mms_error)}")
+            logger.info("Falling back to Whisper STT model...")
+            try:
+                stt_processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
+                stt_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
+                stt_model.to(device)
+                logger.info("Whisper STT model loaded successfully as fallback")
+                model_status["stt"] = "loaded_whisper"
+            except Exception as whisper_error:
+                logger.error(f"Failed to load Whisper STT model: {str(whisper_error)}")
+                model_status["stt"] = "failed"
+                error_message = f"STT model update failed: MMS error: {str(mms_error)}, Whisper error: {str(whisper_error)}"
+                return {"status": "failed", "error": error_message}
+    except Exception as e:
+        logger.error(f"Error updating STT model: {str(e)}")
+        model_status["stt"] = "failed"
+        error_message = f"STT model update failed: {str(e)}"
+        return {"status": "failed", "error": error_message}
 
+    # Update the TTS model based on the target language
+    try:
+        logger.info(f"Loading MMS-TTS model for {target_code}...")
+        from transformers import VitsModel, AutoTokenizer
+        tts_model = VitsModel.from_pretrained(f"facebook/mms-tts-{target_code}")
+        tts_tokenizer = AutoTokenizer.from_pretrained(f"facebook/mms-tts-{target_code}")
+        tts_model.to(device)
+        current_tts_language = target_code
+        logger.info(f"TTS model updated to {target_code}")
+        model_status["tts"] = "loaded"
     except Exception as e:
-        logger.error(f"
-
-        "
-
-
-
-        "
-
-
+        logger.error(f"Failed to load TTS model for {target_code}: {str(e)}")
+        try:
+            logger.info("Falling back to MMS-TTS English model...")
+            tts_model = VitsModel.from_pretrained("facebook/mms-tts-eng")
+            tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
+            tts_model.to(device)
+            current_tts_language = "eng"
+            logger.info("Fallback TTS model loaded successfully")
+            model_status["tts"] = "loaded (fallback)"
+        except Exception as e2:
+            logger.error(f"Failed to load fallback TTS model: {str(e2)}")
+            model_status["tts"] = "failed"
+            error_message = f"TTS model loading failed: {str(e)} (fallback also failed: {str(e2)})"
+            return {"status": "failed", "error": error_message}
+
+    logger.info(f"Updating languages: {source_lang} → {target_lang}")
+    return {"status": f"Languages updated to {source_lang} → {target_lang}"}
 
 @app.post("/translate-text")
 async def translate_text(text: str = Form(...), source_lang: str = Form(...), target_lang: str = Form(...)):
     """Endpoint to translate text and convert to speech"""
+    global mt_model, mt_tokenizer, tts_model, tts_tokenizer, current_tts_language
+
     if not text:
         raise HTTPException(status_code=400, detail="No text provided")
     if source_lang not in LANGUAGE_MAPPING or target_lang not in LANGUAGE_MAPPING:
@@ -429,107 +374,100 @@ async def translate_text(text: str = Form(...), source_lang: str = Form(...), target_lang: str = Form(...)):
     logger.info(f"Translate-text requested: {text} from {source_lang} to {target_lang}")
     request_id = str(uuid.uuid4())
 
-    # Load the MT model if not already loaded
-    if not load_mt_model():
-        return {
-            "request_id": request_id,
-            "status": "pending",
-            "message": "MT model loading in progress. Please try again in a moment.",
-            "source_text": text,
-            "translated_text": "Translation not available yet",
-            "output_audio": None,
-            "contains_inappropriate_content": False
-        }
-
     # Translate the text
     source_code = LANGUAGE_MAPPING[source_lang]
     target_code = LANGUAGE_MAPPING[target_lang]
     translated_text = "Translation not available"
-    contains_inappropriate = False
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    except Exception as e:
-        logger.error(f"Error during translation: {str(e)}")
-        translated_text = f"Translation failed: {str(e)}"
-        return {
-            "request_id": request_id,
-            "status": "failed",
-            "message": f"Translation failed: {str(e)}",
-            "source_text": text,
-            "translated_text": translated_text,
-            "output_audio": None,
-            "contains_inappropriate_content": contains_inappropriate
-        }
+    if model_status["mt"] == "loaded" and mt_model is not None and mt_tokenizer is not None:
+        try:
+            source_nllb_code = NLLB_LANGUAGE_CODES[source_code]
+            target_nllb_code = NLLB_LANGUAGE_CODES[target_code]
+            mt_tokenizer.src_lang = source_nllb_code
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            inputs = mt_tokenizer(text, return_tensors="pt").to(device)
+            with torch.no_grad():
+                generated_tokens = mt_model.generate(
+                    **inputs,
+                    forced_bos_token_id=mt_tokenizer.convert_tokens_to_ids(target_nllb_code),
+                    max_length=448
+                )
+            translated_text = mt_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
+            logger.info(f"Translation completed: {translated_text}")
+        except Exception as e:
+            logger.error(f"Error during translation: {str(e)}")
+            translated_text = f"Translation failed: {str(e)}"
+    else:
+        logger.warning("MT model not loaded, skipping translation")
 
-    #
-
-
-
-
-
-
-    "
-
-    "
-
+    # Check for inappropriate content in the translated text
+    is_inappropriate = check_inappropriate_content(translated_text)
+    logger.info(f"Inappropriate content check: {is_inappropriate}")
+
+    # Update TTS model if the target language doesn't match the current TTS language
+    if current_tts_language != target_code:
+        try:
+            logger.info(f"Updating TTS model for {target_code}...")
+            from transformers import VitsModel, AutoTokenizer
+            tts_model = VitsModel.from_pretrained(f"facebook/mms-tts-{target_code}")
+            tts_tokenizer = AutoTokenizer.from_pretrained(f"facebook/mms-tts-{target_code}")
+            tts_model.to(device)
+            current_tts_language = target_code
+            logger.info(f"TTS model updated to {target_code}")
+            model_status["tts"] = "loaded"
+        except Exception as e:
+            logger.error(f"Failed to load TTS model for {target_code}: {str(e)}")
+            try:
+                logger.info("Falling back to MMS-TTS English model...")
+                tts_model = VitsModel.from_pretrained("facebook/mms-tts-eng")
+                tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
+                tts_model.to(device)
+                current_tts_language = "eng"
+                logger.info("Fallback TTS model loaded successfully")
+                model_status["tts"] = "loaded (fallback)"
+            except Exception as e2:
+                logger.error(f"Failed to load fallback TTS model: {str(e2)}")
+                model_status["tts"] = "failed"
 
     # Convert translated text to speech
     output_audio_url = None
-
-
-
-
-
-
-
+    if model_status["tts"].startswith("loaded") and tts_model is not None and tts_tokenizer is not None:
+        try:
+            inputs = tts_tokenizer(translated_text, return_tensors="pt").to(device)
+            with torch.no_grad():
+                output = tts_model(**inputs)
+            speech = output.waveform.cpu().numpy().squeeze()
+            speech = (speech * 32767).astype(np.int16)
+            sample_rate = tts_model.config.sampling_rate
 
-
-
-
-
-
+            # Save the audio as a WAV file
+            output_filename = f"{request_id}.wav"
+            output_path = os.path.join(AUDIO_DIR, output_filename)
+            save_pcm_to_wav(speech.tolist(), sample_rate, output_path)
+            logger.info(f"Saved synthesized audio to {output_path}")
 
-
-
-
-
-
-
-
+            # Generate a URL to the WAV file
+            output_audio_url = f"https://jerich-talklasapp.hf.space/audio_output/{output_filename}"
+            logger.info("TTS conversion completed")
+        except Exception as e:
+            logger.error(f"Error during TTS conversion: {str(e)}")
+            output_audio_url = None
 
     return {
         "request_id": request_id,
-        "status": "completed"
-        "message": "Translation and TTS completed
-            "Translation completed but TTS failed",
+        "status": "completed",
+        "message": "Translation and TTS completed (or partially completed).",
         "source_text": text,
         "translated_text": translated_text,
-        "
-        "
+        "is_inappropriate": is_inappropriate,
+        "output_audio": output_audio_url
     }
 
-
 @app.post("/translate-audio")
 async def translate_audio(audio: UploadFile = File(...), source_lang: str = Form(...), target_lang: str = Form(...)):
     """Endpoint to transcribe, translate, and convert audio to speech"""
+    global stt_processor, stt_model, mt_model, mt_tokenizer, tts_model, tts_tokenizer, current_tts_language
+
     if not audio:
         raise HTTPException(status_code=400, detail="No audio file provided")
     if source_lang not in LANGUAGE_MAPPING or target_lang not in LANGUAGE_MAPPING:
@@ -538,35 +476,18 @@ async def translate_audio(audio: UploadFile = File(...), source_lang: str = Form(...), target_lang: str = Form(...)):
     logger.info(f"Translate-audio requested: {audio.filename} from {source_lang} to {target_lang}")
     request_id = str(uuid.uuid4())
 
-
-
-
-
-
-
-
-
-
-
-
-
-            "message": "Whisper STT model loading in progress. Please try again in a moment.",
-            "source_text": "Transcription not available yet",
-            "translated_text": "Translation not available yet",
-            "output_audio": None,
-            "contains_inappropriate_content": False
-        }
-    else:
-        if not load_mms_stt_model():
-            return {
-                "request_id": request_id,
-                "status": "pending",
-                "message": "MMS STT model loading in progress. Please try again in a moment.",
-                "source_text": "Transcription not available yet",
-                "translated_text": "Translation not available yet",
-                "output_audio": None,
-                "contains_inappropriate_content": False
-            }
+    # Check if STT model is loaded
+    if model_status["stt"] not in ["loaded_mms", "loaded_mms_default", "loaded_whisper"] or stt_processor is None or stt_model is None:
+        logger.warning("STT model not loaded, returning placeholder response")
+        return {
+            "request_id": request_id,
+            "status": "processing",
+            "message": "STT model not loaded yet. Please try again later.",
+            "source_text": "Transcription not available",
+            "translated_text": "Translation not available",
+            "is_inappropriate": False,
+            "output_audio": None
+        }
 
     # Save the uploaded audio to a temporary file
     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
@@ -576,7 +497,7 @@ async def translate_audio(audio: UploadFile = File(...), source_lang: str = Form(...), target_lang: str = Form(...)):
     transcription = "Transcription not available"
     translated_text = "Translation not available"
     output_audio_url = None
-
+    is_inappropriate = False
 
     try:
         # Step 1: Load and resample the audio using torchaudio
@@ -599,133 +520,112 @@ async def translate_audio(audio: UploadFile = File(...), source_lang: str = Form(...), target_lang: str = Form(...)):
                 "message": "No speech detected in the audio.",
                 "source_text": "No speech detected",
                 "translated_text": "No translation available",
-                "
-                "
+                "is_inappropriate": False,
+                "output_audio": None
             }
 
         # Step 3: Transcribe the audio (STT)
         device = "cuda" if torch.cuda.is_available() else "cpu"
        logger.info(f"Using device: {device}")
+        inputs = stt_processor(waveform.numpy(), sampling_rate=16000, return_tensors="pt").to(device)
+        logger.info("Audio processed, generating transcription...")
 
-
-
-
-
-
-            inputs = stt_processor(waveform.numpy(), sampling_rate=16000, return_tensors="pt").to(device)
-            logger.info("Audio processed with Whisper, generating transcription...")
-
-            with torch.no_grad():
-                generated_ids = stt_model.generate(**inputs, language="en" if source_code == "eng" else "tl")
+        with torch.no_grad():
+            if model_status["stt"] == "loaded_whisper":
+                # Whisper model
+                generated_ids = stt_model.generate(**inputs, language="en")
                 transcription = stt_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-
-
-            stt_processor = model_cache["stt_mms"]["processor"]
-            stt_model = model_cache["stt_mms"]["model"]
-
-            # Set the target language for MMS if supported
-            if source_code in stt_processor.tokenizer.vocab.keys():
-                stt_processor.tokenizer.set_target_lang(source_code)
-                stt_model.load_adapter(source_code)
-
-            inputs = stt_processor(waveform.numpy(), sampling_rate=16000, return_tensors="pt").to(device)
-            logger.info("Audio processed with MMS, generating transcription...")
-
-            with torch.no_grad():
+            else:
+                # MMS model
                 logits = stt_model(**inputs).logits
                 predicted_ids = torch.argmax(logits, dim=-1)
                 transcription = stt_processor.batch_decode(predicted_ids)[0]
-
         logger.info(f"Transcription completed: {transcription}")
 
-        # Step 4:
-
-
-
-
-
-
-
-
-        "
-
-
-
-
-
-
-
-
-            inputs = model_cache["mt"]["tokenizer"](transcription, return_tensors="pt").to(device)
-            with torch.no_grad():
-                generated_tokens = model_cache["mt"]["model"].generate(
-                    **inputs,
-                    forced_bos_token_id=model_cache["mt"]["tokenizer"].convert_tokens_to_ids(target_nllb_code),
-                    max_length=448
-                )
-            translated_text = model_cache["mt"]["tokenizer"].batch_decode(generated_tokens, skip_special_tokens=True)[0]
-            logger.info(f"Translation completed: {translated_text}")
-
-        # Check for inappropriate content
-        contains_inappropriate = detect_inappropriate_content(translated_text)
-        if contains_inappropriate:
-            logger.warning(f"Inappropriate content detected in translation")
+        # Step 4: Translate the transcribed text (MT)
+        source_code = LANGUAGE_MAPPING[source_lang]
+        target_code = LANGUAGE_MAPPING[target_lang]
+
+        if model_status["mt"] == "loaded" and mt_model is not None and mt_tokenizer is not None:
+            try:
+                source_nllb_code = NLLB_LANGUAGE_CODES[source_code]
+                target_nllb_code = NLLB_LANGUAGE_CODES[target_code]
+                mt_tokenizer.src_lang = source_nllb_code
+                inputs = mt_tokenizer(transcription, return_tensors="pt").to(device)
+                with torch.no_grad():
+                    generated_tokens = mt_model.generate(
+                        **inputs,
+                        forced_bos_token_id=mt_tokenizer.convert_tokens_to_ids(target_nllb_code),
+                        max_length=448
+                    )
+                translated_text = mt_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
+                logger.info(f"Translation completed: {translated_text}")
 
-
-
-
-
-
-        "
-
-
-
-                "output_audio": None,
-                "contains_inappropriate_content": False
-            }
-
-        # Step 6: Load the TTS model for the target language
-        if not load_tts_model(target_code):
-            return {
-                "request_id": request_id,
-                "status": "partial",
-                "message": "Transcription and translation completed, but TTS model is loading.",
-                "source_text": transcription,
-                "translated_text": translated_text,
-                "output_audio": None,
-                "contains_inappropriate_content": contains_inappropriate
-            }
-
-        # Step 7: Convert translated text to speech (TTS)
-        try:
-            inputs = model_cache["tts"]["tokenizer"](translated_text, return_tensors="pt").to(device)
-            with torch.no_grad():
-                output = model_cache["tts"]["model"](**inputs)
-            speech = output.waveform.cpu().numpy().squeeze()
-            speech = (speech * 32767).astype(np.int16)
-            sample_rate = model_cache["tts"]["model"].config.sampling_rate
-            # Save the audio as a WAV file
-            output_filename = f"{request_id}.wav"
-            output_path = os.path.join(AUDIO_DIR, output_filename)
-            save_pcm_to_wav(speech.tolist(), sample_rate, output_path)
-            logger.info(f"Saved synthesized audio to {output_path}")
+                # Check for inappropriate content in the translated text
+                is_inappropriate = check_inappropriate_content(translated_text)
+                logger.info(f"Inappropriate content check: {is_inappropriate}")
+
+            except Exception as e:
+                logger.error(f"Error during translation: {str(e)}")
+                translated_text = f"Translation failed: {str(e)}"
+        else:
+            logger.warning("MT model not loaded, skipping translation")
 
-
-
-
-
-
-
-
-
+        # Step 5: Update TTS model if the target language doesn't match the current TTS language
+        if current_tts_language != target_code:
+            try:
+                logger.info(f"Updating TTS model for {target_code}...")
+                from transformers import VitsModel, AutoTokenizer
+                tts_model = VitsModel.from_pretrained(f"facebook/mms-tts-{target_code}")
+                tts_tokenizer = AutoTokenizer.from_pretrained(f"facebook/mms-tts-{target_code}")
+                tts_model.to(device)
+                current_tts_language = target_code
+                logger.info(f"TTS model updated to {target_code}")
+                model_status["tts"] = "loaded"
+            except Exception as e:
+                logger.error(f"Failed to load TTS model for {target_code}: {str(e)}")
+                try:
+                    logger.info("Falling back to MMS-TTS English model...")
+                    tts_model = VitsModel.from_pretrained("facebook/mms-tts-eng")
+                    tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
+                    tts_model.to(device)
+                    current_tts_language = "eng"
+                    logger.info("Fallback TTS model loaded successfully")
+                    model_status["tts"] = "loaded (fallback)"
+                except Exception as e2:
+                    logger.error(f"Failed to load fallback TTS model: {str(e2)}")
+                    model_status["tts"] = "failed"
+
+        # Step 6: Convert translated text to speech (TTS)
+        if model_status["tts"].startswith("loaded") and tts_model is not None and tts_tokenizer is not None:
+            try:
+                inputs = tts_tokenizer(translated_text, return_tensors="pt").to(device)
+                with torch.no_grad():
+                    output = tts_model(**inputs)
+                speech = output.waveform.cpu().numpy().squeeze()
+                speech = (speech * 32767).astype(np.int16)
+                sample_rate = tts_model.config.sampling_rate
+
+                # Save the audio as a WAV file
+                output_filename = f"{request_id}.wav"
+                output_path = os.path.join(AUDIO_DIR, output_filename)
+                save_pcm_to_wav(speech.tolist(), sample_rate, output_path)
+                logger.info(f"Saved synthesized audio to {output_path}")
+
+                # Generate a URL to the WAV file
+                output_audio_url = f"https://jerich-talklasapp.hf.space/audio_output/{output_filename}"
+                logger.info("TTS conversion completed")
+            except Exception as e:
+                logger.error(f"Error during TTS conversion: {str(e)}")
+                output_audio_url = None
+        return {
             "request_id": request_id,
-            "status": "completed"
-            "message": "Transcription, translation, and TTS completed
-                "Transcription and translation completed but TTS failed",
+            "status": "completed",
+            "message": "Transcription, translation, and TTS completed (or partially completed).",
             "source_text": transcription,
             "translated_text": translated_text,
-            "
-            "
+            "is_inappropriate": is_inappropriate,
+            "output_audio": output_audio_url
         }
     except Exception as e:
         logger.error(f"Error during processing: {str(e)}")
@@ -735,29 +635,113 @@ async def translate_audio(audio: UploadFile = File(...), source_lang: str = Form(...), target_lang: str = Form(...)):
             "message": f"Processing failed: {str(e)}",
             "source_text": transcription,
             "translated_text": translated_text,
-            "
-            "
+            "is_inappropriate": is_inappropriate,
+            "output_audio": output_audio_url
         }
     finally:
         logger.info(f"Cleaning up temporary file: {temp_path}")
-
-            os.unlink(temp_path)
-        except Exception as e:
-            logger.error(f"Error deleting temporary file: {str(e)}")
-
+        os.unlink(temp_path)
 
-
-
-
-
-
-
-
-
-
-        "
-    }
+@app.post("/synthesize-speech")
+async def synthesize_speech(text: str = Form(...), target_lang: str = Form(...)):
+    """Endpoint to generate synthesized speech for the given text in the target language"""
+    global tts_model, tts_tokenizer, current_tts_language
+
+    if not text:
+        raise HTTPException(status_code=400, detail="No text provided")
+    if target_lang not in LANGUAGE_MAPPING:
+        raise HTTPException(status_code=400, detail="Invalid language selected")
+
+    logger.info(f"Synthesize-speech requested: '{text}' in {target_lang}")
+    request_id = str(uuid.uuid4())
+
+    # Check if TTS model is loaded
+    if not model_status["tts"].startswith("loaded") or tts_model is None or tts_tokenizer is None:
+        logger.warning("TTS model not loaded, returning error response")
+        return {
+            "request_id": request_id,
+            "status": "processing",
+            "message": "TTS model not loaded yet. Please try again later.",
+            "output_audio": None
+        }
+
+    target_code = LANGUAGE_MAPPING[target_lang]
+    output_audio_url = None
+
+    try:
+        # Update TTS model if the target language doesn't match the current TTS language
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        if current_tts_language != target_code:
+            try:
+                logger.info(f"Updating TTS model for {target_code}...")
+                from transformers import VitsModel, AutoTokenizer
+                tts_model = VitsModel.from_pretrained(f"facebook/mms-tts-{target_code}")
+                tts_tokenizer = AutoTokenizer.from_pretrained(f"facebook/mms-tts-{target_code}")
+                tts_model.to(device)
+                current_tts_language = target_code
+                logger.info(f"TTS model updated to {target_code}")
+                model_status["tts"] = "loaded"
+            except Exception as e:
+                logger.error(f"Failed to load TTS model for {target_code}: {str(e)}")
+                try:
+                    logger.info("Falling back to MMS-TTS English model...")
+                    tts_model = VitsModel.from_pretrained("facebook/mms-tts-eng")
+                    tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
+                    tts_model.to(device)
+                    current_tts_language = "eng"
+                    logger.info("Fallback TTS model loaded successfully")
+                    model_status["tts"] = "loaded (fallback)"
+                except Exception as e2:
+                    logger.error(f"Failed to load fallback TTS model: {str(e2)}")
+                    model_status["tts"] = "failed"
+                    error_message = f"TTS model loading failed: {str(e)} (fallback also failed: {str(e2)})"
+                    return {
+                        "request_id": request_id,
+                        "status": "failed",
+                        "message": error_message,
+                        "output_audio": None
+                    }
+
+        # Check for inappropriate content in the input text
+        is_inappropriate = check_inappropriate_content(text)
+        logger.info(f"Inappropriate content check: {is_inappropriate}")
+
+        # Generate speech from text
+        inputs = tts_tokenizer(text, return_tensors="pt").to(device)
+        logger.info("Generating speech...")
+
+        with torch.no_grad():
+            output = tts_model(**inputs)
+
+        speech = output.waveform.cpu().numpy().squeeze()
+        speech = (speech * 32767).astype(np.int16)
+        sample_rate = tts_model.config.sampling_rate
+        # Save the audio as a WAV file
+        output_filename = f"{request_id}.wav"
+        output_path = os.path.join(AUDIO_DIR, output_filename)
+        save_pcm_to_wav(speech.tolist(), sample_rate, output_path)
+        logger.info(f"Saved synthesized audio to {output_path}")
+
+        # Generate a URL to the WAV file
+        output_audio_url = f"https://jerich-talklasapp.hf.space/audio_output/{output_filename}"
+        logger.info("TTS conversion completed")
+
+        return {
+            "request_id": request_id,
+            "status": "completed",
+            "message": "Text-to-speech conversion completed successfully.",
+            "text": text,
+            "is_inappropriate": is_inappropriate,
+            "output_audio": output_audio_url
+        }
+    except Exception as e:
+        logger.error(f"Error during speech synthesis: {str(e)}")
+        return {
+            "request_id": request_id,
+            "status": "failed",
+            "message": f"Speech synthesis failed: {str(e)}",
+            "output_audio": None
+        }
 
 if __name__ == "__main__":
     import uvicorn
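For quick manual testing of the endpoints this commit leaves in place, a minimal client sketch follows. It is an illustration, not part of the commit: the base URL is assumed from the `output_audio` links hard-coded in the handlers, the language names ("Tagalog", "English") are placeholders for whatever keys the full LANGUAGE_MAPPING actually defines (its body is outside this diff), and the third-party requests library is assumed to be installed.

# Hypothetical client sketch for the endpoints defined above.
import requests

BASE = "https://jerich-talklasapp.hf.space"  # assumed deployment URL, from the hard-coded audio links

# Health check: reports models_loaded, loading_in_progress, and per-model status;
# poll this until models_loaded is True before exercising the other endpoints.
print(requests.get(f"{BASE}/health").json())

# Switch the STT/TTS models for a language pair (keys must exist in LANGUAGE_MAPPING).
resp = requests.post(f"{BASE}/update-languages",
                     data={"source_lang": "Tagalog", "target_lang": "English"})
print(resp.json())

# Translate text and synthesize speech; the response carries translated_text,
# is_inappropriate, and an output_audio URL when TTS succeeded.
resp = requests.post(f"{BASE}/translate-text",
                     data={"text": "Magandang umaga", "source_lang": "Tagalog",
                           "target_lang": "English"})
print(resp.json())

# Transcribe, translate, and synthesize from an uploaded WAV file.
with open("sample.wav", "rb") as f:
    resp = requests.post(f"{BASE}/translate-audio",
                         files={"audio": ("sample.wav", f, "audio/wav")},
                         data={"source_lang": "Tagalog", "target_lang": "English"})
print(resp.json())

# Plain text-to-speech in the target language via the new /synthesize-speech endpoint.
resp = requests.post(f"{BASE}/synthesize-speech",
                     data={"text": "Hello", "target_lang": "English"})
print(resp.json())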