""" Created By: ishwor subedi Date: 2024-07-31 """ import os import tempfile from fastapi.responses import JSONResponse from fastapi import Form from fastapi import UploadFile, HTTPException, status from src.models.models import TextToSpeechRequest from fastapi.routing import APIRouter from src.pipeline.speech_transcription_pipeline import SpeechTranscriptionPipeline from src import logging speech_translator_router = APIRouter(tags=["SpeechTranscription"]) pipeline = SpeechTranscriptionPipeline() @speech_translator_router.post( "/text_to_speech", description=""" ** For language refer below points** **Supported Locales:** - **English:** - **Australia:** - **Language:** en - **TLD:** com.au - **United Kingdom:** - **Language:** en - **TLD:** co.uk - **United States:** - **Language:** en - **TLD:** us - **Canada:** - **Language:** en - **TLD:** ca - **India:** - **Language:** en - **TLD:** co.in - **Ireland:** - **Language:** en - **TLD:** ie - **South Africa:** - **Language:** en - **TLD:** co.za - **Nigeria:** - **Language:** en - **TLD:** com.ng - **French:** - **Canada:** - **Language:** fr - **TLD:** ca - **France:** - **Language:** fr - **TLD:** fr - **Mandarin:** - **China Mainland:** - **Language:** zh-CN - **TLD:** any - **Taiwan:** - **Language:** zh-TW - **TLD:** any - **Portuguese:** - **Brazil:** - **Language:** pt - **TLD:** com.br - **Portugal:** - **Language:** pt - **TLD:** pt - **Spanish:** - **Mexico:** - **Language:** es - **TLD:** com.mx - **Spain:** - **Language:** es - **TLD:** es - **United States:** - **Language:** es - **TLD:** us """ ) async def text_to_speech(request: TextToSpeechRequest): logging.info(f"Text to speech request received") try: audio_bytes = pipeline.text_to_speech(request.text, request.lang, request.tld) if not audio_bytes: logging.error(f"Audio generation failed.") raise ValueError("Audio generation failed.") logging.info(f"Text to speech request processed successfully") return JSONResponse(content={"audio": audio_bytes, "status_code": status.HTTP_200_OK}, status_code=200) except ValueError as ve: logging.error(f"Error processing text to speech request: {str(ve)}") raise HTTPException(status_code=400, detail=str(ve)) except Exception as e: logging.error(f"Internal Server Error: {str(e)}") raise HTTPException(status_code=500, detail="Internal Server Error") @speech_translator_router.post( "/speech_to_text", description=""" ** Specify the language used in the audio ** **Supported Languages:** **Major Languages:** - **English:** en - **Mandarin Chinese:** zh - **Spanish:** es - **French:** fr - **German:** de - **Italian:** it - **Japanese:** ja - **Korean:** ko - **Russian:** ru - **Portuguese:** pt - **Arabic:** ar **Additional Languages:** - **Indic Languages:** - **Hindi:** hi - **Bengali:** bn - **Tamil:** ta - **Telugu:** te - **Southeast Asian Languages:** - **Vietnamese:** vi - **Thai:** th - **Indonesian:** id - **Malay:** ms - **African Languages:** - **Swahili:** sw - **Yoruba:** yo - **Hausa:** ha - **European Languages:** - **Polish:** pl - **Dutch:** nl - **Swedish:** sv - **Norwegian:** no """ ) async def speech_to_text(audio: UploadFile, lang: str = Form(...)): logging.info(f"Speech to text request received") try: audio_bytes = await audio.read() if not audio_bytes: logging.error(f"Empty audio file") raise ValueError("Empty audio file") except Exception as e: logging.error(f"Invalid audio file {e}") raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid audio file" ) try: with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file: temp_audio_file.write(audio_bytes) temp_audio_file_path = temp_audio_file.name logging.info(f"Temporary audio file created at {temp_audio_file_path}") except Exception as e: logging.error(f"Could not process audio file{e}") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Could not process audio file" ) try: logging.info(f"Transcribing audio to text") transcript = pipeline.speech_to_text(temp_audio_file_path, lang) except FileNotFoundError as fnfe: logging.error(f"Temporary file not found{fnfel}") raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail="Temporary file not found" ) except Exception as e: logging.error(f"Error processing speech-to-text: {str(e)}") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Error processing speech-to-text" ) finally: logging.info(f"Cleaning up temporary audio file") if os.path.exists(temp_audio_file_path): os.remove(temp_audio_file_path) return JSONResponse(content={"transcript": transcript, "status_code": status.HTTP_200_OK}, status_code=200)