# Spaces: techconspartners / ConversAI (status: Sleeping)
# File size: 5,743 Bytes
# Revision: b368e21

"""
Created By: ishwor subedi
Date: 2024-07-31
"""
import os
import tempfile
from fastapi.responses import JSONResponse
from fastapi import Form
from fastapi import UploadFile, HTTPException, status
from src.models.models import TextToSpeechRequest
from fastapi.routing import APIRouter
from src.pipeline.speech_transcription_pipeline import SpeechTranscriptionPipeline
from src import logging

# Router collecting the speech endpoints defined in this module; they are
# grouped under the "SpeechTranscription" tag.
speech_translator_router = APIRouter(tags=["SpeechTranscription"])
# One pipeline instance, created when this module is imported and shared by
# both request handlers below.
pipeline = SpeechTranscriptionPipeline()


@speech_translator_router.post(
    "/text_to_speech",
    description="""
    ** For language refer below points**
    **Supported Locales:**

    - **English:**
      - **Australia:** 
        - **Language:** en
        - **TLD:** com.au
      - **United Kingdom:**
        - **Language:** en
        - **TLD:** co.uk
      - **United States:**
        - **Language:** en
        - **TLD:** us
      - **Canada:**
        - **Language:** en
        - **TLD:** ca
      - **India:**
        - **Language:** en
        - **TLD:** co.in
      - **Ireland:**
        - **Language:** en
        - **TLD:** ie
      - **South Africa:**
        - **Language:** en
        - **TLD:** co.za
      - **Nigeria:**
        - **Language:** en
        - **TLD:** com.ng

    - **French:**
      - **Canada:** 
        - **Language:** fr
        - **TLD:** ca
      - **France:**
        - **Language:** fr
        - **TLD:** fr

    - **Mandarin:**
      - **China Mainland:** 
        - **Language:** zh-CN
        - **TLD:** any
      - **Taiwan:**
        - **Language:** zh-TW
        - **TLD:** any

    - **Portuguese:**
      - **Brazil:** 
        - **Language:** pt
        - **TLD:** com.br
      - **Portugal:**
        - **Language:** pt
        - **TLD:** pt

    - **Spanish:**
      - **Mexico:** 
        - **Language:** es
        - **TLD:** com.mx
      - **Spain:**
        - **Language:** es
        - **TLD:** es
      - **United States:**
        - **Language:** es
        - **TLD:** us
    """
)
async def text_to_speech(request: TextToSpeechRequest):
    """Convert the text in *request* to audio and return it in a JSON body.

    Passes ``request.text``, ``request.lang`` and ``request.tld`` to
    ``pipeline.text_to_speech``.

    Returns:
        JSONResponse with the keys ``audio`` (the pipeline result) and
        ``status_code`` (200).

    Raises:
        HTTPException: 400 when a ``ValueError`` occurs, including the one
            raised here for a falsy pipeline result; 500 for any other error.
    """
    logging.info("Text to speech request received")
    try:
        synthesized = pipeline.text_to_speech(request.text, request.lang, request.tld)
        if not synthesized:
            logging.error("Audio generation failed.")
            raise ValueError("Audio generation failed.")

        logging.info("Text to speech request processed successfully")
        # The response is built inside the try so that a failure while
        # constructing it is still reported through the handlers below.
        payload = {"audio": synthesized, "status_code": status.HTTP_200_OK}
        return JSONResponse(content=payload, status_code=200)
    except ValueError as bad_input:
        reason = str(bad_input)
        logging.error(f"Error processing text to speech request: {reason}")
        raise HTTPException(status_code=400, detail=reason)
    except Exception as unexpected:
        logging.error(f"Internal Server Error: {unexpected!s}")
        raise HTTPException(status_code=500, detail="Internal Server Error")


@speech_translator_router.post(
    "/speech_to_text",
    description="""
    ** Specify the language used in the audio **
    **Supported Languages:**

    **Major Languages:**
    - **English:** en
    - **Mandarin Chinese:** zh
    - **Spanish:** es
    - **French:** fr
    - **German:** de
    - **Italian:** it
    - **Japanese:** ja
    - **Korean:** ko
    - **Russian:** ru
    - **Portuguese:** pt
    - **Arabic:** ar

    **Additional Languages:**

    - **Indic Languages:**
      - **Hindi:** hi
      - **Bengali:** bn
      - **Tamil:** ta
      - **Telugu:** te

    - **Southeast Asian Languages:**
      - **Vietnamese:** vi
      - **Thai:** th
      - **Indonesian:** id
      - **Malay:** ms

    - **African Languages:**
      - **Swahili:** sw
      - **Yoruba:** yo
      - **Hausa:** ha

    - **European Languages:**
      - **Polish:** pl
      - **Dutch:** nl
      - **Swedish:** sv
      - **Norwegian:** no
    """
)
async def speech_to_text(audio: UploadFile, lang: str = Form(...)):
    """Transcribe an uploaded audio file to text.

    The upload is read into memory and written to a temporary ``.wav`` file.
    That file's path is passed to ``pipeline.speech_to_text`` together with
    ``lang``. The temporary file is removed afterwards, whether or not
    transcription succeeds.

    Args:
        audio: The uploaded audio file.
        lang: Language code of the audio, sent as a required form field.

    Returns:
        JSONResponse with the keys ``transcript`` and ``status_code`` (200).

    Raises:
        HTTPException: 400 if the upload cannot be read or is empty; 500 if
            the temporary file cannot be written or transcription fails;
            404 if the pipeline raises ``FileNotFoundError``.
    """
    logging.info("Speech to text request received")
    try:
        audio_bytes = await audio.read()
        if not audio_bytes:
            logging.error("Empty audio file")
            raise ValueError("Empty audio file")
    except Exception as e:
        # This handler also catches the ValueError raised just above, so an
        # empty upload is reported with the same 400 response.
        logging.error(f"Invalid audio file {e}")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid audio file"
        ) from e

    temp_audio_file_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
            # Record the path before writing: the file is created with
            # delete=False, so a failed write must be cleaned up by hand.
            temp_audio_file_path = temp_audio_file.name
            temp_audio_file.write(audio_bytes)
        logging.info(f"Temporary audio file created at {temp_audio_file_path}")
    except Exception as e:
        logging.error(f"Could not process audio file{e}")
        if temp_audio_file_path and os.path.exists(temp_audio_file_path):
            os.remove(temp_audio_file_path)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Could not process audio file"
        ) from e

    try:
        logging.info("Transcribing audio to text")
        transcript = pipeline.speech_to_text(temp_audio_file_path, lang)
    except FileNotFoundError as fnfe:
        # Fixed: the original referenced the undefined name `fnfel` here,
        # which raised NameError instead of returning the 404 below.
        logging.error(f"Temporary file not found{fnfe}")
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Temporary file not found"
        ) from fnfe
    except Exception as e:
        logging.error(f"Error processing speech-to-text: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Error processing speech-to-text"
        ) from e
    finally:
        logging.info("Cleaning up temporary audio file")
        if os.path.exists(temp_audio_file_path):
            os.remove(temp_audio_file_path)

    return JSONResponse(content={"transcript": transcript, "status_code": status.HTTP_200_OK}, status_code=200)