File size: 2,289 Bytes
e563ca3
 
 
 
 
2f5d8bd
e563ca3
 
2f5d8bd
e563ca3
 
2f5d8bd
 
e1b30b5
 
e563ca3
 
2f5d8bd
e563ca3
 
 
 
2f5d8bd
e563ca3
2f5d8bd
e563ca3
2f5d8bd
e563ca3
 
2f5d8bd
e563ca3
 
c278e47
e563ca3
c278e47
e563ca3
 
 
c278e47
e563ca3
c278e47
e1b30b5
e563ca3
 
e1b30b5
e563ca3
e1b30b5
e563ca3
 
c278e47
e563ca3
 
e1b30b5
e563ca3
 
 
 
 
 
 
 
e1b30b5
e563ca3
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import io
import logging
import os

import numpy as np
import soundfile as sf
import torch
import uvicorn
import whisper
from fastapi import FastAPI, UploadFile, File, HTTPException

# Configure root logging once at import time: timestamp, level, logger name, message.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(name)s - %(message)s')
logger = logging.getLogger(__name__)  # module-level logger, standard convention

app = FastAPI()

# Global variable to store the model; written once by the startup handler,
# read by the request handler.  Stays None until loading completes.
whisper_model = None

@app.on_event("startup")
async def startup_event():
    """Load the Whisper "tiny" model once at application startup.

    Device selection: the WHISPER_DEVICE env var wins when set; otherwise
    CUDA is used if available, else CPU.

    Raises:
        RuntimeError: if the model cannot be loaded (aborts startup).
    """
    global whisper_model
    logger.info("Starting up FastAPI application...")

    # Explicit env-var override beats auto-detection of a GPU.
    device = os.environ.get("WHISPER_DEVICE", "cuda" if torch.cuda.is_available() else "cpu")

    logger.info(f"Loading Whisper model (tiny) on {device}...")
    try:
        whisper_model = whisper.load_model("tiny", device=device)
        logger.info("Whisper model (tiny) loaded successfully.")
    except Exception as e:
        logger.error(f"Failed to load Whisper model: {e}", exc_info=True)
        # Chain the original exception (PEP 3134) so the root cause is not
        # lost from the traceback when startup fails.
        raise RuntimeError(
            f"Could not load Whisper model: {e}. Check torch/whisper installation or device."
        ) from e

@app.post("/transcribe_audio/")
async def transcribe_audio(audio_file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with the preloaded Whisper model.

    Returns:
        {"transcription": <text>} on success.

    Raises:
        HTTPException 503: model not loaded yet (startup still running/failed).
        HTTPException 500: any decoding or transcription failure.
    """
    if whisper_model is None:
        logger.error("Whisper model is not loaded.")
        raise HTTPException(status_code=503, detail="Whisper model is not loaded yet. Please try again in a moment.")

    logger.info(f"Received audio file: {audio_file.filename}, type: {audio_file.content_type}")

    try:
        audio_bytes = await audio_file.read()
        audio_io = io.BytesIO(audio_bytes)

        data, samplerate = sf.read(audio_io)

        # Downmix multi-channel audio to mono by averaging channels.
        if data.ndim > 1:
            data = data.mean(axis=1)

        # Whisper's transcribe() expects 16 kHz mono audio.  The original
        # code passed the decoded samples through at whatever rate the file
        # used (e.g. 44.1 kHz), which mis-times the audio for the model.
        # Resample with linear interpolation to the expected rate.
        target_sr = 16000
        if samplerate != target_sr:
            n_out = int(round(len(data) * target_sr / samplerate))
            data = np.interp(
                np.linspace(0.0, len(data), num=n_out, endpoint=False),
                np.arange(len(data)),
                data,
            )
            samplerate = target_sr

        # Cast last: downmix/interp may have produced float64.
        if data.dtype != 'float32':
            data = data.astype('float32')

        result = whisper_model.transcribe(data)
        transcription = result["text"]
        logger.info(f"Transcription complete: '{transcription[:100]}...'")

        return {"transcription": transcription}
    except HTTPException:
        # Don't let the broad handler below rewrap deliberate HTTP errors.
        raise
    except Exception as e:
        logger.error(f"Error during transcription: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Error processing audio: {e}. Please check server logs.")

if __name__ == "__main__":
    # Port is configurable via the PORT env var; defaults to the original 1990
    # so existing deployments keep working unchanged.
    port = int(os.environ.get("PORT", "1990"))
    logger.info(f"Starting FastAPI Whisper server on port {port}...")
    uvicorn.run(app, host="0.0.0.0", port=port)