# fast_rep_voice / main.py
# Krish-05's picture
# Update main.py
# e563ca3 verified
import io
import logging
import os

import numpy as np
import soundfile as sf
import torch
import uvicorn
import whisper
from fastapi import FastAPI, UploadFile, File, HTTPException
# Root logging setup: timestamp, level, logger name, then the message.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Holds the loaded Whisper model; stays None until the startup hook assigns it.
whisper_model = None

# The ASGI application that the route and startup decorators attach to.
app = FastAPI()
@app.on_event("startup")
async def startup_event():
    """Load the Whisper "tiny" model once when the application starts.

    The device is taken from the ``WHISPER_DEVICE`` environment variable when
    it is set to a non-empty value. Otherwise ``"cuda"`` is used if
    ``torch.cuda.is_available()`` reports it, with ``"cpu"`` as the fallback.
    The loaded model is stored in the module-level ``whisper_model``.

    Raises:
        RuntimeError: If the model cannot be loaded. The underlying exception
            is attached as ``__cause__``.
    """
    global whisper_model
    logger.info("Starting up FastAPI application...")
    # `or` (rather than a .get() default) so that a variable which is set but
    # empty falls back to auto-detection instead of passing "" as the device.
    device = os.environ.get("WHISPER_DEVICE") or (
        "cuda" if torch.cuda.is_available() else "cpu"
    )
    logger.info(f"Loading Whisper model (tiny) on {device}...")
    try:
        whisper_model = whisper.load_model("tiny", device=device)
    except Exception as e:
        logger.error(f"Failed to load Whisper model: {e}", exc_info=True)
        # Chain explicitly so the traceback shows the load failure as the cause.
        raise RuntimeError(
            f"Could not load Whisper model: {e}. Check torch/whisper installation or device."
        ) from e
    else:
        logger.info("Whisper model (tiny) loaded successfully.")
def _resample_to_whisper_rate(samples, source_rate):
    """Linearly resample mono ``samples`` to ``whisper.audio.SAMPLE_RATE``.

    Args:
        samples: One-dimensional float array of audio samples.
        source_rate: Sample rate of ``samples`` in Hz.

    Returns:
        A float32 array at Whisper's sample rate. The input is returned
        unchanged when it is empty or already at that rate.
    """
    target_rate = whisper.audio.SAMPLE_RATE
    if samples.size == 0 or source_rate == target_rate:
        return samples
    source_len = samples.shape[0]
    target_len = max(1, round(source_len * target_rate / source_rate))
    source_times = np.arange(source_len) / source_rate
    target_times = np.arange(target_len) / target_rate
    # NOTE(review): plain linear interpolation, no anti-aliasing filter. It
    # avoids a new dependency; swap in a polyphase resampler if quality on
    # high-rate input matters.
    # np.interp returns float64; cast back to the float32 used elsewhere here.
    return np.interp(target_times, source_times, samples).astype(np.float32)


@app.post("/transcribe_audio/")
async def transcribe_audio(audio_file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with the loaded Whisper model.

    The upload is decoded in memory with ``soundfile``, mixed down to mono,
    resampled to ``whisper.audio.SAMPLE_RATE`` and passed to
    ``whisper_model.transcribe``.

    Args:
        audio_file: The uploaded audio file, in a format ``soundfile`` can read.

    Returns:
        A dict ``{"transcription": <text>}``.

    Raises:
        HTTPException: 503 if the model has not been loaded yet; 500 if
            reading, decoding or transcribing the audio fails.
    """
    if whisper_model is None:
        logger.error("Whisper model is not loaded.")
        raise HTTPException(status_code=503, detail="Whisper model is not loaded yet. Please try again in a moment.")
    logger.info(f"Received audio file: {audio_file.filename}, type: {audio_file.content_type}")
    try:
        audio_bytes = await audio_file.read()
        # Decode straight to float32 instead of reading float64 and casting.
        data, samplerate = sf.read(io.BytesIO(audio_bytes), dtype="float32")
        if data.ndim > 1:
            # soundfile returns (frames, channels); average channels to mono.
            data = data.mean(axis=1)
        # The array is handed to Whisper without any rate information, so it
        # must already be at Whisper's sample rate or the audio is misread.
        data = _resample_to_whisper_rate(data, samplerate)
        # NOTE(review): this synchronous call runs inside the coroutine, so the
        # event loop is blocked for the duration of the transcription.
        result = whisper_model.transcribe(data)
        transcription = result["text"]
        logger.info(f"Transcription complete: '{transcription[:100]}...'")
        return {"transcription": transcription}
    except Exception as e:
        logger.error(f"Error during transcription: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Error processing audio: {e}. Please check server logs.")
if __name__ == "__main__":
    # Direct script execution: serve the app with uvicorn on all interfaces.
    logger.info("Starting FastAPI Whisper server on port 1990...")
    uvicorn.run(app, port=1990, host="0.0.0.0")