"""FastAPI service that transcribes uploaded audio with an OpenAI Whisper model.

The model is loaded once at application startup and shared by all requests.
Audio is decoded with ``soundfile``, downmixed to mono, resampled to the rate
Whisper expects, and then transcribed in a worker thread.
"""

import io
import logging
import os
import threading

import numpy as np
import soundfile as sf
import torch
import uvicorn
import whisper
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.concurrency import run_in_threadpool

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
)
logger = logging.getLogger(__name__)

app = FastAPI()

# Global variable to store the model; stays None until startup_event succeeds.
whisper_model = None

# Whisper interprets a raw waveform as being sampled at this rate, so every
# upload has to be converted to it before calling ``transcribe``.
WHISPER_SAMPLE_RATE = whisper.audio.SAMPLE_RATE

# The original handler ran inference directly on the event loop, which
# implicitly processed one transcription at a time. Inference now runs in
# worker threads, so this lock keeps that one-at-a-time guarantee on the
# shared model.
_inference_lock = threading.Lock()


class InvalidAudioError(ValueError):
    """Raised when an upload cannot be decoded into a usable waveform."""


def _resample(samples, samplerate, target_rate):
    """Return *samples* (1-D) resampled from *samplerate* to *target_rate*.

    Uses linear interpolation so no extra dependency is needed. It applies no
    anti-aliasing filter, so it is an approximation rather than a
    high-fidelity resampler.
    """
    if samplerate == target_rate:
        return samples
    source_len = samples.shape[0]
    target_len = max(1, int(round(source_len * target_rate / samplerate)))
    source_times = np.arange(source_len) / samplerate
    target_times = np.arange(target_len) / target_rate
    return np.interp(target_times, source_times, samples)


def _decode_audio(audio_bytes):
    """Decode raw file bytes into a mono float32 waveform at Whisper's rate.

    Raises:
        InvalidAudioError: if soundfile cannot decode the bytes or the file
            contains no audio frames.
    """
    try:
        # always_2d gives a uniform (frames, channels) layout for the downmix.
        samples, samplerate = sf.read(
            io.BytesIO(audio_bytes), dtype="float32", always_2d=True
        )
    except RuntimeError as e:
        # soundfile reports undecodable input via RuntimeError subclasses.
        raise InvalidAudioError(f"Could not decode audio: {e}") from e

    if samples.shape[0] == 0:
        raise InvalidAudioError("Audio file contains no samples.")

    mono = samples.mean(axis=1)
    resampled = _resample(mono, samplerate, WHISPER_SAMPLE_RATE)
    # np.interp returns float64; the original pipeline fed float32 to Whisper.
    return np.ascontiguousarray(resampled, dtype=np.float32)


def _transcribe_bytes(audio_bytes):
    """Decode *audio_bytes* and return the transcribed text.

    Blocking; intended to run in a worker thread rather than on the event loop.
    """
    waveform = _decode_audio(audio_bytes)
    with _inference_lock:
        result = whisper_model.transcribe(waveform)
    return result["text"]


@app.on_event("startup")
async def startup_event():
    """Load the Whisper "tiny" model into the module-level ``whisper_model``.

    The device comes from the ``WHISPER_DEVICE`` environment variable when
    set; otherwise CUDA is used if available, else the CPU.

    Raises:
        RuntimeError: if the model cannot be loaded.
    """
    global whisper_model
    logger.info("Starting up FastAPI application...")
    device = os.environ.get(
        "WHISPER_DEVICE", "cuda" if torch.cuda.is_available() else "cpu"
    )
    logger.info("Loading Whisper model (tiny) on %s...", device)
    try:
        whisper_model = whisper.load_model("tiny", device=device)
    except Exception as e:
        logger.exception("Failed to load Whisper model: %s", e)
        raise RuntimeError(
            f"Could not load Whisper model: {e}. "
            "Check torch/whisper installation or device."
        ) from e
    logger.info("Whisper model (tiny) loaded successfully.")


@app.post("/transcribe_audio/")
async def transcribe_audio(audio_file: UploadFile = File(...)):
    """Transcribe an uploaded audio file and return its text.

    Args:
        audio_file: the uploaded file, in any format soundfile can decode.

    Returns:
        A dict of the form ``{"transcription": <text>}``.

    Raises:
        HTTPException: 503 if the model is not loaded, 400 if the upload is
            empty or cannot be decoded, 500 for any other processing failure.
    """
    if whisper_model is None:
        logger.error("Whisper model is not loaded.")
        raise HTTPException(
            status_code=503,
            detail="Whisper model is not loaded yet. Please try again in a moment.",
        )

    logger.info(
        "Received audio file: %s, type: %s",
        audio_file.filename,
        audio_file.content_type,
    )

    try:
        audio_bytes = await audio_file.read()
        if not audio_bytes:
            raise InvalidAudioError("Uploaded audio file is empty.")
        # Decoding and inference are blocking, so keep them off the event loop.
        transcription = await run_in_threadpool(_transcribe_bytes, audio_bytes)
    except InvalidAudioError as e:
        logger.warning("Rejected audio upload: %s", e)
        raise HTTPException(
            status_code=400, detail=f"Invalid audio file: {e}"
        ) from e
    except Exception as e:
        logger.exception("Error during transcription: %s", e)
        raise HTTPException(
            status_code=500,
            detail=f"Error processing audio: {e}. Please check server logs.",
        ) from e

    logger.info("Transcription complete: '%s...'", transcription[:100])
    return {"transcription": transcription}


if __name__ == "__main__":
    logger.info("Starting FastAPI Whisper server on port 1990...")
    uvicorn.run(app, host="0.0.0.0", port=1990)