Spaces:

Krish-05
/

fast_rep_voice

Paused

App Files Files Community

fast_rep_voice / main.py

Krish-05

Update main.py

e563ca3 verified 6 months ago

raw

history blame contribute delete

2.29 kB

	import io
	import logging
	import os

	import numpy as np
	import soundfile as sf
	import torch
	import uvicorn
	import whisper
	from fastapi import FastAPI, UploadFile, File, HTTPException

	# Logging setup: each record shows timestamp, level, logger name, message.
	_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(name)s - %(message)s'
	logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
	logger = logging.getLogger(__name__)

	# The FastAPI application that the startup hook and route attach to.
	app = FastAPI()

	# Holds the loaded Whisper model; remains None until the startup hook
	# assigns it.
	whisper_model = None

	@app.on_event("startup")
	async def startup_event():
	    """Load the Whisper "tiny" model once when the application starts.

	    The device comes from the ``WHISPER_DEVICE`` environment variable. When
	    that variable is unset or empty, CUDA is used if torch reports it as
	    available, otherwise the CPU. The loaded model is stored in the
	    module-level ``whisper_model``.

	    Raises:
	        RuntimeError: if the model cannot be loaded; the underlying
	            exception is attached as the cause.
	    """
	    global whisper_model
	    logger.info("Starting up FastAPI application...")

	    # `or` instead of a .get() default so that WHISPER_DEVICE="" also falls
	    # back to auto-detection rather than being passed on as a device name.
	    device = os.environ.get("WHISPER_DEVICE") or (
	        "cuda" if torch.cuda.is_available() else "cpu"
	    )

	    logger.info(f"Loading Whisper model (tiny) on {device}...")
	    try:
	        whisper_model = whisper.load_model("tiny", device=device)
	    except Exception as e:
	        logger.error(f"Failed to load Whisper model: {e}", exc_info=True)
	        # Chain explicitly so the traceback shows the load failure as the
	        # direct cause of the startup error.
	        raise RuntimeError(
	            f"Could not load Whisper model: {e}. Check torch/whisper installation or device."
	        ) from e
	    logger.info("Whisper model (tiny) loaded successfully.")

	# Whisper interprets a raw sample array as 16 kHz mono audio, so decoded
	# uploads must be converted to this rate before transcription.
	WHISPER_SAMPLE_RATE = 16000


	def _resample_audio(samples, source_rate, target_rate=WHISPER_SAMPLE_RATE):
	    """Return 1-D `samples` resampled from `source_rate` to `target_rate` Hz.

	    Uses FFT-based (band-limited) resampling: the spectrum is cropped or
	    zero-padded to the new length, which low-pass filters the signal when
	    downsampling. The result is float32; the input is returned unchanged
	    when the rates already match or there are no samples.
	    """
	    if source_rate == target_rate or samples.size == 0:
	        return samples

	    target_len = max(1, int(round(samples.size * target_rate / source_rate)))
	    spectrum = np.fft.rfft(samples)
	    # irfft crops/zero-pads the spectrum to fit `target_len` and divides by
	    # target_len; rescale so the amplitude matches the original signal.
	    resampled = np.fft.irfft(spectrum, n=target_len) * (target_len / samples.size)
	    return resampled.astype(np.float32)


	@app.post("/transcribe_audio/")
	async def transcribe_audio(audio_file: UploadFile = File(...)):
	    """Transcribe an uploaded audio file with the loaded Whisper model.

	    The upload is decoded with soundfile, mixed down to mono, resampled to
	    16 kHz and passed to ``whisper_model.transcribe``.

	    Args:
	        audio_file: the uploaded audio, in any format soundfile can decode.

	    Returns:
	        A dict ``{"transcription": <text>}``.

	    Raises:
	        HTTPException: 503 if the model has not been loaded, 400 if the
	            upload cannot be decoded or contains no samples, 500 for any
	            other failure during processing.
	    """
	    if whisper_model is None:
	        logger.error("Whisper model is not loaded.")
	        raise HTTPException(status_code=503, detail="Whisper model is not loaded yet. Please try again in a moment.")

	    logger.info(f"Received audio file: {audio_file.filename}, type: {audio_file.content_type}")

	    try:
	        audio_bytes = await audio_file.read()

	        try:
	            # Ask soundfile for float32 directly instead of converting later.
	            data, samplerate = sf.read(io.BytesIO(audio_bytes), dtype="float32")
	        except RuntimeError as e:
	            # soundfile's decode errors derive from RuntimeError; an
	            # unreadable upload is a client error, not a server fault.
	            logger.warning(f"Could not decode uploaded audio: {e}")
	            raise HTTPException(status_code=400, detail=f"Could not decode audio file: {e}") from e

	        if data.size == 0:
	            raise HTTPException(status_code=400, detail="Audio file contains no samples.")

	        # Multi-channel audio arrives as (frames, channels); average to mono.
	        if data.ndim > 1:
	            data = data.mean(axis=1)

	        # Without this, audio recorded at e.g. 44.1 kHz would be read by
	        # Whisper as if it were 16 kHz and transcribed incorrectly.
	        data = _resample_audio(data, samplerate)

	        # NOTE(review): this synchronous call runs inside the async handler,
	        # so other requests wait while a transcription is in progress.
	        result = whisper_model.transcribe(data)
	        transcription = result["text"]
	        logger.info(f"Transcription complete: '{transcription[:100]}...'")

	        return {"transcription": transcription}
	    except HTTPException:
	        # Preserve the deliberate 4xx responses raised above.
	        raise
	    except Exception as e:
	        logger.error(f"Error during transcription: {e}", exc_info=True)
	        raise HTTPException(status_code=500, detail=f"Error processing audio: {e}. Please check server logs.") from e

	if __name__ == "__main__":
	    # Script entry point: announce the port, then serve the app on every
	    # network interface.
	    bind_host, bind_port = "0.0.0.0", 1990
	    logger.info("Starting FastAPI Whisper server on port 1990...")
	    uvicorn.run(app, host=bind_host, port=bind_port)