# IshraqCodersarts's picture
# Model deployment
# 41ec97d verified
"""
Voice Emotion Recognition API
FastAPI application for analyzing voice emotions using Hugging Face transformers
"""
import logging
import os
import tempfile
from typing import Any, Dict, Optional

import uvicorn
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
# Logging: INFO level on the root logger, plus a module-specific logger
# used by every function in this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# The FastAPI application object; the metadata below is what the app is
# constructed with.
app = FastAPI(
    version="1.0.0",
    title="Voice Emotion Recognition API",
    description="API for analyzing voice emotions using Hugging Face transformers",
)

# CORS middleware for Django app integration.
# NOTE(review): FastAPI's CORS documentation states that wildcard
# origins/methods/headers must not be combined with allow_credentials=True.
# Confirm whether credentials are actually needed and list explicit origins.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],  # In production, specify your Django app URL
    allow_headers=["*"],
    allow_methods=["*"],
)

# Process-wide cache for the pipeline; stays None until the first call to
# get_voice_emotion_pipeline() loads the model.
_voice_emotion_pipeline = None
def get_voice_emotion_pipeline():
    """
    Return the shared voice emotion recognition pipeline, creating it on first use.

    The pipeline is kept in the module-level ``_voice_emotion_pipeline``
    variable, so the model is loaded at most once per successful call chain.

    Returns:
        transformers.pipeline: Voice emotion recognition pipeline

    Raises:
        Exception: Any error raised while importing transformers or loading
            the model is logged and re-raised unchanged.
    """
    global _voice_emotion_pipeline

    # Fast path: an earlier call already loaded the model.
    if _voice_emotion_pipeline is not None:
        return _voice_emotion_pipeline

    try:
        # Imported inside the function, so transformers is only needed when
        # the model is first loaded.
        from transformers import pipeline as build_pipeline

        logger.info("Loading voice emotion recognition model...")
        _voice_emotion_pipeline = build_pipeline(
            "audio-classification",
            model="firdhokk/speech-emotion-recognition-with-openai-whisper-large-v3",
        )
        logger.info("Voice emotion recognition model loaded successfully")
    except Exception as load_error:
        logger.error(f"Failed to load voice emotion model: {load_error}")
        raise

    return _voice_emotion_pipeline
def analyze_voice_emotion_from_file(audio_file: UploadFile) -> Dict[str, Any]:
    """
    Analyze voice emotion from an uploaded audio file.

    The upload is copied to a temporary file because the pipeline is called
    with a file path. The temporary file is removed before this function
    returns or raises.

    Args:
        audio_file: FastAPI UploadFile containing audio data

    Returns:
        dict: ``emotion`` (label of the highest-scoring result),
        ``confidence`` (that result's score) and ``all_results`` (the full
        list returned by the pipeline)

    Raises:
        ValueError: If the pipeline returns no results.
        Exception: Any error from loading the model, writing the temporary
            file or running the pipeline is logged and re-raised.
    """
    temp_file_path = None
    try:
        # Get the pipeline
        pipe = get_voice_emotion_pipeline()

        # Keep the upload's extension on the temp file; fall back to '.webm'
        # when there is no filename or the filename has no extension.
        file_extension = os.path.splitext(audio_file.filename or "")[1] or ".webm"

        # delete=False so the file survives the ``with`` block and can be
        # handed to the pipeline by path; it is removed in ``finally`` below.
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=file_extension, mode="wb"
        ) as temp_file:
            # Record the path before copying, so a failed read/write does not
            # leave an orphaned temp file behind.
            temp_file_path = temp_file.name
            content = audio_file.file.read()
            temp_file.write(content)
            logger.info(f"Wrote {len(content)} bytes to temp file: {temp_file_path}")

        # Analyze the audio file
        logger.info(f"Analyzing voice emotion from file: {audio_file.filename}")
        results = pipe(temp_file_path)
        if not results:
            raise ValueError("No emotion analysis results returned")

        # Get the top result (highest confidence)
        top_result = max(results, key=lambda item: item["score"])
        emotion_detected = top_result["label"]
        confidence = top_result["score"]
        logger.info(
            f"Voice emotion detected: {emotion_detected} (confidence: {confidence:.3f})"
        )
        return {
            "emotion": emotion_detected,
            "confidence": confidence,
            "all_results": results,
        }
    except Exception as e:
        logger.error(f"Voice emotion analysis failed: {e}")
        raise
    finally:
        # Single cleanup point for both the success and the failure path.
        if temp_file_path is not None:
            try:
                os.unlink(temp_file_path)
            except FileNotFoundError:
                # Already gone; nothing to clean up.
                pass
            except OSError as cleanup_error:
                logger.warning(
                    f"Failed to delete temporary file {temp_file_path}: {cleanup_error}"
                )
@app.get("/")
async def greet_json():
    """Greeting endpoint: report service name, status, version and endpoints."""
    # Human-readable summary of the routes, keyed by path.
    endpoint_summary = {
        "/analyze": "POST - Analyze voice emotion from audio file",
        "/health": "GET - Health check",
        "/model-info": "GET - Model information",
        "/docs": "GET - API documentation",
    }
    return {
        "message": "Voice Emotion Recognition API",
        "status": "running",
        "version": "1.0.0",
        "endpoints": endpoint_summary,
    }
@app.get("/health")
async def health_check():
    """
    Health check endpoint.

    Obtains the pipeline via get_voice_emotion_pipeline() (which loads the
    model if it is not loaded yet) and reports the outcome.

    Returns:
        A dict with status "healthy" on success, or a 503 JSONResponse with
        status "unhealthy" and the error text if obtaining the pipeline raised.
    """
    service_name = "voice-emotion-recognition"

    try:
        # Check if model is loaded
        model_loaded = get_voice_emotion_pipeline() is not None
    except Exception as e:
        logger.error(f"Health check failed: {e}")
        failure_body = {
            "status": "unhealthy",
            "error": str(e),
            "service": service_name,
        }
        return JSONResponse(status_code=503, content=failure_body)

    return {
        "status": "healthy",
        "model_loaded": model_loaded,
        "service": service_name,
    }
@app.get("/model-info")
async def model_info():
    """
    Get model information endpoint.

    Returns:
        A dict with the model name, whether the pipeline could be obtained,
        and the format/duration/sample-rate/channel/size values advertised
        by this service.

    Raises:
        HTTPException: 500 if obtaining the pipeline raised.
    """
    try:
        model_loaded = get_voice_emotion_pipeline() is not None
    except Exception as e:
        logger.error(f"Failed to get model info: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get model info: {str(e)}")

    # The values below are constants reported to clients as-is.
    supported_formats = ["wav", "mp3", "flac", "m4a", "webm", "ogg", "opus"]
    return {
        "model_name": "firdhokk/speech-emotion-recognition-with-openai-whisper-large-v3",
        "model_loaded": model_loaded,
        "supported_formats": supported_formats,
        "max_duration_seconds": 30,
        "sample_rate": 16000,
        "channels": 1,
        "max_file_size_mb": 15,
    }
@app.post("/analyze")
async def analyze_audio(audio: UploadFile = File(...)):
    """
    Analyze voice emotion from uploaded audio file.

    Args:
        audio: Audio file (wav, mp3, flac, m4a, webm, ogg, opus)

    Returns:
        JSON response with ``ok``, ``emotion``, ``confidence`` and
        ``all_results``

    Raises:
        HTTPException: 400 when the filename is missing, the file is empty
            or the file is larger than 15MB; 500 for any other failure.
    """
    max_upload_bytes = 15 * 1024 * 1024  # 15MB

    try:
        # Validate file
        if not audio.filename:
            raise HTTPException(status_code=400, detail="No filename provided")

        # Measure the upload by seeking to its end, then rewind so the
        # analysis reads it from the start.
        upload_stream = audio.file
        upload_stream.seek(0, os.SEEK_END)
        file_size = upload_stream.tell()
        upload_stream.seek(0)

        if file_size == 0:
            raise HTTPException(status_code=400, detail="Audio file is empty")
        if file_size > max_upload_bytes:
            raise HTTPException(status_code=400, detail="Audio file too large (max 15MB)")

        # NOTE(review): this call is synchronous inside an async endpoint, so
        # the request is handled without yielding until analysis finishes.
        result = analyze_voice_emotion_from_file(audio)

        response_body = {"ok": True}
        for field in ("emotion", "confidence", "all_results"):
            response_body[field] = result[field]
        return response_body
    except HTTPException:
        # Validation errors above already carry the right status code.
        raise
    except Exception as e:
        logger.error(f"Error analyzing audio: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to analyze audio: {str(e)}",
        )
if __name__ == "__main__":
    # When executed as a script, serve the app with uvicorn on all
    # interfaces at port 7860.
    uvicorn.run(app, port=7860, host="0.0.0.0")