Spaces:
Sleeping
Sleeping
| """ | |
| Voice Emotion Recognition API | |
| FastAPI application for analyzing voice emotions using Hugging Face transformers | |
| """ | |
| import logging | |
| import tempfile | |
| import os | |
| from typing import Dict, Optional | |
| from fastapi import FastAPI, UploadFile, File, HTTPException | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from fastapi.responses import JSONResponse | |
| import uvicorn | |
# Root logging setup: emit INFO and above; this module logs through `logger`.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# The ASGI application object that uvicorn serves.
app = FastAPI(
    version="1.0.0",
    title="Voice Emotion Recognition API",
    description="API for analyzing voice emotions using Hugging Face transformers",
)

# Cross-origin access for the Django app integration. Every origin, method and
# header is currently accepted; in production, restrict allow_origins to the
# Django app URL.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_credentials=True,
    allow_origins=["*"],
)

# Process-wide cache for the classification pipeline; stays None until
# get_voice_emotion_pipeline() performs the first load.
_voice_emotion_pipeline = None
def get_voice_emotion_pipeline():
    """
    Return the shared voice emotion recognition pipeline, creating it on first use.

    The pipeline is cached in the module-level ``_voice_emotion_pipeline`` so
    the model is loaded at most once per successful call sequence.

    Returns:
        transformers.pipeline: Voice emotion recognition pipeline

    Raises:
        Exception: Whatever the import or the model load raised; it is logged
            and then re-raised unchanged.
    """
    global _voice_emotion_pipeline

    # Fast path: reuse the instance built by an earlier call.
    if _voice_emotion_pipeline is not None:
        return _voice_emotion_pipeline

    try:
        # Imported lazily, inside the try, so an import failure is logged too.
        from transformers import pipeline

        logger.info("Loading voice emotion recognition model...")
        _voice_emotion_pipeline = pipeline(
            "audio-classification",
            model="firdhokk/speech-emotion-recognition-with-openai-whisper-large-v3",
        )
        logger.info("Voice emotion recognition model loaded successfully")
    except Exception as load_error:
        logger.error(f"Failed to load voice emotion model: {load_error}")
        raise

    return _voice_emotion_pipeline
def analyze_voice_emotion_from_file(audio_file: UploadFile) -> Dict[str, object]:
    """
    Analyze voice emotion from an uploaded audio file.

    The upload is copied to a temporary file on disk, the path is handed to the
    classification pipeline, and the temporary file is removed again whether or
    not the analysis succeeds.

    Args:
        audio_file: FastAPI UploadFile containing audio data

    Returns:
        dict: ``'emotion'`` (label of the highest-scoring result),
        ``'confidence'`` (that result's score) and ``'all_results'`` (the
        pipeline output as returned).

    Raises:
        ValueError: If the pipeline returns no results.
        Exception: Any other failure (model load, file I/O, inference) is
            logged and re-raised unchanged.
    """
    temp_file_path = None
    try:
        pipe = get_voice_emotion_pipeline()

        # Keep the upload's extension on the temp file; default to .webm when
        # the upload carries no filename.
        file_extension = os.path.splitext(audio_file.filename)[1] if audio_file.filename else '.webm'

        # delete=False: the file has to outlive the `with` block so the
        # pipeline can open it by path; it is removed in `finally` below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension, mode='wb') as temp_file:
            # Record the path before reading/writing so the file is still
            # cleaned up if either of those steps raises.
            temp_file_path = temp_file.name
            content = audio_file.file.read()
            temp_file.write(content)
        logger.info(f"Wrote {len(content)} bytes to temp file: {temp_file_path}")

        logger.info(f"Analyzing voice emotion from file: {audio_file.filename}")
        results = pipe(temp_file_path)
        if not results:
            raise ValueError("No emotion analysis results returned")

        # The top result is the entry with the highest score.
        top_result = max(results, key=lambda x: x['score'])
        emotion_detected = top_result['label']
        confidence = top_result['score']
        logger.info(f"Voice emotion detected: {emotion_detected} (confidence: {confidence:.3f})")

        return {
            'emotion': emotion_detected,
            'confidence': confidence,
            'all_results': results
        }
    except Exception as e:
        logger.error(f"Voice emotion analysis failed: {e}")
        raise
    finally:
        # Single cleanup point for both the success and the failure path.
        if temp_file_path is not None:
            try:
                os.unlink(temp_file_path)
            except FileNotFoundError:
                # Already gone; nothing to clean up.
                pass
            except OSError as e:
                # Best effort: a leftover temp file must not mask the result
                # or the original error.
                logger.warning(f"Failed to delete temporary file {temp_file_path}: {e}")
# NOTE(review): no route registration for this handler is visible in the file;
# "/" is assumed because it is the only path not claimed by the endpoints
# listed below -- confirm.
@app.get("/")
async def greet_json():
    """
    Health check / greeting endpoint.

    Returns:
        dict: Service name, status, version and a map of the available
        endpoints to a short description of each.
    """
    return {
        "message": "Voice Emotion Recognition API",
        "status": "running",
        "version": "1.0.0",
        "endpoints": {
            "/analyze": "POST - Analyze voice emotion from audio file",
            "/health": "GET - Health check",
            "/model-info": "GET - Model information",
            "/docs": "GET - API documentation"
        }
    }
@app.get("/health")
async def health_check():
    """
    Health check endpoint.

    Note that this obtains the pipeline through get_voice_emotion_pipeline(),
    which loads the model if it has not been loaded yet. The first call can
    therefore be slow.

    Returns:
        dict: ``status`` "healthy", ``model_loaded`` and ``service`` when the
        pipeline could be obtained.
        JSONResponse: HTTP 503 with ``status`` "unhealthy" and the error text
        when obtaining the pipeline raised.
    """
    try:
        # Obtaining the pipeline triggers a load when none is cached yet.
        voice_pipeline = get_voice_emotion_pipeline()
        model_loaded = voice_pipeline is not None
        return {
            "status": "healthy",
            "model_loaded": model_loaded,
            "service": "voice-emotion-recognition"
        }
    except Exception as e:
        logger.error(f"Health check failed: {e}")
        return JSONResponse(
            status_code=503,
            content={
                "status": "unhealthy",
                "error": str(e),
                "service": "voice-emotion-recognition"
            }
        )
@app.get("/model-info")
async def model_info():
    """
    Get model information endpoint.

    Obtains the pipeline through get_voice_emotion_pipeline(), which loads the
    model if it has not been loaded yet, and then reports static model metadata.

    Returns:
        dict: Model name, whether the model is loaded, supported formats and
        the advertised input limits.

    Raises:
        HTTPException: 500 when obtaining the pipeline fails.
    """
    try:
        voice_pipeline = get_voice_emotion_pipeline()
        model_loaded = voice_pipeline is not None
        return {
            "model_name": "firdhokk/speech-emotion-recognition-with-openai-whisper-large-v3",
            "model_loaded": model_loaded,
            "supported_formats": ["wav", "mp3", "flac", "m4a", "webm", "ogg", "opus"],
            "max_duration_seconds": 30,
            "sample_rate": 16000,
            "channels": 1,
            "max_file_size_mb": 15
        }
    except Exception as e:
        logger.error(f"Failed to get model info: {e}")
        # Chain the cause so the original traceback is preserved.
        raise HTTPException(status_code=500, detail=f"Failed to get model info: {str(e)}") from e
@app.post("/analyze")
async def analyze_audio(audio: UploadFile = File(...)):
    """
    Analyze voice emotion from uploaded audio file.

    Args:
        audio: Audio file (wav, mp3, flac, m4a, webm, ogg, opus)

    Returns:
        JSON response with ``ok``, ``emotion``, ``confidence`` and
        ``all_results``.

    Raises:
        HTTPException: 400 when the upload has no filename, is empty, or is
            larger than 15MB; 500 when the analysis itself fails.
    """
    max_size_bytes = 15 * 1024 * 1024  # 15MB

    try:
        if not audio.filename:
            raise HTTPException(status_code=400, detail="No filename provided")

        # Measure the upload by seeking to its end, then rewind so the
        # analysis step reads it from the start.
        audio.file.seek(0, os.SEEK_END)
        file_size = audio.file.tell()
        audio.file.seek(0)

        if file_size == 0:
            raise HTTPException(status_code=400, detail="Audio file is empty")
        if file_size > max_size_bytes:
            raise HTTPException(status_code=400, detail="Audio file too large (max 15MB)")

        # NOTE(review): this call is synchronous and runs inside an async
        # handler, so the event loop is blocked while inference runs.
        result = analyze_voice_emotion_from_file(audio)
        return {
            "ok": True,
            "emotion": result["emotion"],
            "confidence": result["confidence"],
            "all_results": result["all_results"]
        }
    except HTTPException:
        # Validation errors above already carry the right status code.
        raise
    except Exception as e:
        logger.error(f"Error analyzing audio: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to analyze audio: {str(e)}"
        ) from e
if __name__ == "__main__":
    # Run directly as a script: serve the app on all interfaces, port 7860.
    uvicorn.run(app, port=7860, host="0.0.0.0")