Spaces:

anna0071234
/

voice-detection-sample

Sleeping

App Files Files Community

voice-detection-sample / app.py

anna0071234

Upload 3 files

c751c4c verified about 1 month ago

raw

history blame contribute delete

7.96 kB

	"""
	Voice Emotion Recognition API
	FastAPI application for analyzing voice emotions using Hugging Face transformers
	"""

	import logging
	import os
	import tempfile
	from typing import Any, Dict, Optional

	import uvicorn
	from fastapi import FastAPI, UploadFile, File, HTTPException
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.responses import JSONResponse

	# Configure logging
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Initialize FastAPI app
	app = FastAPI(
	    title="Voice Emotion Recognition API",
	    description="API for analyzing voice emotions using Hugging Face transformers",
	    version="1.0.0",
	)

	# Origins allowed to call this API from a browser. Set the CORS_ALLOW_ORIGINS
	# environment variable to a comma-separated list (e.g. the Django app URL) to
	# restrict access; when it is unset or empty, every origin is allowed.
	_cors_origins = [
	    origin.strip()
	    for origin in os.environ.get("CORS_ALLOW_ORIGINS", "*").split(",")
	    if origin.strip()
	] or ["*"]

	# Add CORS middleware for Django app integration
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=_cors_origins,
	    # A wildcard origin must not be combined with credentials (cookies /
	    # Authorization headers): that would let any website issue credentialed
	    # requests. Credentials are therefore only enabled for explicit origins.
	    allow_credentials="*" not in _cors_origins,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)

	# Global pipeline instance (loaded once for performance)
	_voice_emotion_pipeline = None

	def get_voice_emotion_pipeline():
	    """
	    Return the shared audio-classification pipeline, creating it on first use.

	    The pipeline is cached in the module-level ``_voice_emotion_pipeline``
	    variable, so the model is only loaded by the first call that succeeds.

	    Returns:
	        The object produced by ``transformers.pipeline`` for the
	        "audio-classification" task.

	    Raises:
	        Exception: Whatever the import or the model loading raised; the error
	            is logged and re-raised unchanged.
	    """
	    global _voice_emotion_pipeline

	    # Fast path: an earlier call already loaded the model.
	    if _voice_emotion_pipeline is not None:
	        return _voice_emotion_pipeline

	    try:
	        # Imported here rather than at module level, so transformers is only
	        # needed once the model actually has to be loaded.
	        from transformers import pipeline as build_pipeline

	        logger.info("Loading voice emotion recognition model...")
	        _voice_emotion_pipeline = build_pipeline(
	            "audio-classification",
	            model="firdhokk/speech-emotion-recognition-with-openai-whisper-large-v3",
	        )
	        logger.info("Voice emotion recognition model loaded successfully")
	    except Exception as load_error:
	        logger.error(f"Failed to load voice emotion model: {load_error}")
	        raise

	    return _voice_emotion_pipeline


	def analyze_voice_emotion_from_file(audio_file: UploadFile) -> Dict[str, Any]:
	    """
	    Analyze voice emotion from an uploaded audio file.

	    The upload is copied to a temporary file (the pipeline is called with a
	    file path), classified, and the temporary file is removed again whether
	    or not the analysis succeeds.

	    Args:
	        audio_file: FastAPI UploadFile containing audio data

	    Returns:
	        dict: ``emotion`` (label of the highest-scoring result),
	        ``confidence`` (that result's score) and ``all_results`` (the full
	        list returned by the pipeline).

	    Raises:
	        ValueError: If the pipeline returns no results.
	        Exception: Any other error raised while loading the model, writing
	            the temporary file or running the pipeline; it is logged and
	            re-raised unchanged.
	    """
	    temp_file_path = None
	    try:
	        # Get the pipeline
	        pipe = get_voice_emotion_pipeline()

	        # Keep the upload's extension on the temp file; default to .webm when
	        # the upload carries no filename.
	        file_extension = os.path.splitext(audio_file.filename)[1] if audio_file.filename else '.webm'

	        # Save uploaded file to temporary location
	        with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension, mode='wb') as temp_file:
	            # Record the path before reading/writing: the file is created
	            # with delete=False, so a failure below must still be cleaned up.
	            temp_file_path = temp_file.name
	            content = audio_file.file.read()
	            temp_file.write(content)

	        logger.info(f"Wrote {len(content)} bytes to temp file: {temp_file_path}")

	        # Analyze the audio file
	        logger.info(f"Analyzing voice emotion from file: {audio_file.filename}")
	        results = pipe(temp_file_path)

	        if not results:
	            raise ValueError("No emotion analysis results returned")

	        # Get the top result (highest confidence)
	        top_result = max(results, key=lambda x: x['score'])
	        emotion_detected = top_result['label']
	        confidence = top_result['score']

	        logger.info(f"Voice emotion detected: {emotion_detected} (confidence: {confidence:.3f})")

	        return {
	            'emotion': emotion_detected,
	            'confidence': confidence,
	            'all_results': results,
	        }
	    except Exception as e:
	        logger.error(f"Voice emotion analysis failed: {e}")
	        raise
	    finally:
	        # Single cleanup point for both the success and the failure path.
	        if temp_file_path is not None:
	            try:
	                os.unlink(temp_file_path)
	            except FileNotFoundError:
	                # Already gone; nothing left to clean up.
	                pass
	            except OSError as cleanup_error:
	                # Best effort: a leftover temp file must not mask the result
	                # or the original error.
	                logger.warning(f"Failed to delete temporary file {temp_file_path}: {cleanup_error}")


	@app.get("/")
	async def greet_json():
	    """Greeting endpoint: report service name, status, version and the available routes."""
	    # Human-readable summary of each route, keyed by path.
	    endpoint_help = {
	        "/analyze": "POST - Analyze voice emotion from audio file",
	        "/health": "GET - Health check",
	        "/model-info": "GET - Model information",
	        "/docs": "GET - API documentation",
	    }
	    return {
	        "message": "Voice Emotion Recognition API",
	        "status": "running",
	        "version": "1.0.0",
	        "endpoints": endpoint_help,
	    }


	@app.get("/health")
	async def health_check():
	    """
	    Health check endpoint.

	    Obtains the voice emotion pipeline (loading it if it is not cached yet)
	    and reports whether that worked.

	    Returns:
	        A dict with status "healthy" on success, or a 503 JSONResponse with
	        status "unhealthy" and the error text if obtaining the pipeline fails.
	    """
	    service_name = "voice-emotion-recognition"

	    try:
	        # Check if model is loaded
	        model_loaded = get_voice_emotion_pipeline() is not None
	    except Exception as e:
	        logger.error(f"Health check failed: {e}")
	        failure_body = {
	            "status": "unhealthy",
	            "error": str(e),
	            "service": service_name,
	        }
	        return JSONResponse(status_code=503, content=failure_body)

	    return {
	        "status": "healthy",
	        "model_loaded": model_loaded,
	        "service": service_name,
	    }


	@app.get("/model-info")
	async def model_info():
	    """
	    Get model information endpoint.

	    Returns:
	        A dict naming the model, whether it is loaded, and the input limits
	        this service advertises (formats, duration, sample rate, channels,
	        file size).

	    Raises:
	        HTTPException: 500 if the pipeline cannot be obtained.
	    """
	    try:
	        loaded_pipeline = get_voice_emotion_pipeline()
	    except Exception as e:
	        logger.error(f"Failed to get model info: {e}")
	        raise HTTPException(status_code=500, detail=f"Failed to get model info: {str(e)}")

	    info = {
	        "model_name": "firdhokk/speech-emotion-recognition-with-openai-whisper-large-v3",
	        "model_loaded": loaded_pipeline is not None,
	        "supported_formats": ["wav", "mp3", "flac", "m4a", "webm", "ogg", "opus"],
	        "max_duration_seconds": 30,
	        "sample_rate": 16000,
	        "channels": 1,
	        "max_file_size_mb": 15,
	    }
	    return info


	@app.post("/analyze")
	async def analyze_audio(audio: UploadFile = File(...)):
	    """
	    Analyze voice emotion from uploaded audio file.

	    Args:
	        audio: Audio file (wav, mp3, flac, m4a, webm, ogg, opus)

	    Returns:
	        JSON response with ``ok``, ``emotion``, ``confidence`` and
	        ``all_results``.

	    Raises:
	        HTTPException: 400 when the upload has no filename, is empty, or is
	            larger than 15MB; 500 when the analysis itself fails.
	    """
	    try:
	        # Validate file
	        if not audio.filename:
	            raise HTTPException(status_code=400, detail="No filename provided")

	        # Measure the upload by seeking to its end, then rewind so the
	        # analysis reads it from the start.
	        upload_stream = audio.file
	        upload_stream.seek(0, os.SEEK_END)
	        size_in_bytes = upload_stream.tell()
	        upload_stream.seek(0)

	        if size_in_bytes == 0:
	            raise HTTPException(status_code=400, detail="Audio file is empty")

	        size_limit_bytes = 15 * 1024 * 1024  # 15MB
	        if size_in_bytes > size_limit_bytes:
	            raise HTTPException(status_code=400, detail="Audio file too large (max 15MB)")

	        # Analyze the audio
	        analysis = analyze_voice_emotion_from_file(audio)

	        response_fields = ("emotion", "confidence", "all_results")
	        return {"ok": True, **{field: analysis[field] for field in response_fields}}

	    except HTTPException:
	        # Validation errors raised above already carry the right status code.
	        raise
	    except Exception as e:
	        logger.error(f"Error analyzing audio: {e}")
	        raise HTTPException(
	            status_code=500,
	            detail=f"Failed to analyze audio: {str(e)}"
	        )


	# Script entry point: serve the app with uvicorn when this file is run directly.
	if __name__ == "__main__":
	    # Listen on all interfaces. NOTE(review): port 7860 is presumably the
	    # port the hosting platform expects; confirm before changing it.
	    uvicorn.run(
	        app,
	        host="0.0.0.0",
	        port=7860,
	    )