# Source: mohannad125 / app.py (commit 251ee29, verified)
import os
# Disable numba JIT and store its cache in /tmp (must be set before any numba-using import).
os.environ['NUMBA_DISABLE_JIT'] = '1'
os.environ['NUMBA_CACHE_DIR'] = '/tmp'
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse
from fastapi.middleware.cors import CORSMiddleware
import torch
import torch.nn as nn
import librosa
import numpy as np
import tempfile
import warnings
import uvicorn
from typing import Dict, List, Optional
from pydantic import BaseModel
from model_loader import safe_load_model, create_dummy_model
import soundfile as sf
import torchaudio
# Silence noisy but non-actionable warnings.
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)
# Create the FastAPI application
app = FastAPI(
title="Audio Emotion Recognition API",
description="API لتحليل المشاعر من الملفات الصوتية",
version="1.0.0",
docs_url="/docs",
redoc_url="/redoc"
)
# CORS setup: allow every origin/method/header so the bundled web UI can call the API
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Upload configuration
UPLOAD_FOLDER = 'uploads'
# Audio file extensions accepted by /predict
ALLOWED_EXTENSIONS = {'wav', 'mp3', 'flac', 'm4a', 'ogg'}
MAX_FILE_SIZE = 16 * 1024 * 1024 # 16MB
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
# Response models
class EmotionPrediction(BaseModel):
    """Response body for /predict."""
    predicted_emotion: str  # most likely emotion label
    confidence: float  # probability of the predicted label
    all_probabilities: Dict[str, float]  # label -> probability for every class
class HealthResponse(BaseModel):
    """Response body for /health."""
    status: str  # fixed "healthy" string when the endpoint responds
    model_loaded: bool  # whether a model object (real or dummy fallback) exists
    device: str  # torch device in use, e.g. "cpu" or "cuda"
class APIInfo(BaseModel):
    """Response body for /api (service metadata)."""
    message: str  # human-readable service name
    status: str  # service status string
    supported_formats: List[str]  # accepted upload extensions
    max_file_size: str  # upload limit, as display text
# Neural network model definition
class EmotionNet(nn.Module):
    """1-D CNN front-end -> bidirectional LSTM -> linear classifier head.

    Takes a (batch, n_features) float tensor of MFCC vectors and returns
    (batch, num_classes) raw logits. The submodule names/structure must not
    change: checkpoint state_dict keys depend on them.
    """

    def __init__(self, num_classes=8):
        super(EmotionNet, self).__init__()
        # Convolutional front-end over the feature axis.
        self.cnn = nn.Sequential(
            nn.Conv1d(1, 64, kernel_size=5, stride=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.Dropout(0.3)
        )
        # Two stacked bidirectional LSTM layers over the CNN output sequence.
        self.lstm = nn.LSTM(input_size=64, hidden_size=128, num_layers=2, batch_first=True, bidirectional=True)
        # Classifier head on the concatenated final hidden states (2 * 128 wide).
        self.fc = nn.Sequential(
            nn.Linear(128*2, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, num_classes)
        )

    def forward(self, x):
        # Insert a channel dimension: (batch, feat) -> (batch, 1, feat).
        conv_out = self.cnn(x.unsqueeze(1))
        # Rearrange to (batch, time, channels) for the batch-first LSTM.
        sequence = conv_out.permute(0, 2, 1)
        _, (hidden, _) = self.lstm(sequence)
        # Join the last forward and last backward hidden states.
        summary = torch.cat((hidden[-2], hidden[-1]), dim=1)
        return self.fc(summary)
# Model loading helper
def load_model():
    """Build an EmotionNet, load checkpoint weights, fall back to a dummy model.

    Returns a (model, device) pair; device is CUDA when available, else CPU.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    skeleton = EmotionNet(num_classes=8)
    loaded, ok = safe_load_model('cnn_lstm_emotion_model.pth', device, skeleton)
    if loaded is not None and ok:
        return loaded, device
    # Checkpoint missing or unreadable: serve a dummy model instead of crashing.
    print("⚠️ فشل تحميل النموذج، سيتم استخدام نموذج وهمي.")
    return create_dummy_model(num_classes=8).to(device), device
# Load the model once at application start-up.
model, device = load_model()
# Emotion class labels, in the index order produced by the model's output layer.
EMOTION_LABELS = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise', 'calm']
def allowed_file(filename: str) -> bool:
    """Return True when *filename* carries one of the supported audio extensions."""
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in ALLOWED_EXTENSIONS
# Audio feature extraction
def extract_features(file_path: str) -> np.ndarray:
    """Compute a 40-coefficient MFCC feature vector for one audio file.

    Decodes the file with soundfile, down-mixes multi-channel audio to mono,
    applies torchaudio's MFCC transform, and averages over time frames to get
    a fixed-length vector.

    Raises RuntimeError when decoding or the transform fails.
    """
    try:
        samples, sample_rate = sf.read(file_path, dtype='float32')
        # Down-mix multi-channel audio to a single channel.
        if samples.ndim > 1:
            samples = np.mean(samples, axis=1)
        # torchaudio expects a (channel, time) tensor.
        waveform = torch.from_numpy(samples).unsqueeze(0)
        transform = torchaudio.transforms.MFCC(sample_rate=sample_rate, n_mfcc=40)
        coefficients = transform(waveform)  # (1, n_mfcc, time_frames)
        # Average over time frames -> (n_mfcc,) vector.
        return coefficients.mean(dim=2).squeeze(0).numpy()
    except Exception as e:
        raise RuntimeError(f"Feature extraction failed: {e}")
# Routes
@app.get("/", response_class=RedirectResponse)
async def root():
    """Redirect the bare root URL to the web interface."""
    target = "/web"
    return RedirectResponse(url=target)
@app.get("/web", response_class=HTMLResponse)
async def web_interface():
    """Serve the static web UI from index.html, or 404 when it is missing."""
    try:
        with open('index.html', 'r', encoding='utf-8') as page:
            markup = page.read()
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="Web interface not found")
    return HTMLResponse(content=markup)
@app.get("/api", response_model=APIInfo)
async def api_info():
    """Return basic service metadata: status, accepted formats, size limit."""
    info = APIInfo(
        message="Audio Emotion Recognition API",
        status="running",
        supported_formats=list(ALLOWED_EXTENSIONS),
        max_file_size="16MB",
    )
    return info
@app.post("/predict", response_model=EmotionPrediction)
async def predict_emotion(audio: UploadFile = File(...)):
    """Classify the emotion expressed in an uploaded audio file.

    Validates the upload (present, supported extension, within size limit),
    extracts MFCC features, runs the model, and returns the top label with
    the full per-class probability distribution.

    Raises:
        HTTPException 400: missing file, unsupported format, or decode failure.
        HTTPException 413: payload larger than MAX_FILE_SIZE.
        HTTPException 500: no model loaded.
    """
    if model is None:
        raise HTTPException(status_code=500, detail="Model not loaded")
    if not audio.filename:
        raise HTTPException(status_code=400, detail="No file selected")
    if not allowed_file(audio.filename):
        raise HTTPException(status_code=400, detail={"error":"Unsupported file format","supported_formats":list(ALLOWED_EXTENSIONS)})
    content = await audio.read()
    # Fix: MAX_FILE_SIZE was configured but never enforced — reject oversized
    # uploads before touching the disk.
    if len(content) > MAX_FILE_SIZE:
        raise HTTPException(status_code=413, detail="File too large (max 16MB)")
    # Persist to a temporary file so the audio decoder can read from a path.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp:
        tmp.write(content)
        tmp_path = tmp.name
    try:
        try:
            features = extract_features(tmp_path)
        except RuntimeError as e:
            # Decode/transform failure is the client's problem (bad file).
            raise HTTPException(status_code=400, detail=str(e))
        tensor = torch.FloatTensor(features).unsqueeze(0).to(device)
        with torch.no_grad():
            outputs = model(tensor)
            probs = torch.softmax(outputs, dim=1)[0]
        result = {emotion: float(probs[i]) for i, emotion in enumerate(EMOTION_LABELS)}
        pred = max(result, key=result.get)
        return EmotionPrediction(predicted_emotion=pred, confidence=result[pred], all_probabilities=result)
    finally:
        # Always remove the temporary file, even on errors.
        os.remove(tmp_path)
@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Liveness probe: report model availability and the compute device."""
    loaded = model is not None
    return HealthResponse(status="healthy", model_loaded=loaded, device=str(device))
# Global fallback error handler
@app.exception_handler(Exception)
async def general_exception_handler(request, exc):
    """Convert any unhandled exception into a JSON 500 response."""
    detail = f"Internal server error: {exc}"
    return JSONResponse(status_code=500, content={"detail": detail})
# Entrypoint: run the server directly (port 7860, all interfaces).
if __name__ == '__main__':
    print(f"Starting API - model_loaded={model is not None}, device={device}")
    uvicorn.run(app, host="0.0.0.0", port=7860, workers=1)