# Source: mohannad125 / app.py (commit 251ee29, verified)
import os
# Disable numba JIT and store its cache in /tmp (must be set before any numba-using import).
os.environ['NUMBA_DISABLE_JIT'] = '1'
os.environ['NUMBA_CACHE_DIR'] = '/tmp'
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse
from fastapi.middleware.cors import CORSMiddleware
import torch
import torch.nn as nn
import librosa
import numpy as np
import tempfile
import warnings
import uvicorn
from typing import Dict, List, Optional
from pydantic import BaseModel
from model_loader import safe_load_model, create_dummy_model
import soundfile as sf
import torchaudio
# Silence noisy but non-actionable warnings.
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)
# Create the FastAPI application
app = FastAPI(
title="Audio Emotion Recognition API",
description="API لتحليل المشاعر من الملفات الصوتية",
version="1.0.0",
docs_url="/docs",
redoc_url="/redoc"
)
# CORS setup: allow every origin/method/header so the bundled web UI can call the API
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Upload configuration
UPLOAD_FOLDER = 'uploads'
# Audio file extensions accepted by /predict
ALLOWED_EXTENSIONS = {'wav', 'mp3', 'flac', 'm4a', 'ogg'}
MAX_FILE_SIZE = 16 * 1024 * 1024 # 16MB
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
# Response models
class EmotionPrediction(BaseModel):
    """Response body for /predict."""
    predicted_emotion: str  # most likely emotion label
    confidence: float  # probability of the predicted label
    all_probabilities: Dict[str, float]  # label -> probability for every class
class HealthResponse(BaseModel):
    """Response body for /health."""
    status: str  # fixed "healthy" string when the endpoint responds
    model_loaded: bool  # whether a model object (real or dummy fallback) exists
    device: str  # torch device in use, e.g. "cpu" or "cuda"
class APIInfo(BaseModel):
    """Response body for /api (service metadata)."""
    message: str  # human-readable service name
    status: str  # service status string
    supported_formats: List[str]  # accepted upload extensions
    max_file_size: str  # upload limit, as display text
# Neural network model definition
class EmotionNet(nn.Module):
    """1-D CNN front-end -> bidirectional LSTM -> linear classifier head.

    Takes a (batch, n_features) float tensor of MFCC vectors and returns
    (batch, num_classes) raw logits. The submodule names/structure must not
    change: checkpoint state_dict keys depend on them.
    """

    def __init__(self, num_classes=8):
        super(EmotionNet, self).__init__()
        # Convolutional front-end over the feature axis.
        self.cnn = nn.Sequential(
            nn.Conv1d(1, 64, kernel_size=5, stride=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.Dropout(0.3)
        )
        # Two stacked bidirectional LSTM layers over the CNN output sequence.
        self.lstm = nn.LSTM(input_size=64, hidden_size=128, num_layers=2, batch_first=True, bidirectional=True)
        # Classifier head on the concatenated final hidden states (2 * 128 wide).
        self.fc = nn.Sequential(
            nn.Linear(128*2, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, num_classes)
        )

    def forward(self, x):
        # Insert a channel dimension: (batch, feat) -> (batch, 1, feat).
        conv_out = self.cnn(x.unsqueeze(1))
        # Rearrange to (batch, time, channels) for the batch-first LSTM.
        sequence = conv_out.permute(0, 2, 1)
        _, (hidden, _) = self.lstm(sequence)
        # Join the last forward and last backward hidden states.
        summary = torch.cat((hidden[-2], hidden[-1]), dim=1)
        return self.fc(summary)
# Model loading helper
def load_model():
    """Build an EmotionNet, load checkpoint weights, fall back to a dummy model.

    Returns a (model, device) pair; device is CUDA when available, else CPU.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    skeleton = EmotionNet(num_classes=8)
    loaded, ok = safe_load_model('cnn_lstm_emotion_model.pth', device, skeleton)
    if loaded is not None and ok:
        return loaded, device
    # Checkpoint missing or unreadable: serve a dummy model instead of crashing.
    print("⚠️ فشل تحميل النموذج، سيتم استخدام نموذج وهمي.")
    return create_dummy_model(num_classes=8).to(device), device
# Load the model once at application start-up.
model, device = load_model()
# Emotion class labels, in the index order produced by the model's output layer.
EMOTION_LABELS = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise', 'calm']
def allowed_file(filename: str) -> bool:
    """Return True when *filename* carries one of the supported audio extensions."""
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in ALLOWED_EXTENSIONS
# Audio feature extraction
def extract_features(file_path: str) -> np.ndarray:
    """Compute a 40-coefficient MFCC feature vector for one audio file.

    Decodes the file with soundfile, down-mixes multi-channel audio to mono,
    applies torchaudio's MFCC transform, and averages over time frames to get
    a fixed-length vector.

    Raises RuntimeError when decoding or the transform fails.
    """
    try:
        samples, sample_rate = sf.read(file_path, dtype='float32')
        # Down-mix multi-channel audio to a single channel.
        if samples.ndim > 1:
            samples = np.mean(samples, axis=1)
        # torchaudio expects a (channel, time) tensor.
        waveform = torch.from_numpy(samples).unsqueeze(0)
        transform = torchaudio.transforms.MFCC(sample_rate=sample_rate, n_mfcc=40)
        coefficients = transform(waveform)  # (1, n_mfcc, time_frames)
        # Average over time frames -> (n_mfcc,) vector.
        return coefficients.mean(dim=2).squeeze(0).numpy()
    except Exception as e:
        raise RuntimeError(f"Feature extraction failed: {e}")
# Routes
@app.get("/", response_class=RedirectResponse)
async def root():
    """Redirect the bare root URL to the web interface."""
    target = "/web"
    return RedirectResponse(url=target)
@app.get("/web", response_class=HTMLResponse)
async def web_interface():
    """Serve the static web UI from index.html, or 404 when it is missing."""
    try:
        with open('index.html', 'r', encoding='utf-8') as page:
            markup = page.read()
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="Web interface not found")
    return HTMLResponse(content=markup)
@app.get("/api", response_model=APIInfo)
async def api_info():
    """Return basic service metadata: status, accepted formats, size limit."""
    info = APIInfo(
        message="Audio Emotion Recognition API",
        status="running",
        supported_formats=list(ALLOWED_EXTENSIONS),
        max_file_size="16MB",
    )
    return info
@app.post("/predict", response_model=EmotionPrediction)
async def predict_emotion(audio: UploadFile = File(...)):
    """Classify the emotion expressed in an uploaded audio file.

    Validates the upload (present, supported extension, within size limit),
    extracts MFCC features, runs the model, and returns the top label with
    the full per-class probability distribution.

    Raises:
        HTTPException 400: missing file, unsupported format, or decode failure.
        HTTPException 413: payload larger than MAX_FILE_SIZE.
        HTTPException 500: no model loaded.
    """
    if model is None:
        raise HTTPException(status_code=500, detail="Model not loaded")
    if not audio.filename:
        raise HTTPException(status_code=400, detail="No file selected")
    if not allowed_file(audio.filename):
        raise HTTPException(status_code=400, detail={"error":"Unsupported file format","supported_formats":list(ALLOWED_EXTENSIONS)})
    content = await audio.read()
    # Fix: MAX_FILE_SIZE was configured but never enforced — reject oversized
    # uploads before touching the disk.
    if len(content) > MAX_FILE_SIZE:
        raise HTTPException(status_code=413, detail="File too large (max 16MB)")
    # Persist to a temporary file so the audio decoder can read from a path.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp:
        tmp.write(content)
        tmp_path = tmp.name
    try:
        try:
            features = extract_features(tmp_path)
        except RuntimeError as e:
            # Decode/transform failure is the client's problem (bad file).
            raise HTTPException(status_code=400, detail=str(e))
        tensor = torch.FloatTensor(features).unsqueeze(0).to(device)
        with torch.no_grad():
            outputs = model(tensor)
            probs = torch.softmax(outputs, dim=1)[0]
        result = {emotion: float(probs[i]) for i, emotion in enumerate(EMOTION_LABELS)}
        pred = max(result, key=result.get)
        return EmotionPrediction(predicted_emotion=pred, confidence=result[pred], all_probabilities=result)
    finally:
        # Always remove the temporary file, even on errors.
        os.remove(tmp_path)
@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Liveness probe: report model availability and the compute device."""
    loaded = model is not None
    return HealthResponse(status="healthy", model_loaded=loaded, device=str(device))
# Global fallback error handler
@app.exception_handler(Exception)
async def general_exception_handler(request, exc):
    """Convert any unhandled exception into a JSON 500 response."""
    detail = f"Internal server error: {exc}"
    return JSONResponse(status_code=500, content={"detail": detail})
# Entrypoint: run the server directly (port 7860, all interfaces).
if __name__ == '__main__':
    print(f"Starting API - model_loaded={model is not None}, device={device}")
    uvicorn.run(app, host="0.0.0.0", port=7860, workers=1)