import os

# Disable numba's JIT and redirect its on-disk cache to /tmp: required when
# the app runs with a read-only home directory (e.g. containerised hosting).
# Must be set BEFORE librosa is imported, since librosa pulls in numba.
os.environ['NUMBA_DISABLE_JIT'] = '1'
os.environ['NUMBA_CACHE_DIR'] = '/tmp'

import tempfile
import warnings
from contextlib import suppress
from typing import Dict, List, Optional

import librosa  # noqa: F401 -- import kept so the numba setup above applies
import numpy as np
import soundfile as sf
import torch
import torch.nn as nn
import torchaudio
import uvicorn
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse
from pydantic import BaseModel

from model_loader import create_dummy_model, safe_load_model

# Ignore unimportant warnings from torch/librosa.
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)

# FastAPI application.
app = FastAPI(
    title="Audio Emotion Recognition API",
    description="API لتحليل المشاعر من الملفات الصوتية",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
)

# CORS: wide open -- this is a public demo API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Upload configuration.
UPLOAD_FOLDER = 'uploads'
ALLOWED_EXTENSIONS = {'wav', 'mp3', 'flac', 'm4a', 'ogg'}
MAX_FILE_SIZE = 16 * 1024 * 1024  # 16 MB
os.makedirs(UPLOAD_FOLDER, exist_ok=True)


# ---------- Response models ----------

class EmotionPrediction(BaseModel):
    """Prediction result for a single audio file."""
    predicted_emotion: str
    confidence: float
    all_probabilities: Dict[str, float]


class HealthResponse(BaseModel):
    """Service liveness / readiness information."""
    # NOTE(review): a `model_` field prefix triggers a protected-namespace
    # warning under pydantic v2 -- harmless, but confirm the pydantic version.
    status: str
    model_loaded: bool
    device: str


class APIInfo(BaseModel):
    """Static metadata returned by GET /api."""
    message: str
    status: str
    supported_formats: List[str]
    max_file_size: str


# ---------- Neural network ----------

class EmotionNet(nn.Module):
    """CNN + BiLSTM classifier over per-file MFCC mean vectors.

    Input: (batch, 40) MFCC means; output: (batch, num_classes) logits.
    """

    def __init__(self, num_classes: int = 8):
        super(EmotionNet, self).__init__()
        self.cnn = nn.Sequential(
            nn.Conv1d(1, 64, kernel_size=5, stride=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.Dropout(0.3),
        )
        self.lstm = nn.LSTM(input_size=64, hidden_size=128, num_layers=2,
                            batch_first=True, bidirectional=True)
        self.fc = nn.Sequential(
            nn.Linear(128 * 2, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.cnn(x.unsqueeze(1))   # (B, 64, T')
        x = x.permute(0, 2, 1)         # (B, T', 64) for batch_first LSTM
        _, (h_n, _) = self.lstm(x)
        # Concatenate the final forward and backward hidden states.
        h_n = torch.cat((h_n[-2], h_n[-1]), dim=1)
        return self.fc(h_n)


def load_model():
    """Load the trained checkpoint, falling back to a dummy model on failure.

    Returns:
        (model, device) -- the model is always usable, so the API can start
        even when the checkpoint file is missing or incompatible.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_instance = EmotionNet(num_classes=8)
    loaded_model, success = safe_load_model('cnn_lstm_emotion_model.pth',
                                            device, model_instance)
    if loaded_model is None or not success:
        print("⚠️ فشل تحميل النموذج، سيتم استخدام نموذج وهمي.")
        model_instance = create_dummy_model(num_classes=8).to(device)
        return model_instance, device
    return loaded_model, device


# Load the model once at application startup.
model, device = load_model()

# Label order must match the encoding used at training time -- TODO confirm.
EMOTION_LABELS = ['angry', 'disgust', 'fear', 'happy',
                  'neutral', 'sad', 'surprise', 'calm']


def allowed_file(filename: str) -> bool:
    """Return True when the filename carries a supported audio extension."""
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


def extract_features(file_path: str) -> np.ndarray:
    """Extract a 40-dim MFCC mean vector using soundfile + torchaudio.

    Raises:
        RuntimeError: when audio decoding or feature computation fails.
    """
    try:
        # Decode the audio; downmix multi-channel input to mono.
        signal, sr = sf.read(file_path, dtype='float32')
        if signal.ndim > 1:
            signal = np.mean(signal, axis=1)
        waveform = torch.from_numpy(signal).unsqueeze(0)  # (1, time)
        mfcc_transform = torchaudio.transforms.MFCC(sample_rate=sr, n_mfcc=40)
        mfcc = mfcc_transform(waveform)                   # (1, 40, frames)
        # Average over time frames -> fixed-size (40,) feature vector.
        return mfcc.mean(dim=2).squeeze(0).numpy()
    except Exception as e:
        raise RuntimeError(f"Feature extraction failed: {e}") from e


# ---------- Routes ----------

@app.get("/", response_class=RedirectResponse)
async def root():
    """Redirect the bare root URL to the web interface."""
    return RedirectResponse(url="/web")


@app.get("/web", response_class=HTMLResponse)
async def web_interface():
    """Serve the bundled single-page web UI from index.html."""
    try:
        with open('index.html', 'r', encoding='utf-8') as f:
            return HTMLResponse(content=f.read())
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="Web interface not found")


@app.get("/api", response_model=APIInfo)
async def api_info():
    """Return static API metadata."""
    return APIInfo(
        message="Audio Emotion Recognition API",
        status="running",
        supported_formats=list(ALLOWED_EXTENSIONS),
        max_file_size="16MB",
    )


@app.post("/predict", response_model=EmotionPrediction)
async def predict_emotion(audio: UploadFile = File(...)):
    """Classify the emotion expressed in an uploaded audio file.

    Raises:
        HTTPException 400: missing file, unsupported format, decode failure.
        HTTPException 413: file exceeds MAX_FILE_SIZE.
        HTTPException 500: model unavailable.
    """
    if model is None:
        raise HTTPException(status_code=500, detail="Model not loaded")
    if not audio.filename:
        raise HTTPException(status_code=400, detail="No file selected")
    if not allowed_file(audio.filename):
        raise HTTPException(
            status_code=400,
            detail={"error": "Unsupported file format",
                    "supported_formats": list(ALLOWED_EXTENSIONS)},
        )

    content = await audio.read()
    # Enforce the advertised upload limit (was declared but never checked).
    if len(content) > MAX_FILE_SIZE:
        raise HTTPException(status_code=413, detail="File too large (max 16MB)")

    # Persist to a temp file, keeping the real extension so decoders that
    # sniff by suffix (e.g. mp3/m4a) behave correctly.
    suffix = os.path.splitext(audio.filename)[1] or '.wav'
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(content)
        tmp_path = tmp.name

    try:
        try:
            features = extract_features(tmp_path)
        except RuntimeError as e:
            raise HTTPException(status_code=400, detail=str(e))
        tensor = torch.FloatTensor(features).unsqueeze(0).to(device)
        with torch.no_grad():
            outputs = model(tensor)
            probs = torch.softmax(outputs, dim=1)[0]
        result = {emotion: float(probs[i])
                  for i, emotion in enumerate(EMOTION_LABELS)}
        pred = max(result, key=result.get)
        return EmotionPrediction(predicted_emotion=pred,
                                 confidence=result[pred],
                                 all_probabilities=result)
    finally:
        # Best-effort cleanup; never mask the real response or exception.
        with suppress(OSError):
            os.remove(tmp_path)


@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Liveness probe reporting model and device status."""
    return HealthResponse(status="healthy",
                          model_loaded=model is not None,
                          device=str(device))


# Catch-all handler: surface unexpected errors as JSON 500 responses.
@app.exception_handler(Exception)
async def general_exception_handler(request, exc):
    return JSONResponse(status_code=500,
                        content={"detail": f"Internal server error: {exc}"})


if __name__ == '__main__':
    print(f"Starting API - model_loaded={model is not None}, device={device}")
    uvicorn.run(app, host="0.0.0.0", port=7860, workers=1)