Spaces:

Hammad712
/

lid

Sleeping

App Files Files Community

Hammad712 committed on Apr 14

Commit

c4b4df8

verified ·

1 Parent(s): 18866cf

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -68

app.py CHANGED Viewed

@@ -1,15 +1,16 @@
 import logging
-from fastapi import FastAPI, UploadFile, File, HTTPException
 import torchaudio
 import torch.nn.functional as F
-import torch
 import numpy as np
 import onnxruntime as ort
 from huggingface_hub import hf_hub_download
-import os
 # ==========================================
-# 1. Setup Production Logging
 # ==========================================
 logging.basicConfig(
     level=logging.INFO,
@@ -18,65 +19,67 @@ logging.basicConfig(
 )
 logger = logging.getLogger("LID_Engine")
-app = FastAPI(title="Pakistani LID AI Engine (Production)")
 # ==========================================
-# 2. Model Initialization (Fixing ONNX .data issue)
 # ==========================================
-logger.info("Initializing Application...")
 try:
-    # Creating a local directory so ONNX doesn't get confused in HF hidden cache
     os.makedirs("local_model", exist_ok=True)
-    logger.info("Downloading ONNX Data weights to local folder...")
-    hf_hub_download(
-        repo_id="Hammad712/pakistani-lid-v3-sota",
-        filename="pakistani_lid_v3.onnx.data",
-        local_dir="local_model"
-    )
-    logger.info("Downloading ONNX Structure to local folder...")
-    hf_hub_download(
-        repo_id="Hammad712/pakistani-lid-v3-sota",
-        filename="pakistani_lid_v3.onnx",
-        local_dir="local_model"
-    )
-    logger.info("Loading ONNX Session for CPU...")
-    # Explicitly point to the local file we just downloaded
-    local_model_path = os.path.join("local_model", "pakistani_lid_v3.onnx")
-    session = ort.InferenceSession(local_model_path, providers=['CPUExecutionProvider'])
-    logger.info("✅ ONNX Session successfully loaded and ready!")
 except Exception as e:
-    logger.error(f"❌ Failed to load model during startup: {e}")
     raise e
 labels = ("balochi", "english", "pashto", "sindhi", "urdu")
 id2label = {i: label for i, label in enumerate(labels)}
 # ==========================================
-# 3. Core Inference Logic
 # ==========================================
 def predict_audio(audio_path):
     waveform, sr = torchaudio.load(audio_path)
     if waveform.shape[0] > 1: waveform = waveform.mean(dim=0, keepdim=True)
     if waveform.ndim == 1: waveform = waveform.unsqueeze(0)
-    target_frames = int(sr * 15)
-    if waveform.shape[1] > target_frames: waveform = waveform[:, :target_frames]
-    if sr != 16000: waveform = torchaudio.functional.resample(waveform, sr, 16000)
-    peak = waveform.abs().max().clamp(min=1e-6)
-    waveform = (waveform / peak) - waveform.mean()
     waveform = waveform / waveform.std().clamp(min=1e-6)
     length = waveform.shape[1]
-    mask = torch.zeros(16000 * 15, dtype=torch.long)
-    if length >= 16000 * 15:
-        waveform, mask[:] = waveform[:, :16000 * 15], 1
-    else:
         mask[:length] = 1
-        waveform = F.pad(waveform, (0, 16000 * 15 - length))
     ort_inputs = {
         "input_values": waveform.numpy(),
@@ -84,44 +87,34 @@ def predict_audio(audio_path):
     }
     logits = session.run(None, ort_inputs)[0]
-    exp_logits = np.exp(logits - np.max(logits, axis=1, keepdims=True))
-    probs = exp_logits / np.sum(exp_logits, axis=1, keepdims=True)
     pred_id = np.argmax(probs, axis=1)[0]
     return id2label[pred_id], float(probs[0][pred_id])
 # ==========================================
-# 4. API Endpoints
 # ==========================================
 @app.post("/predict")
-async def predict_language(file: UploadFile = File(...)):
-    logger.info(f"Received request for file: {file.filename}")
-    if not file.filename.endswith(('.wav', '.mp3', '.m4a', '.ogg')):
-        logger.warning(f"Rejected invalid file type: {file.filename}")
-        raise HTTPException(status_code=400, detail="Invalid audio format. Please upload wav, mp3, m4a, or ogg.")
-    temp_audio_path = f"temp_{file.filename}"
     try:
-        # Save file
-        with open(temp_audio_path, "wb") as buffer:
-            buffer.write(await file.read())
-        # Predict
-        logger.info(f"Processing inference for {file.filename}...")
-        lang, confidence = predict_audio(temp_audio_path)
-        logger.info(f"✅ Prediction successful: {lang.upper()} ({confidence:.2%})")
-        # Cleanup
-        os.remove(temp_audio_path)
-        return {
-            "success": True,
-            "language": lang.upper(),
-            "confidence": round(confidence * 100, 2)
-        }
     except Exception as e:
-        logger.error(f"❌ Error processing {file.filename}: {str(e)}", exc_info=True)
-        if os.path.exists(temp_audio_path):
-            os.remove(temp_audio_path)
-        raise HTTPException(status_code=500, detail="Internal Server Error")

 import logging
+import os
+import torch
 import torchaudio
 import torch.nn.functional as F
 import numpy as np
 import onnxruntime as ort
+from fastapi import FastAPI, UploadFile, File, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
 from huggingface_hub import hf_hub_download
 # ==========================================
+# 1. Setup Logging
 # ==========================================
 logging.basicConfig(
     level=logging.INFO,
 )
 logger = logging.getLogger("LID_Engine")
+app = FastAPI(title="Pakistani LID AI Engine (SOTA V3)")
+# ✅ FIXING CORS: so that your HTML frontend can call this API
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
 # ==========================================
+# 2. Model Initialization
 # ==========================================
+logger.info("Initializing SOTA Engine...")
 try:
     os.makedirs("local_model", exist_ok=True)
+    # Download weights and structure
+    logger.info("Downloading ONNX files...")
+    hf_hub_download(repo_id="Hammad712/pakistani-lid-v3-sota", filename="pakistani_lid_v3.onnx.data", local_dir="local_model")
+    model_path = hf_hub_download(repo_id="Hammad712/pakistani-lid-v3-sota", filename="pakistani_lid_v3.onnx", local_dir="local_model")
+    # Load ONNX session
+    session = ort.InferenceSession(model_path, providers=['CPUExecutionProvider'])
+    logger.info("✅ Model loaded successfully!")
 except Exception as e:
+    logger.error(f"❌ Initialization failed: {e}")
     raise e
 labels = ("balochi", "english", "pashto", "sindhi", "urdu")
 id2label = {i: label for i, label in enumerate(labels)}
 # ==========================================
+# 3. Inference Logic
 # ==========================================
 def predict_audio(audio_path):
+    # Load audio with torchaudio and downmix multi-channel input to mono
     waveform, sr = torchaudio.load(audio_path)
     if waveform.shape[0] > 1: waveform = waveform.mean(dim=0, keepdim=True)
     if waveform.ndim == 1: waveform = waveform.unsqueeze(0)
+    # Resample and Preprocess
+    if sr != 16000:
+        waveform = torchaudio.functional.resample(waveform, sr, 16000)
+    target_frames = 16000 * 15
+    if waveform.shape[1] > target_frames:
+        waveform = waveform[:, :target_frames]
+    waveform = (waveform / waveform.abs().max().clamp(min=1e-6)) - waveform.mean()
     waveform = waveform / waveform.std().clamp(min=1e-6)
     length = waveform.shape[1]
+    mask = torch.zeros(target_frames, dtype=torch.long)
+    if length < target_frames:
         mask[:length] = 1
+        waveform = F.pad(waveform, (0, target_frames - length))
+    else:
+        mask[:] = 1
     ort_inputs = {
         "input_values": waveform.numpy(),
     }
     logits = session.run(None, ort_inputs)[0]
+    probs = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True)
     pred_id = np.argmax(probs, axis=1)[0]
     return id2label[pred_id], float(probs[0][pred_id])
 # ==========================================
+# 4. API Endpoint
 # ==========================================
 @app.post("/predict")
+async def predict(file: UploadFile = File(...)):
+    logger.info(f"Inference request: {file.filename}")
+    temp_path = f"temp_{file.filename}"
     try:
+        with open(temp_path, "wb") as f:
+            f.write(await file.read())
+        lang, conf = predict_audio(temp_path)
+        os.remove(temp_path)
+        logger.info(f"Result: {lang} ({conf:.2%})")
+        return {"success": True, "language": lang.upper(), "confidence": round(conf * 100, 2)}
     except Exception as e:
+        logger.error(f"Prediction error: {e}")
+        if os.path.exists(temp_path): os.remove(temp_path)
+        return {"success": False, "error": str(e)}
+@app.get("/")
+def health_check():
+    return {"status": "online", "model": "Pakistani LID V3 SOTA"}