Spaces:

SaniaE
/

Image_Captioning_Ensemble_API

Running

SaniaE committed 19 days ago

Commit

83aed07

verified ·

1 Parent(s): b197c79

updated calibration

Files changed (1) hide show

app.py CHANGED Viewed

@@ -102,7 +102,6 @@ async def generate_endpoint(
 @app.post("/ui-tester")
 async def ui_tester(file: UploadFile = File(...), description: str = Query(...)):
-    """Calibrated Image-Text Matching using BLIP's native Cross-Entropy loss."""
     image = Image.open(file.file).convert("RGB")
     blip_data = MODELS["blip"]
@@ -110,17 +109,23 @@ async def ui_tester(file: UploadFile = File(...), description: str = Query(...))
     with torch.no_grad():
         outputs = blip_data["model"](**inputs, labels=inputs["input_ids"])
-        # Use mean loss (per-token) to avoid length bias
         loss = outputs.loss.item()
-        # Calibration: Mapping typical loss ranges (1.5 - 4.5) to a 0-100% scale
-        # Loss of 1.5 -> ~100%, Loss of 5.0 -> 0%
-        normalized_score = max(0, min(1, (5.0 - loss) / 3.5))
-        percentage_score = round(normalized_score * 100, 2)
     return {
         "confidence_score": f"{percentage_score}%",
         "raw_loss": round(loss, 4),
-        "status": "Match Found" if percentage_score > 40 else "No Match",
-        "is_valid": percentage_score > 40
     }

 @app.post("/ui-tester")
 async def ui_tester(file: UploadFile = File(...), description: str = Query(...)):
     image = Image.open(file.file).convert("RGB")
     blip_data = MODELS["blip"]
     with torch.no_grad():
         outputs = blip_data["model"](**inputs, labels=inputs["input_ids"])
         loss = outputs.loss.item()
+        # 1. Temperature Scaling (Softens the penalty for minor mismatches)
+        T = 2.0
+        # 2. Logistic Calibration (Sigmoid)
+        # We center the curve around a loss of 3.5 (the "uncertainty zone")
+        # This makes the jump from 4.0 to 3.0 much more significant in the % score
+        steepness = 1.5
+        midpoint = 3.5
+        calibrated_score = 1 / (1 + torch.exp(torch.tensor(steepness * (loss - midpoint) / T))).item()
+    percentage_score = round(calibrated_score * 100, 2)
     return {
         "confidence_score": f"{percentage_score}%",
         "raw_loss": round(loss, 4),
+        "status": "Match Found" if percentage_score > 50 else "Weak Match" if percentage_score > 25 else "No Match",
+        "is_valid": percentage_score > 50
     }