Spaces:

SaniaE
/

Image_Captioning_Ensemble_API

Running

SaniaE committed 19 days ago

Commit

b197c79

verified ·

1 Parent(s): ffec26b

updated scoring logic

Files changed (1) hide show

app.py CHANGED Viewed

@@ -102,6 +102,7 @@ async def generate_endpoint(
 @app.post("/ui-tester")
 async def ui_tester(file: UploadFile = File(...), description: str = Query(...)):
     image = Image.open(file.file).convert("RGB")
     blip_data = MODELS["blip"]
@@ -109,19 +110,17 @@ async def ui_tester(file: UploadFile = File(...), description: str = Query(...))
     with torch.no_grad():
         outputs = blip_data["model"](**inputs, labels=inputs["input_ids"])
         loss = outputs.loss.item()
-        # Scaling the score to make 0.3 look like a "Strong Match"
-        # and 0.2 look like a "Poor Match"
-        # Using a steep sigmoid or a linear multiplier:
-        score = max(0, min(1, (1 / (loss + 1e-6)) * 0.5))
-    # Define thresholds based on your tests
-    status = "Match Found" if score > 0.25 else "No Match"
     return {
         "raw_loss": round(loss, 4),
-        "confidence_score": round(score, 4),
-        "status": status,
-        "is_valid": score > 0.25
     }

 @app.post("/ui-tester")
 async def ui_tester(file: UploadFile = File(...), description: str = Query(...)):
+    """Calibrated Image-Text Matching using BLIP's native Cross-Entropy loss."""
     image = Image.open(file.file).convert("RGB")
     blip_data = MODELS["blip"]
     with torch.no_grad():
         outputs = blip_data["model"](**inputs, labels=inputs["input_ids"])
+        # Use mean loss (per-token) to avoid length bias
         loss = outputs.loss.item()
+        # Calibration: Mapping typical loss ranges (1.5 - 4.5) to a 0-100% scale
+        # Loss of 1.5 -> ~100%, Loss of 5.0 -> 0%
+        normalized_score = max(0, min(1, (5.0 - loss) / 3.5))
+        percentage_score = round(normalized_score * 100, 2)
     return {
+        "confidence_score": f"{percentage_score}%",
         "raw_loss": round(loss, 4),
+        "status": "Match Found" if percentage_score > 40 else "No Match",
+        "is_valid": percentage_score > 40
     }