updated calibration
Browse files
app.py
CHANGED
|
@@ -102,7 +102,6 @@ async def generate_endpoint(
|
|
| 102 |
|
| 103 |
@app.post("/ui-tester")
|
| 104 |
async def ui_tester(file: UploadFile = File(...), description: str = Query(...)):
|
| 105 |
-
"""Calibrated Image-Text Matching using BLIP's native Cross-Entropy loss."""
|
| 106 |
image = Image.open(file.file).convert("RGB")
|
| 107 |
blip_data = MODELS["blip"]
|
| 108 |
|
|
@@ -110,17 +109,23 @@ async def ui_tester(file: UploadFile = File(...), description: str = Query(...))
|
|
| 110 |
|
| 111 |
with torch.no_grad():
|
| 112 |
outputs = blip_data["model"](**inputs, labels=inputs["input_ids"])
|
| 113 |
-
# Use mean loss (per-token) to avoid length bias
|
| 114 |
loss = outputs.loss.item()
|
| 115 |
|
| 116 |
-
#
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
return {
|
| 122 |
"confidence_score": f"{percentage_score}%",
|
| 123 |
"raw_loss": round(loss, 4),
|
| 124 |
-
"status": "Match Found" if percentage_score >
|
| 125 |
-
"is_valid": percentage_score >
|
| 126 |
}
|
|
|
|
| 102 |
|
| 103 |
@app.post("/ui-tester")
|
| 104 |
async def ui_tester(file: UploadFile = File(...), description: str = Query(...)):
|
|
|
|
| 105 |
image = Image.open(file.file).convert("RGB")
|
| 106 |
blip_data = MODELS["blip"]
|
| 107 |
|
|
|
|
| 109 |
|
| 110 |
with torch.no_grad():
|
| 111 |
outputs = blip_data["model"](**inputs, labels=inputs["input_ids"])
|
|
|
|
| 112 |
loss = outputs.loss.item()
|
| 113 |
|
| 114 |
+
# 1. Temperature scaling: T divides the sigmoid exponent, so T > 1 flattens the curve and softens the penalty for minor mismatches
|
| 115 |
+
T = 2.0
|
| 116 |
+
|
| 117 |
+
# 2. Logistic Calibration (Sigmoid)
|
| 118 |
+
# The curve is centered on a loss of 3.5 (the "uncertainty zone"), which maps to a score of exactly 50%
|
| 119 |
+
# With an effective slope of steepness / T = 0.75, a loss drop from 4.0 to 3.0 raises the score from about 41% to about 59%
|
| 120 |
+
steepness = 1.5
|
| 121 |
+
midpoint = 3.5
|
| 122 |
+
calibrated_score = 1 / (1 + torch.exp(torch.tensor(steepness * (loss - midpoint) / T))).item()
|
| 123 |
+
|
| 124 |
+
percentage_score = round(calibrated_score * 100, 2)
|
| 125 |
|
| 126 |
return {
|
| 127 |
"confidence_score": f"{percentage_score}%",
|
| 128 |
"raw_loss": round(loss, 4),
|
| 129 |
+
"status": "Match Found" if percentage_score > 50 else "Weak Match" if percentage_score > 25 else "No Match",
|
| 130 |
+
"is_valid": percentage_score > 50
|
| 131 |
}
|