SaniaE committed on
Commit
b197c79
·
verified ·
1 Parent(s): ffec26b

updated scoring logic

Browse files
Files changed (1) hide show
  1. app.py +9 -10
app.py CHANGED
@@ -102,6 +102,7 @@ async def generate_endpoint(
102
 
103
  @app.post("/ui-tester")
104
  async def ui_tester(file: UploadFile = File(...), description: str = Query(...)):
 
105
  image = Image.open(file.file).convert("RGB")
106
  blip_data = MODELS["blip"]
107
 
@@ -109,19 +110,17 @@ async def ui_tester(file: UploadFile = File(...), description: str = Query(...))
109
 
110
  with torch.no_grad():
111
  outputs = blip_data["model"](**inputs, labels=inputs["input_ids"])
 
112
  loss = outputs.loss.item()
113
 
114
- # Scaling the score to make 0.3 look like a "Strong Match"
115
- # and 0.2 look like a "Poor Match"
116
- # Using a steep sigmoid or a linear multiplier:
117
- score = max(0, min(1, (1 / (loss + 1e-6)) * 0.5))
118
-
119
- # Define thresholds based on your tests
120
- status = "Match Found" if score > 0.25 else "No Match"
121
 
122
  return {
 
123
  "raw_loss": round(loss, 4),
124
- "confidence_score": round(score, 4),
125
- "status": status,
126
- "is_valid": score > 0.25
127
  }
 
102
 
103
  @app.post("/ui-tester")
104
  async def ui_tester(file: UploadFile = File(...), description: str = Query(...)):
105
+ """Calibrated Image-Text Matching using BLIP's native Cross-Entropy loss."""
106
  image = Image.open(file.file).convert("RGB")
107
  blip_data = MODELS["blip"]
108
 
 
110
 
111
  with torch.no_grad():
112
  outputs = blip_data["model"](**inputs, labels=inputs["input_ids"])
113
+ # Use mean loss (per-token) to avoid length bias
114
  loss = outputs.loss.item()
115
 
116
+ # Calibration: Mapping typical loss ranges (1.5 - 5.0) to a 0-100% scale
117
+ # Loss of 1.5 -> ~100%, Loss of 5.0 -> 0%
118
+ normalized_score = max(0, min(1, (5.0 - loss) / 3.5))
119
+ percentage_score = round(normalized_score * 100, 2)
 
 
 
120
 
121
  return {
122
+ "confidence_score": f"{percentage_score}%",
123
  "raw_loss": round(loss, 4),
124
+ "status": "Match Found" if percentage_score > 40 else "No Match",
125
+ "is_valid": percentage_score > 40
 
126
  }