SaniaE committed on
Commit
05b6fbd
·
verified ·
1 Parent(s): 83aed07

updated scoring

Browse files
Files changed (1) hide show
  1. app.py +30 -19
app.py CHANGED
@@ -105,27 +105,38 @@ async def ui_tester(file: UploadFile = File(...), description: str = Query(...))
105
  image = Image.open(file.file).convert("RGB")
106
  blip_data = MODELS["blip"]
107
 
108
- inputs = blip_data["processor"](images=image, text=description, return_tensors="pt").to(DEVICE)
109
-
 
110
  with torch.no_grad():
111
- outputs = blip_data["model"](**inputs, labels=inputs["input_ids"])
112
- loss = outputs.loss.item()
113
-
114
- # 1. Temperature Scaling (Softens the penalty for minor mismatches)
115
- T = 2.0
116
 
117
- # 2. Logistic Calibration (Sigmoid)
118
- # We center the curve around a loss of 3.5 (the "uncertainty zone")
119
- # This makes the jump from 4.0 to 3.0 much more significant in the % score
120
- steepness = 1.5
121
- midpoint = 3.5
122
- calibrated_score = 1 / (1 + torch.exp(torch.tensor(steepness * (loss - midpoint) / T))).item()
123
-
124
- percentage_score = round(calibrated_score * 100, 2)
 
 
 
 
 
 
 
125
 
 
 
 
 
 
126
  return {
127
- "confidence_score": f"{percentage_score}%",
128
- "raw_loss": round(loss, 4),
129
- "status": "Match Found" if percentage_score > 50 else "Weak Match" if percentage_score > 25 else "No Match",
130
- "is_valid": percentage_score > 50
 
131
  }
 
105
  image = Image.open(file.file).convert("RGB")
106
  blip_data = MODELS["blip"]
107
 
108
+ # 1. Get the Baseline (The model's "Perfect" loss for this image)
109
+ # We generate a caption to see what the model thinks is a 100% match
110
+ inputs_gen = blip_data["processor"](images=image, return_tensors="pt").to(DEVICE)
111
  with torch.no_grad():
112
+ generated_ids = blip_data["model"].generate(**inputs_gen, max_length=50)
113
+ baseline_caption = blip_data["processor"].decode(generated_ids[0], skip_special_tokens=True)
 
 
 
114
 
115
+ # Calculate loss for the model's own generated caption
116
+ baseline_inputs = blip_data["processor"](images=image, text=baseline_caption, return_tensors="pt").to(DEVICE)
117
+ baseline_outputs = blip_data["model"](**baseline_inputs, labels=baseline_inputs["input_ids"])
118
+ baseline_loss = baseline_outputs.loss.item()
119
+
120
+ # 2. Calculate User Loss
121
+ user_inputs = blip_data["processor"](images=image, text=description, return_tensors="pt").to(DEVICE)
122
+ with torch.no_grad():
123
+ user_outputs = blip_data["model"](**user_inputs, labels=user_inputs["input_ids"])
124
+ user_loss = user_outputs.loss.item()
125
+
126
+ # 3. Relative Scoring (Intuition-based)
127
+ # If user_loss is equal to or lower than baseline_loss, it's a 95-100% match.
128
+ # This accounts for the fact that UI images naturally have higher raw loss.
129
+ relative_ratio = baseline_loss / user_loss
130
 
131
+ # Apply a curve to make it feel "right"
132
+ # A ratio of 1.0 (perfect match to model) -> ~95%
133
+ # A ratio of 0.7 (pretty good) -> ~47% (0.7 ** 2 * 95 = 46.55), i.e. below the 60% match threshold
134
+ confidence_score = min(100.0, round((relative_ratio ** 2) * 95, 2))
135
+
136
  return {
137
+ "confidence_score": f"{confidence_score}%",
138
+ "model_best_guess": baseline_caption,
139
+ "raw_user_loss": round(user_loss, 4),
140
+ "status": "Match Found" if confidence_score > 60 else "No Match",
141
+ "is_valid": confidence_score > 60
142
  }