SaniaE committed on
Commit
edccc41
·
verified ·
1 Parent(s): 05b6fbd

relative scoring

Browse files
Files changed (1) hide show
  1. app.py +23 -15
app.py CHANGED
@@ -105,11 +105,16 @@ async def ui_tester(file: UploadFile = File(...), description: str = Query(...))
105
  image = Image.open(file.file).convert("RGB")
106
  blip_data = MODELS["blip"]
107
 
108
- # 1. Get the Baseline (The model's "Perfect" loss for this image)
109
- # We generate a caption to see what the model thinks is a 100% match
110
  inputs_gen = blip_data["processor"](images=image, return_tensors="pt").to(DEVICE)
111
  with torch.no_grad():
112
- generated_ids = blip_data["model"].generate(**inputs_gen, max_length=50)
 
 
 
 
 
113
  baseline_caption = blip_data["processor"].decode(generated_ids[0], skip_special_tokens=True)
114
 
115
  # Calculate loss for the model's own generated caption
@@ -117,26 +122,29 @@ async def ui_tester(file: UploadFile = File(...), description: str = Query(...))
117
  baseline_outputs = blip_data["model"](**baseline_inputs, labels=baseline_inputs["input_ids"])
118
  baseline_loss = baseline_outputs.loss.item()
119
 
120
- # 2. Calculate User Loss
121
  user_inputs = blip_data["processor"](images=image, text=description, return_tensors="pt").to(DEVICE)
122
  with torch.no_grad():
123
  user_outputs = blip_data["model"](**user_inputs, labels=user_inputs["input_ids"])
124
  user_loss = user_outputs.loss.item()
125
 
126
- # 3. Relative Scoring (Intuition-based)
127
- # If user_loss is equal to or better than baseline_loss, it's a 90-100% match.
128
- # This accounts for the fact that UI images naturally have higher raw loss.
129
  relative_ratio = baseline_loss / user_loss
130
 
131
- # Apply a curve to make it feel "right"
132
- # A ratio of 1.0 (perfect match to model) -> ~95%
133
- # A ratio of 0.7 (pretty good) -> ~75%
134
- confidence_score = min(100.0, round((relative_ratio ** 2) * 95, 2))
135
 
136
  return {
137
  "confidence_score": f"{confidence_score}%",
138
- "model_best_guess": baseline_caption,
139
- "raw_user_loss": round(user_loss, 4),
140
- "status": "Match Found" if confidence_score > 60 else "No Match",
141
- "is_valid": confidence_score > 60
 
 
 
 
142
  }
 
105
  image = Image.open(file.file).convert("RGB")
106
  blip_data = MODELS["blip"]
107
 
108
+ # 1. GET THE BASELINE (The model's "Perfect" loss for its own perception)
109
+ # We generate a caption using high-precision parameters to see its "truth"
110
  inputs_gen = blip_data["processor"](images=image, return_tensors="pt").to(DEVICE)
111
  with torch.no_grad():
112
+ generated_ids = blip_data["model"].generate(
113
+ **inputs_gen,
114
+ max_length=50,
115
+ num_beams=5, # Higher beams for a more stable "best guess"
116
+ temperature=1.0
117
+ )
118
  baseline_caption = blip_data["processor"].decode(generated_ids[0], skip_special_tokens=True)
119
 
120
  # Calculate loss for the model's own generated caption
 
122
  baseline_outputs = blip_data["model"](**baseline_inputs, labels=baseline_inputs["input_ids"])
123
  baseline_loss = baseline_outputs.loss.item()
124
 
125
+ # 2. CALCULATE USER LOSS
126
  user_inputs = blip_data["processor"](images=image, text=description, return_tensors="pt").to(DEVICE)
127
  with torch.no_grad():
128
  user_outputs = blip_data["model"](**user_inputs, labels=user_inputs["input_ids"])
129
  user_loss = user_outputs.loss.item()
130
 
131
+ # 3. RELATIVE SCORING (The "Intuition" Fix)
132
+ # This ratio tells us how close the user is to the model's internal maximum confidence
 
133
  relative_ratio = baseline_loss / user_loss
134
 
135
+ # Scaling: If the user's loss matches the model's baseline loss (ratio 1.0), they get 100%.
136
+ # If they are significantly off (like Orange vs Yellow), they land in the 60s.
137
+ # This prevents the 0% "confusion ceiling" you saw earlier.
138
+ confidence_score = min(100.0, round((relative_ratio ** 1.5) * 100, 2))
139
 
140
  return {
141
  "confidence_score": f"{confidence_score}%",
142
+ "model_perceived_caption": baseline_caption,
143
+ "raw_metrics": {
144
+ "user_loss": round(user_loss, 4),
145
+ "baseline_loss": round(baseline_loss, 4),
146
+ "delta": round(user_loss - baseline_loss, 4)
147
+ },
148
+ "status": "Match Found" if confidence_score > 55 else "Partial Match" if confidence_score > 30 else "No Match",
149
+ "is_valid": confidence_score > 55
150
  }