Peiran committed on
Commit
ed54e20
·
1 Parent(s): f801064

UI update: mask model info, new layout (original on top, A/B outputs below), four scores per image, and CSV schema update

Browse files
Files changed (1) hide show
  1. app.py +93 -62
app.py CHANGED
@@ -90,12 +90,9 @@ def load_task(task_name: str):
90
  return pairs
91
 
92
 
93
- def _format_pair_header(pair: Dict[str, str]) -> str:
94
- return (
95
- f"**Test ID:** {pair['test_id']} \n"
96
- f"**Model A:** {pair['model1_name']} ({pair['model1_res']}) \n"
97
- f"**Model B:** {pair['model2_name']} ({pair['model2_res']})"
98
- )
99
 
100
 
101
  def _append_evaluation(task_name: str, pair: Dict[str, str], scores: Dict[str, int]) -> None:
@@ -113,10 +110,16 @@ def _append_evaluation(task_name: str, pair: Dict[str, str], scores: Dict[str, i
113
  "model2_res",
114
  "model1_path",
115
  "model2_path",
116
- "physical_interaction_fidelity_score",
117
- "optical_effect_accuracy_score",
118
- "semantic_functional_alignment_score",
119
- "overall_photorealism_score",
 
 
 
 
 
 
120
  ]
121
 
122
  with open(csv_path, "a", newline="", encoding="utf-8") as csv_file:
@@ -143,7 +146,8 @@ def on_task_change(task_name: str, _state_pairs: List[Dict[str, str]]):
143
  pairs = load_task(task_name)
144
  pair = pairs[0]
145
  header = _format_pair_header(pair)
146
- default_scores = [3, 3, 3, 3]
 
147
  return (
148
  pairs,
149
  gr.update(value=0, minimum=0, maximum=len(pairs) - 1, visible=(len(pairs) > 1)),
@@ -169,10 +173,8 @@ def on_pair_navigate(index: int, pairs: List[Dict[str, str]]):
169
  _resolve_image_path(pair["org_img"]),
170
  _resolve_image_path(pair["model1_path"]),
171
  _resolve_image_path(pair["model2_path"]),
172
- 3,
173
- 3,
174
- 3,
175
- 3,
176
  )
177
 
178
 
@@ -180,10 +182,14 @@ def on_submit(
180
  task_name: str,
181
  index: int,
182
  pairs: List[Dict[str, str]],
183
- physical_score: int,
184
- optical_score: int,
185
- semantic_score: int,
186
- overall_score: int,
 
 
 
 
187
  ):
188
  if not task_name:
189
  raise gr.Error("请先选择任务。")
@@ -193,10 +199,16 @@ def on_submit(
193
 
194
  pair = pairs[index]
195
  score_map = {
196
- "physical_interaction_fidelity_score": int(physical_score),
197
- "optical_effect_accuracy_score": int(optical_score),
198
- "semantic_functional_alignment_score": int(semantic_score),
199
- "overall_photorealism_score": int(overall_score),
 
 
 
 
 
 
200
  }
201
  _append_evaluation(task_name, pair, score_map)
202
 
@@ -212,10 +224,8 @@ def on_submit(
212
  _resolve_image_path(pair["org_img"]),
213
  _resolve_image_path(pair["model1_path"]),
214
  _resolve_image_path(pair["model2_path"]),
215
- 3,
216
- 3,
217
- 3,
218
- 3,
219
  gr.update(value=info + f" 自动跳转到下一组({next_index + 1}/{len(pairs)})。"),
220
  )
221
 
@@ -225,10 +235,8 @@ def on_submit(
225
  gr.update(),
226
  gr.update(),
227
  gr.update(),
228
- 3,
229
- 3,
230
- 3,
231
- 3,
232
  gr.update(value=info + " 已经是最后一组。"),
233
  )
234
 
@@ -262,21 +270,24 @@ with gr.Blocks(title="VisArena Human Evaluation") as demo:
262
 
263
  pair_header = gr.Markdown("")
264
 
 
265
  with gr.Row():
266
- with gr.Column(scale=1):
267
  orig_image = gr.Image(type="filepath", label="原图 Original", interactive=False)
268
- with gr.Column(scale=1):
269
- model1_image = gr.Image(type="filepath", label="模型 A 输出", interactive=False)
270
- with gr.Column(scale=1):
271
- model2_image = gr.Image(type="filepath", label="模型 B 输出", interactive=False)
272
 
273
  with gr.Row():
274
- with gr.Column():
275
- physical_input = gr.Slider(1, 5, value=3, step=1, label="物理交互保真度 (Physical Interaction Fidelity)")
276
- optical_input = gr.Slider(1, 5, value=3, step=1, label="光学效应准确度 (Optical Effect Accuracy)")
277
- with gr.Column():
278
- semantic_input = gr.Slider(1, 5, value=3, step=1, label="语义/功能对齐度 (Semantic/Functional Alignment)")
279
- overall_input = gr.Slider(1, 5, value=3, step=1, label="整体真实感 (Overall Photorealism)")
 
 
 
 
 
 
280
 
281
  submit_button = gr.Button("Submit Evaluation", variant="primary")
282
  feedback_box = gr.Markdown("")
@@ -292,10 +303,14 @@ with gr.Blocks(title="VisArena Human Evaluation") as demo:
292
  orig_image,
293
  model1_image,
294
  model2_image,
295
- physical_input,
296
- optical_input,
297
- semantic_input,
298
- overall_input,
 
 
 
 
299
  feedback_box,
300
  ],
301
  )
@@ -309,10 +324,14 @@ with gr.Blocks(title="VisArena Human Evaluation") as demo:
309
  orig_image,
310
  model1_image,
311
  model2_image,
312
- physical_input,
313
- optical_input,
314
- semantic_input,
315
- overall_input,
 
 
 
 
316
  ],
317
  )
318
 
@@ -322,10 +341,14 @@ with gr.Blocks(title="VisArena Human Evaluation") as demo:
322
  task_selector,
323
  index_slider,
324
  pair_state,
325
- physical_input,
326
- optical_input,
327
- semantic_input,
328
- overall_input,
 
 
 
 
329
  ],
330
  outputs=[
331
  index_slider,
@@ -333,10 +356,14 @@ with gr.Blocks(title="VisArena Human Evaluation") as demo:
333
  orig_image,
334
  model1_image,
335
  model2_image,
336
- physical_input,
337
- optical_input,
338
- semantic_input,
339
- overall_input,
 
 
 
 
340
  feedback_box,
341
  ],
342
  )
@@ -352,10 +379,14 @@ with gr.Blocks(title="VisArena Human Evaluation") as demo:
352
  orig_image,
353
  model1_image,
354
  model2_image,
355
- physical_input,
356
- optical_input,
357
- semantic_input,
358
- overall_input,
 
 
 
 
359
  feedback_box,
360
  ],
361
  )
 
90
  return pairs
91
 
92
 
93
+ def _format_pair_header(_pair: Dict[str, str]) -> str:
94
+ # Mask model identity in UI; keep header neutral
95
+ return ""
 
 
 
96
 
97
 
98
  def _append_evaluation(task_name: str, pair: Dict[str, str], scores: Dict[str, int]) -> None:
 
110
  "model2_res",
111
  "model1_path",
112
  "model2_path",
113
+ # Per-image scores for Model A (输出A)
114
+ "model1_physical_interaction_fidelity_score",
115
+ "model1_optical_effect_accuracy_score",
116
+ "model1_semantic_functional_alignment_score",
117
+ "model1_overall_photorealism_score",
118
+ # Per-image scores for Model B (输出B)
119
+ "model2_physical_interaction_fidelity_score",
120
+ "model2_optical_effect_accuracy_score",
121
+ "model2_semantic_functional_alignment_score",
122
+ "model2_overall_photorealism_score",
123
  ]
124
 
125
  with open(csv_path, "a", newline="", encoding="utf-8") as csv_file:
 
146
  pairs = load_task(task_name)
147
  pair = pairs[0]
148
  header = _format_pair_header(pair)
149
+ # Defaults for A and B (8 sliders total)
150
+ default_scores = [3, 3, 3, 3, 3, 3, 3, 3]
151
  return (
152
  pairs,
153
  gr.update(value=0, minimum=0, maximum=len(pairs) - 1, visible=(len(pairs) > 1)),
 
173
  _resolve_image_path(pair["org_img"]),
174
  _resolve_image_path(pair["model1_path"]),
175
  _resolve_image_path(pair["model2_path"]),
176
+ 3, 3, 3, 3, # A
177
+ 3, 3, 3, 3, # B
 
 
178
  )
179
 
180
 
 
182
  task_name: str,
183
  index: int,
184
  pairs: List[Dict[str, str]],
185
+ a_physical_score: int,
186
+ a_optical_score: int,
187
+ a_semantic_score: int,
188
+ a_overall_score: int,
189
+ b_physical_score: int,
190
+ b_optical_score: int,
191
+ b_semantic_score: int,
192
+ b_overall_score: int,
193
  ):
194
  if not task_name:
195
  raise gr.Error("请先选择任务。")
 
199
 
200
  pair = pairs[index]
201
  score_map = {
202
+ # Model A
203
+ "model1_physical_interaction_fidelity_score": int(a_physical_score),
204
+ "model1_optical_effect_accuracy_score": int(a_optical_score),
205
+ "model1_semantic_functional_alignment_score": int(a_semantic_score),
206
+ "model1_overall_photorealism_score": int(a_overall_score),
207
+ # Model B
208
+ "model2_physical_interaction_fidelity_score": int(b_physical_score),
209
+ "model2_optical_effect_accuracy_score": int(b_optical_score),
210
+ "model2_semantic_functional_alignment_score": int(b_semantic_score),
211
+ "model2_overall_photorealism_score": int(b_overall_score),
212
  }
213
  _append_evaluation(task_name, pair, score_map)
214
 
 
224
  _resolve_image_path(pair["org_img"]),
225
  _resolve_image_path(pair["model1_path"]),
226
  _resolve_image_path(pair["model2_path"]),
227
+ 3, 3, 3, 3,
228
+ 3, 3, 3, 3,
 
 
229
  gr.update(value=info + f" 自动跳转到下一组({next_index + 1}/{len(pairs)})。"),
230
  )
231
 
 
235
  gr.update(),
236
  gr.update(),
237
  gr.update(),
238
+ 3, 3, 3, 3,
239
+ 3, 3, 3, 3,
 
 
240
  gr.update(value=info + " 已经是最后一组。"),
241
  )
242
 
 
270
 
271
  pair_header = gr.Markdown("")
272
 
273
+ # Layout: Original on top, two outputs below with their own sliders
274
  with gr.Row():
275
+ with gr.Column(scale=12):
276
  orig_image = gr.Image(type="filepath", label="原图 Original", interactive=False)
 
 
 
 
277
 
278
  with gr.Row():
279
+ with gr.Column(scale=6):
280
+ model1_image = gr.Image(type="filepath", label="模型 A 输出", interactive=False)
281
+ a_physical_input = gr.Slider(1, 5, value=3, step=1, label="A: 物理交互保真度")
282
+ a_optical_input = gr.Slider(1, 5, value=3, step=1, label="A: 光学效应准确度")
283
+ a_semantic_input = gr.Slider(1, 5, value=3, step=1, label="A: 语义/功能对齐度")
284
+ a_overall_input = gr.Slider(1, 5, value=3, step=1, label="A: 整体真实感")
285
+ with gr.Column(scale=6):
286
+ model2_image = gr.Image(type="filepath", label="模型 B 输出", interactive=False)
287
+ b_physical_input = gr.Slider(1, 5, value=3, step=1, label="B: 物理交互保真度")
288
+ b_optical_input = gr.Slider(1, 5, value=3, step=1, label="B: 光学效应准确度")
289
+ b_semantic_input = gr.Slider(1, 5, value=3, step=1, label="B: 语义/功能对齐度")
290
+ b_overall_input = gr.Slider(1, 5, value=3, step=1, label="B: 整体真实感")
291
 
292
  submit_button = gr.Button("Submit Evaluation", variant="primary")
293
  feedback_box = gr.Markdown("")
 
303
  orig_image,
304
  model1_image,
305
  model2_image,
306
+ a_physical_input,
307
+ a_optical_input,
308
+ a_semantic_input,
309
+ a_overall_input,
310
+ b_physical_input,
311
+ b_optical_input,
312
+ b_semantic_input,
313
+ b_overall_input,
314
  feedback_box,
315
  ],
316
  )
 
324
  orig_image,
325
  model1_image,
326
  model2_image,
327
+ a_physical_input,
328
+ a_optical_input,
329
+ a_semantic_input,
330
+ a_overall_input,
331
+ b_physical_input,
332
+ b_optical_input,
333
+ b_semantic_input,
334
+ b_overall_input,
335
  ],
336
  )
337
 
 
341
  task_selector,
342
  index_slider,
343
  pair_state,
344
+ a_physical_input,
345
+ a_optical_input,
346
+ a_semantic_input,
347
+ a_overall_input,
348
+ b_physical_input,
349
+ b_optical_input,
350
+ b_semantic_input,
351
+ b_overall_input,
352
  ],
353
  outputs=[
354
  index_slider,
 
356
  orig_image,
357
  model1_image,
358
  model2_image,
359
+ a_physical_input,
360
+ a_optical_input,
361
+ a_semantic_input,
362
+ a_overall_input,
363
+ b_physical_input,
364
+ b_optical_input,
365
+ b_semantic_input,
366
+ b_overall_input,
367
  feedback_box,
368
  ],
369
  )
 
379
  orig_image,
380
  model1_image,
381
  model2_image,
382
+ a_physical_input,
383
+ a_optical_input,
384
+ a_semantic_input,
385
+ a_overall_input,
386
+ b_physical_input,
387
+ b_optical_input,
388
+ b_semantic_input,
389
+ b_overall_input,
390
  feedback_box,
391
  ],
392
  )