Spaces:

peiranli0930
/

VisArena

Sleeping

App Files Files Community

Peiran committed on Oct 22

Commit

ed54e20

1 Parent(s): f801064

UI update: mask model info, new layout (original on top, A/B outputs below), four scores per image, and CSV schema update

Browse files

Files changed (1) hide show

app.py +93 -62

app.py CHANGED Viewed

@@ -90,12 +90,9 @@ def load_task(task_name: str):
     return pairs
-def _format_pair_header(pair: Dict[str, str]) -> str:
-    return (
-        f"**Test ID:** {pair['test_id']}  \n"
-        f"**Model A:** {pair['model1_name']} ({pair['model1_res']})  \n"
-        f"**Model B:** {pair['model2_name']} ({pair['model2_res']})"
-    )
 def _append_evaluation(task_name: str, pair: Dict[str, str], scores: Dict[str, int]) -> None:
@@ -113,10 +110,16 @@ def _append_evaluation(task_name: str, pair: Dict[str, str], scores: Dict[str, i
         "model2_res",
         "model1_path",
         "model2_path",
-        "physical_interaction_fidelity_score",
-        "optical_effect_accuracy_score",
-        "semantic_functional_alignment_score",
-        "overall_photorealism_score",
     ]
     with open(csv_path, "a", newline="", encoding="utf-8") as csv_file:
@@ -143,7 +146,8 @@ def on_task_change(task_name: str, _state_pairs: List[Dict[str, str]]):
     pairs = load_task(task_name)
     pair = pairs[0]
     header = _format_pair_header(pair)
-    default_scores = [3, 3, 3, 3]
     return (
         pairs,
         gr.update(value=0, minimum=0, maximum=len(pairs) - 1, visible=(len(pairs) > 1)),
@@ -169,10 +173,8 @@ def on_pair_navigate(index: int, pairs: List[Dict[str, str]]):
         _resolve_image_path(pair["org_img"]),
         _resolve_image_path(pair["model1_path"]),
         _resolve_image_path(pair["model2_path"]),
-        3,
-        3,
-        3,
-        3,
     )
@@ -180,10 +182,14 @@ def on_submit(
     task_name: str,
     index: int,
     pairs: List[Dict[str, str]],
-    physical_score: int,
-    optical_score: int,
-    semantic_score: int,
-    overall_score: int,
 ):
     if not task_name:
         raise gr.Error("请先选择任务。")
@@ -193,10 +199,16 @@ def on_submit(
     pair = pairs[index]
     score_map = {
-        "physical_interaction_fidelity_score": int(physical_score),
-        "optical_effect_accuracy_score": int(optical_score),
-        "semantic_functional_alignment_score": int(semantic_score),
-        "overall_photorealism_score": int(overall_score),
     }
     _append_evaluation(task_name, pair, score_map)
@@ -212,10 +224,8 @@ def on_submit(
             _resolve_image_path(pair["org_img"]),
             _resolve_image_path(pair["model1_path"]),
             _resolve_image_path(pair["model2_path"]),
-            3,
-            3,
-            3,
-            3,
             gr.update(value=info + f" 自动跳转到下一组（{next_index + 1}/{len(pairs)}）。"),
         )
@@ -225,10 +235,8 @@ def on_submit(
         gr.update(),
         gr.update(),
         gr.update(),
-        3,
-        3,
-        3,
-        3,
         gr.update(value=info + " 已经是最后一组。"),
     )
@@ -262,21 +270,24 @@ with gr.Blocks(title="VisArena Human Evaluation") as demo:
     pair_header = gr.Markdown("")
     with gr.Row():
-        with gr.Column(scale=1):
             orig_image = gr.Image(type="filepath", label="原图 Original", interactive=False)
-        with gr.Column(scale=1):
-            model1_image = gr.Image(type="filepath", label="模型 A 输出", interactive=False)
-        with gr.Column(scale=1):
-            model2_image = gr.Image(type="filepath", label="模型 B 输出", interactive=False)
     with gr.Row():
-        with gr.Column():
-            physical_input = gr.Slider(1, 5, value=3, step=1, label="物理交互保真度 (Physical Interaction Fidelity)")
-            optical_input = gr.Slider(1, 5, value=3, step=1, label="光学效应准确度 (Optical Effect Accuracy)")
-        with gr.Column():
-            semantic_input = gr.Slider(1, 5, value=3, step=1, label="语义/功能对齐度 (Semantic/Functional Alignment)")
-            overall_input = gr.Slider(1, 5, value=3, step=1, label="整体真实感 (Overall Photorealism)")
     submit_button = gr.Button("Submit Evaluation", variant="primary")
     feedback_box = gr.Markdown("")
@@ -292,10 +303,14 @@ with gr.Blocks(title="VisArena Human Evaluation") as demo:
             orig_image,
             model1_image,
             model2_image,
-            physical_input,
-            optical_input,
-            semantic_input,
-            overall_input,
             feedback_box,
         ],
     )
@@ -309,10 +324,14 @@ with gr.Blocks(title="VisArena Human Evaluation") as demo:
             orig_image,
             model1_image,
             model2_image,
-            physical_input,
-            optical_input,
-            semantic_input,
-            overall_input,
         ],
     )
@@ -322,10 +341,14 @@ with gr.Blocks(title="VisArena Human Evaluation") as demo:
             task_selector,
             index_slider,
             pair_state,
-            physical_input,
-            optical_input,
-            semantic_input,
-            overall_input,
         ],
         outputs=[
             index_slider,
@@ -333,10 +356,14 @@ with gr.Blocks(title="VisArena Human Evaluation") as demo:
             orig_image,
             model1_image,
             model2_image,
-            physical_input,
-            optical_input,
-            semantic_input,
-            overall_input,
             feedback_box,
         ],
     )
@@ -352,10 +379,14 @@ with gr.Blocks(title="VisArena Human Evaluation") as demo:
             orig_image,
             model1_image,
             model2_image,
-            physical_input,
-            optical_input,
-            semantic_input,
-            overall_input,
             feedback_box,
         ],
     )

     return pairs
+def _format_pair_header(_pair: Dict[str, str]) -> str:
+    # Mask model identity in UI; keep header neutral
+    return ""
 def _append_evaluation(task_name: str, pair: Dict[str, str], scores: Dict[str, int]) -> None:
         "model2_res",
         "model1_path",
         "model2_path",
+        # Per-image scores for Model A (输出A)
+        "model1_physical_interaction_fidelity_score",
+        "model1_optical_effect_accuracy_score",
+        "model1_semantic_functional_alignment_score",
+        "model1_overall_photorealism_score",
+        # Per-image scores for Model B (输出B)
+        "model2_physical_interaction_fidelity_score",
+        "model2_optical_effect_accuracy_score",
+        "model2_semantic_functional_alignment_score",
+        "model2_overall_photorealism_score",
     ]
     with open(csv_path, "a", newline="", encoding="utf-8") as csv_file:
     pairs = load_task(task_name)
     pair = pairs[0]
     header = _format_pair_header(pair)
+    # Defaults for A and B (8 sliders total)
+    default_scores = [3, 3, 3, 3, 3, 3, 3, 3]
     return (
         pairs,
         gr.update(value=0, minimum=0, maximum=len(pairs) - 1, visible=(len(pairs) > 1)),
         _resolve_image_path(pair["org_img"]),
         _resolve_image_path(pair["model1_path"]),
         _resolve_image_path(pair["model2_path"]),
+        3, 3, 3, 3,  # A
+        3, 3, 3, 3,  # B
     )
     task_name: str,
     index: int,
     pairs: List[Dict[str, str]],
+    a_physical_score: int,
+    a_optical_score: int,
+    a_semantic_score: int,
+    a_overall_score: int,
+    b_physical_score: int,
+    b_optical_score: int,
+    b_semantic_score: int,
+    b_overall_score: int,
 ):
     if not task_name:
         raise gr.Error("请先选择任务。")
     pair = pairs[index]
     score_map = {
+        # Model A
+        "model1_physical_interaction_fidelity_score": int(a_physical_score),
+        "model1_optical_effect_accuracy_score": int(a_optical_score),
+        "model1_semantic_functional_alignment_score": int(a_semantic_score),
+        "model1_overall_photorealism_score": int(a_overall_score),
+        # Model B
+        "model2_physical_interaction_fidelity_score": int(b_physical_score),
+        "model2_optical_effect_accuracy_score": int(b_optical_score),
+        "model2_semantic_functional_alignment_score": int(b_semantic_score),
+        "model2_overall_photorealism_score": int(b_overall_score),
     }
     _append_evaluation(task_name, pair, score_map)
             _resolve_image_path(pair["org_img"]),
             _resolve_image_path(pair["model1_path"]),
             _resolve_image_path(pair["model2_path"]),
+            3, 3, 3, 3,
+            3, 3, 3, 3,
             gr.update(value=info + f" 自动跳转到下一组（{next_index + 1}/{len(pairs)}）。"),
         )
         gr.update(),
         gr.update(),
         gr.update(),
+        3, 3, 3, 3,
+        3, 3, 3, 3,
         gr.update(value=info + " 已经是最后一组。"),
     )
     pair_header = gr.Markdown("")
+    # Layout: Original on top, two outputs below with their own sliders
     with gr.Row():
+        with gr.Column(scale=12):
             orig_image = gr.Image(type="filepath", label="原图 Original", interactive=False)
     with gr.Row():
+        with gr.Column(scale=6):
+            model1_image = gr.Image(type="filepath", label="模型 A 输出", interactive=False)
+            a_physical_input = gr.Slider(1, 5, value=3, step=1, label="A: 物理交互保真度")
+            a_optical_input = gr.Slider(1, 5, value=3, step=1, label="A: 光学效应准确度")
+            a_semantic_input = gr.Slider(1, 5, value=3, step=1, label="A: 语义/功能对齐度")
+            a_overall_input = gr.Slider(1, 5, value=3, step=1, label="A: 整体真实感")
+        with gr.Column(scale=6):
+            model2_image = gr.Image(type="filepath", label="模型 B 输出", interactive=False)
+            b_physical_input = gr.Slider(1, 5, value=3, step=1, label="B: 物理交互保真度")
+            b_optical_input = gr.Slider(1, 5, value=3, step=1, label="B: 光学效应准确度")
+            b_semantic_input = gr.Slider(1, 5, value=3, step=1, label="B: 语义/功能对齐度")
+            b_overall_input = gr.Slider(1, 5, value=3, step=1, label="B: 整体真实感")
     submit_button = gr.Button("Submit Evaluation", variant="primary")
     feedback_box = gr.Markdown("")
             orig_image,
             model1_image,
             model2_image,
+            a_physical_input,
+            a_optical_input,
+            a_semantic_input,
+            a_overall_input,
+            b_physical_input,
+            b_optical_input,
+            b_semantic_input,
+            b_overall_input,
             feedback_box,
         ],
     )
             orig_image,
             model1_image,
             model2_image,
+            a_physical_input,
+            a_optical_input,
+            a_semantic_input,
+            a_overall_input,
+            b_physical_input,
+            b_optical_input,
+            b_semantic_input,
+            b_overall_input,
         ],
     )
             task_selector,
             index_slider,
             pair_state,
+            a_physical_input,
+            a_optical_input,
+            a_semantic_input,
+            a_overall_input,
+            b_physical_input,
+            b_optical_input,
+            b_semantic_input,
+            b_overall_input,
         ],
         outputs=[
             index_slider,
             orig_image,
             model1_image,
             model2_image,
+            a_physical_input,
+            a_optical_input,
+            a_semantic_input,
+            a_overall_input,
+            b_physical_input,
+            b_optical_input,
+            b_semantic_input,
+            b_overall_input,
             feedback_box,
         ],
     )
             orig_image,
             model1_image,
             model2_image,
+            a_physical_input,
+            a_optical_input,
+            a_semantic_input,
+            a_overall_input,
+            b_physical_input,
+            b_optical_input,
+            b_semantic_input,
+            b_overall_input,
             feedback_box,
         ],
     )