Spaces:
Sleeping
Sleeping
ミスを発見
Browse files
app.py
CHANGED
|
@@ -1,17 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import sys
|
| 3 |
import glob
|
| 4 |
import json
|
| 5 |
import random
|
| 6 |
-
import re
|
| 7 |
from functools import partial
|
| 8 |
from datetime import datetime
|
| 9 |
from collections import defaultdict, Counter
|
| 10 |
|
| 11 |
import gradio as gr
|
| 12 |
from loguru import logger
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
-
# --- Global State
|
| 15 |
GLOBAL_STATE = {
|
| 16 |
"participant_id": None,
|
| 17 |
"data_loaded": False,
|
|
@@ -28,9 +33,10 @@ GLOBAL_STATE = {
|
|
| 28 |
"current_ranks": {},
|
| 29 |
"current_absolute_score": None,
|
| 30 |
"current_absolute_score_worst": None,
|
|
|
|
| 31 |
}
|
| 32 |
|
| 33 |
-
# --- Configuration
|
| 34 |
BASE_RESULTS_DIR = "./results"
|
| 35 |
LOG_DIR = "./logs"
|
| 36 |
COMBINED_DATA_DIR = "./combined_data"
|
|
@@ -52,7 +58,34 @@ CRITERIA_GUIDANCE_EN = [
|
|
| 52 |
IMAGE_LABELS = ['A', 'B', 'C', 'D', 'E']
|
| 53 |
|
| 54 |
|
| 55 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
def get_image_path_from_prediction(prediction: dict) -> str:
|
| 57 |
if not GLOBAL_STATE["image_mapping"]:
|
| 58 |
logger.error("Image mapping is not loaded.")
|
|
@@ -71,6 +104,15 @@ def get_image_path_from_prediction(prediction: dict) -> str:
|
|
| 71 |
|
| 72 |
|
| 73 |
def load_evaluation_data(participant_id: str):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
mapping_path = os.path.join(COMBINED_DATA_DIR, MAPPING_FILENAME)
|
| 75 |
if not os.path.exists(mapping_path):
|
| 76 |
return f"<p class='feedback red'>Error: Mapping file not found at {mapping_path}</p>", gr.update(
|
|
@@ -182,8 +224,6 @@ def handle_absolute_score_worst_click(score):
|
|
| 182 |
|
| 183 |
|
| 184 |
# --- UI Logic ---
|
| 185 |
-
|
| 186 |
-
# ▼▼▼ 修正後の display_current_prompt_and_criterion 関数 ▼▼▼
|
| 187 |
def display_current_prompt_and_criterion():
|
| 188 |
if not GLOBAL_STATE["data_loaded"] or GLOBAL_STATE["current_prompt_index"] >= len(GLOBAL_STATE["all_eval_data"]):
|
| 189 |
done_msg = "<p class='feedback green' style='text-align: center; font-size: 1.2em;'>All prompts have been evaluated! Please proceed to the 'Export' tab. <br>すべてのプロンプトの評価が完了しました!「エクスポート」タブに進んでください。</p>"
|
|
@@ -212,16 +252,11 @@ def display_current_prompt_and_criterion():
|
|
| 212 |
criterion_name = CRITERIA[criterion_idx]
|
| 213 |
|
| 214 |
progress_text = f"Prompt {GLOBAL_STATE['current_prompt_index'] + 1} / {len(GLOBAL_STATE['all_eval_data'])} - **{criterion_name}**"
|
| 215 |
-
|
| 216 |
-
# ▼▼▼ 修正1: プロンプト表示のフォントサイズを3倍に変更 ▼▼▼
|
| 217 |
prompt_display_text = f"<p style='font-size: 3em; font-weight: bold;'>テキスト(TEXT): {prompt_text}</p>"
|
| 218 |
-
|
| 219 |
-
# ▼▼▼ 修正2: 統合された指示文を生成(注意文の変更) ▼▼▼
|
| 220 |
guidance_part = (
|
| 221 |
f"<p style='color: red; font-weight: bold; font-size: 1.1em;'>"
|
| 222 |
f"5つの画像を、「{CRITERIA_GUIDANCE_JP[criterion_idx]}」を基準にランキングしてください。<br>"
|
| 223 |
f"Please rank the 5 images based on {CRITERIA_GUIDANCE_EN[criterion_idx]}"
|
| 224 |
-
|
| 225 |
f"</p>"
|
| 226 |
)
|
| 227 |
rules_part = (
|
|
@@ -240,7 +275,6 @@ def display_current_prompt_and_criterion():
|
|
| 240 |
"<p style='font-size: 0.9em; color: #555;'>"
|
| 241 |
"設問や英単語の意味についてはAIに質問したり検索したりしても構いません。ただし、画像そのものが示す感情をAIに質問するのはお控えください。<br>"
|
| 242 |
"You are welcome to use AI or web search to understand the questions or the meaning of English words. However, please refrain from asking an AI about the emotion shown in the images themselves."
|
| 243 |
-
|
| 244 |
"</p>"
|
| 245 |
)
|
| 246 |
combined_instructions = f"{guidance_part}<hr>{rules_part}<hr>{ai_note_part}"
|
|
@@ -257,7 +291,18 @@ def display_current_prompt_and_criterion():
|
|
| 257 |
for cond_name in current_image_order:
|
| 258 |
prediction = current_data["predictions"][cond_name]
|
| 259 |
img_path = get_image_path_from_prediction(prediction)
|
| 260 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
|
| 262 |
saved_ranks_dict = GLOBAL_STATE["evaluation_results"].get(prompt_text, {}).get("ranks", {}).get(criterion_name)
|
| 263 |
if saved_ranks_dict:
|
|
@@ -307,7 +352,7 @@ def display_current_prompt_and_criterion():
|
|
| 307 |
]
|
| 308 |
|
| 309 |
|
| 310 |
-
#
|
| 311 |
def validate_and_navigate():
|
| 312 |
ranks = GLOBAL_STATE["current_ranks"]
|
| 313 |
error_msg = None
|
|
@@ -406,7 +451,6 @@ def navigate_previous():
|
|
| 406 |
return display_current_prompt_and_criterion()
|
| 407 |
|
| 408 |
|
| 409 |
-
# ▼▼▼ 修正後の export_results 関数 (変更なし) ▼▼▼
|
| 410 |
def export_results(participant_id, alignment_reason, naturalness_reason, attractiveness_reason, optional_comment):
|
| 411 |
if not alignment_reason.strip() or not naturalness_reason.strip() or not attractiveness_reason.strip():
|
| 412 |
error_msg = "<p class='feedback red'>Please fill in the reasoning for all three criteria (Alignment, Naturalness, Attractiveness). / 3つの評価基準(一致度, 自然さ, 魅力度)すべての判断理由を記入してください。</p>"
|
|
@@ -480,7 +524,6 @@ def export_results(participant_id, alignment_reason, naturalness_reason, attract
|
|
| 480 |
return gr.update(value=filepath, visible=True), status_message
|
| 481 |
|
| 482 |
|
| 483 |
-
# ▼▼▼ 修正後の create_gradio_interface 関数 (変更なし) ▼▼▼
|
| 484 |
def create_gradio_interface():
|
| 485 |
css = """
|
| 486 |
.gradio-container { font-family: 'Arial', sans-serif; }
|
|
@@ -564,7 +607,7 @@ def create_gradio_interface():
|
|
| 564 |
with gr.Column(scale=1):
|
| 565 |
with gr.Group():
|
| 566 |
gr.Markdown(f"<div class='image-label' style='text-align: center;'>{label}</div>")
|
| 567 |
-
img = gr.Image(type="
|
| 568 |
image_components.append(img)
|
| 569 |
with gr.Row(elem_classes="rank-btn-row"):
|
| 570 |
rank_list = ["1位", "2位", "3位", "4位", "5位"]
|
|
@@ -637,7 +680,7 @@ def create_gradio_interface():
|
|
| 637 |
download_file = gr.File(label="Download JSON", visible=False)
|
| 638 |
export_status = gr.Markdown()
|
| 639 |
|
| 640 |
-
# --- Event Handlers
|
| 641 |
def check_and_confirm_id(pid):
|
| 642 |
pid = pid.strip()
|
| 643 |
if re.fullmatch(r"P\d{2}", pid):
|
|
|
|
| 1 |
+
# ==============================================================================
|
| 2 |
+
# evaluation_interface のコードブロック(修正済み)
|
| 3 |
+
# ==============================================================================
|
| 4 |
import os
|
| 5 |
import sys
|
| 6 |
import glob
|
| 7 |
import json
|
| 8 |
import random
|
|
|
|
| 9 |
from functools import partial
|
| 10 |
from datetime import datetime
|
| 11 |
from collections import defaultdict, Counter
|
| 12 |
|
| 13 |
import gradio as gr
|
| 14 |
from loguru import logger
|
| 15 |
+
from PIL import Image # ★ 修正点1: PILライブラリをインポート
|
| 16 |
+
import re
|
| 17 |
+
|
| 18 |
|
| 19 |
+
# --- Global State ---
|
| 20 |
GLOBAL_STATE = {
|
| 21 |
"participant_id": None,
|
| 22 |
"data_loaded": False,
|
|
|
|
| 33 |
"current_ranks": {},
|
| 34 |
"current_absolute_score": None,
|
| 35 |
"current_absolute_score_worst": None,
|
| 36 |
+
"hide_bbox_dict": {}, # ★ 修正点2: マスキング情報を格納するキーを追加
|
| 37 |
}
|
| 38 |
|
| 39 |
+
# --- Configuration ---
|
| 40 |
BASE_RESULTS_DIR = "./results"
|
| 41 |
LOG_DIR = "./logs"
|
| 42 |
COMBINED_DATA_DIR = "./combined_data"
|
|
|
|
| 58 |
IMAGE_LABELS = ['A', 'B', 'C', 'D', 'E']
|
| 59 |
|
| 60 |
|
| 61 |
+
# ★ 修正点3: マスキング用のヘルパー関数を classification_interface からコピー
|
| 62 |
+
# --- Helper Functions ---
|
| 63 |
+
def load_bbox_json(bbox_json_path):
    """Load bounding-box definitions from a JSON file and cache the hide regions.

    The file at ``bbox_json_path`` is read as UTF-8 JSON. The value stored
    under its top-level "Hide" key (an empty dict when the key is absent) is
    written to ``GLOBAL_STATE["hide_bbox_dict"]``.

    Any exception raised while reading or parsing is logged, and the cached
    hide regions are reset to an empty dict.
    """
    try:
        with open(bbox_json_path, 'r', encoding='utf-8') as handle:
            parsed = json.load(handle)
        # This interface only needs the regions that must be hidden.
        hidden_regions = parsed.get("Hide", {})
        GLOBAL_STATE["hide_bbox_dict"] = hidden_regions
        logger.info(f"Successfully loaded bounding box data from {bbox_json_path}")
    except Exception as err:
        logger.error(f"Failed to load bounding box JSON: {err}")
        GLOBAL_STATE["hide_bbox_dict"] = {}
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def create_masked_image(image: Image.Image):
    """Return ``image`` with every cached hide region painted black.

    The regions are read from ``GLOBAL_STATE["hide_bbox_dict"]``; each entry
    must provide 'left', 'top', 'right' and 'bottom' keys. When no regions
    are cached, the input image object itself is returned unchanged.
    Otherwise the masking is applied to a copy, so the caller's image is not
    modified.
    """
    regions = GLOBAL_STATE.get("hide_bbox_dict", {})
    if regions:
        result = image.copy()
        for coords in regions.values():
            left, top, right, bottom = (
                coords[edge] for edge in ('left', 'top', 'right', 'bottom')
            )
            # Build a solid black patch the size of the region and stamp it
            # at the region's top-left corner.
            patch = Image.new('RGB', (right - left, bottom - top), color='black')
            result.paste(patch, (left, top))
        return result
    return image
|
| 87 |
+
|
| 88 |
+
|
| 89 |
def get_image_path_from_prediction(prediction: dict) -> str:
|
| 90 |
if not GLOBAL_STATE["image_mapping"]:
|
| 91 |
logger.error("Image mapping is not loaded.")
|
|
|
|
| 104 |
|
| 105 |
|
| 106 |
def load_evaluation_data(participant_id: str):
|
| 107 |
+
# ★ 修正点4: バウンディングボックス情報を読み込む処理を追加
|
| 108 |
+
# classification_interfaceのパス構造を参考に、bboxファイルのパスを構築
|
| 109 |
+
bbox_json_path = os.path.join(COMBINED_DATA_DIR, "lapwing", "texts", "bounding_boxes.json")
|
| 110 |
+
if os.path.exists(bbox_json_path):
|
| 111 |
+
load_bbox_json(bbox_json_path)
|
| 112 |
+
else:
|
| 113 |
+
logger.warning(f"Bounding box file not found at {bbox_json_path}. Images will not be masked.")
|
| 114 |
+
GLOBAL_STATE["hide_bbox_dict"] = {}
|
| 115 |
+
|
| 116 |
mapping_path = os.path.join(COMBINED_DATA_DIR, MAPPING_FILENAME)
|
| 117 |
if not os.path.exists(mapping_path):
|
| 118 |
return f"<p class='feedback red'>Error: Mapping file not found at {mapping_path}</p>", gr.update(
|
|
|
|
| 224 |
|
| 225 |
|
| 226 |
# --- UI Logic ---
|
|
|
|
|
|
|
| 227 |
def display_current_prompt_and_criterion():
|
| 228 |
if not GLOBAL_STATE["data_loaded"] or GLOBAL_STATE["current_prompt_index"] >= len(GLOBAL_STATE["all_eval_data"]):
|
| 229 |
done_msg = "<p class='feedback green' style='text-align: center; font-size: 1.2em;'>All prompts have been evaluated! Please proceed to the 'Export' tab. <br>すべてのプロンプトの評価が完了しました!「エクスポート」タブに進んでください。</p>"
|
|
|
|
| 252 |
criterion_name = CRITERIA[criterion_idx]
|
| 253 |
|
| 254 |
progress_text = f"Prompt {GLOBAL_STATE['current_prompt_index'] + 1} / {len(GLOBAL_STATE['all_eval_data'])} - **{criterion_name}**"
|
|
|
|
|
|
|
| 255 |
prompt_display_text = f"<p style='font-size: 3em; font-weight: bold;'>テキスト(TEXT): {prompt_text}</p>"
|
|
|
|
|
|
|
| 256 |
guidance_part = (
|
| 257 |
f"<p style='color: red; font-weight: bold; font-size: 1.1em;'>"
|
| 258 |
f"5つの画像を、「{CRITERIA_GUIDANCE_JP[criterion_idx]}」を基準にランキングしてください。<br>"
|
| 259 |
f"Please rank the 5 images based on {CRITERIA_GUIDANCE_EN[criterion_idx]}"
|
|
|
|
| 260 |
f"</p>"
|
| 261 |
)
|
| 262 |
rules_part = (
|
|
|
|
| 275 |
"<p style='font-size: 0.9em; color: #555;'>"
|
| 276 |
"設問や英単語の意味についてはAIに質問したり検索したりしても構いません。ただし、画像そのものが示す感情をAIに質問するのはお控えください。<br>"
|
| 277 |
"You are welcome to use AI or web search to understand the questions or the meaning of English words. However, please refrain from asking an AI about the emotion shown in the images themselves."
|
|
|
|
| 278 |
"</p>"
|
| 279 |
)
|
| 280 |
combined_instructions = f"{guidance_part}<hr>{rules_part}<hr>{ai_note_part}"
|
|
|
|
| 291 |
for cond_name in current_image_order:
|
| 292 |
prediction = current_data["predictions"][cond_name]
|
| 293 |
img_path = get_image_path_from_prediction(prediction)
|
| 294 |
+
|
| 295 |
+
# ★ 修正点5: 画像を読み込み、マスキングを適用する
|
| 296 |
+
if img_path and os.path.exists(img_path):
|
| 297 |
+
try:
|
| 298 |
+
pil_img = Image.open(img_path).convert('RGB')
|
| 299 |
+
masked_img = create_masked_image(pil_img)
|
| 300 |
+
image_updates.append(gr.update(value=masked_img))
|
| 301 |
+
except Exception as e:
|
| 302 |
+
logger.error(f"Failed to open or mask image {img_path}: {e}")
|
| 303 |
+
image_updates.append(gr.update(value=None))
|
| 304 |
+
else:
|
| 305 |
+
image_updates.append(gr.update(value=None))
|
| 306 |
|
| 307 |
saved_ranks_dict = GLOBAL_STATE["evaluation_results"].get(prompt_text, {}).get("ranks", {}).get(criterion_name)
|
| 308 |
if saved_ranks_dict:
|
|
|
|
| 352 |
]
|
| 353 |
|
| 354 |
|
| 355 |
+
# --- (以降の関数は変更なし) ---
|
| 356 |
def validate_and_navigate():
|
| 357 |
ranks = GLOBAL_STATE["current_ranks"]
|
| 358 |
error_msg = None
|
|
|
|
| 451 |
return display_current_prompt_and_criterion()
|
| 452 |
|
| 453 |
|
|
|
|
| 454 |
def export_results(participant_id, alignment_reason, naturalness_reason, attractiveness_reason, optional_comment):
|
| 455 |
if not alignment_reason.strip() or not naturalness_reason.strip() or not attractiveness_reason.strip():
|
| 456 |
error_msg = "<p class='feedback red'>Please fill in the reasoning for all three criteria (Alignment, Naturalness, Attractiveness). / 3つの評価基準(一致度, 自然さ, 魅力度)すべての判断理由を記入してください。</p>"
|
|
|
|
| 524 |
return gr.update(value=filepath, visible=True), status_message
|
| 525 |
|
| 526 |
|
|
|
|
| 527 |
def create_gradio_interface():
|
| 528 |
css = """
|
| 529 |
.gradio-container { font-family: 'Arial', sans-serif; }
|
|
|
|
| 607 |
with gr.Column(scale=1):
|
| 608 |
with gr.Group():
|
| 609 |
gr.Markdown(f"<div class='image-label' style='text-align: center;'>{label}</div>")
|
| 610 |
+
img = gr.Image(type="pil", show_label=False, height=300) # typeを"pil"に変更
|
| 611 |
image_components.append(img)
|
| 612 |
with gr.Row(elem_classes="rank-btn-row"):
|
| 613 |
rank_list = ["1位", "2位", "3位", "4位", "5位"]
|
|
|
|
| 680 |
download_file = gr.File(label="Download JSON", visible=False)
|
| 681 |
export_status = gr.Markdown()
|
| 682 |
|
| 683 |
+
# --- Event Handlers ---
|
| 684 |
def check_and_confirm_id(pid):
|
| 685 |
pid = pid.strip()
|
| 686 |
if re.fullmatch(r"P\d{2}", pid):
|
combined_data/lapwing/texts/bounding_boxes.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Show": {
|
| 3 |
+
"Eyebrow": {
|
| 4 |
+
"left": 120,
|
| 5 |
+
"top": 200,
|
| 6 |
+
"right": 392,
|
| 7 |
+
"bottom": 302
|
| 8 |
+
},
|
| 9 |
+
"Eyes": {
|
| 10 |
+
"left": 130,
|
| 11 |
+
"top": 270,
|
| 12 |
+
"right": 382,
|
| 13 |
+
"bottom": 365
|
| 14 |
+
}
|
| 15 |
+
},
|
| 16 |
+
"Hide": {
|
| 17 |
+
"Mouth": {
|
| 18 |
+
"left": 160,
|
| 19 |
+
"top": 409,
|
| 20 |
+
"right": 352,
|
| 21 |
+
"bottom": 482
|
| 22 |
+
}
|
| 23 |
+
},
|
| 24 |
+
"Crop": {
|
| 25 |
+
"Crop": {
|
| 26 |
+
"left": 26,
|
| 27 |
+
"top": 26,
|
| 28 |
+
"right": 486,
|
| 29 |
+
"bottom": 486
|
| 30 |
+
}
|
| 31 |
+
}
|
| 32 |
+
}
|
logs/evaluation_ui_log_2025-07-31_11-17-08_603378.log
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-07-31 11:17:27.491 | INFO | __main__:load_bbox_json:70 - Successfully loaded bounding box data from ./combined_data\lapwing\texts\bounding_boxes.json
|
| 2 |
+
2025-07-31 11:17:27.493 | INFO | __main__:load_evaluation_data:124 - Successfully loaded image mapping. Image directory: ./combined_data\lapwing\images
|
| 3 |
+
2025-07-31 11:17:27.520 | INFO | __main__:load_evaluation_data:172 - Loaded and merged data for 2 prompts.
|
| 4 |
+
2025-07-31 11:17:49.041 | INFO | __main__:validate_and_navigate:429 - Saved rank for P:P99, Prompt:'happy', Criterion:Alignment, Ranks:{'Ours': 1, 'w_o_HitL': 5, 'w_o_Proto_Loss': 1, 'LLM-based': 1, 'w_o_Tuning': 4}
|
| 5 |
+
2025-07-31 11:18:00.202 | INFO | __main__:validate_and_navigate:429 - Saved rank for P:P99, Prompt:'happy', Criterion:Naturalness, Ranks:{'Ours': 1, 'w_o_Tuning': 5, 'LLM-based': 1, 'w_o_Proto_Loss': 1, 'w_o_HitL': 4}
|
| 6 |
+
2025-07-31 11:18:17.995 | INFO | __main__:validate_and_navigate:429 - Saved rank for P:P99, Prompt:'happy', Criterion:Attractiveness, Ranks:{'LLM-based': 1, 'Ours': 1, 'w_o_Proto_Loss': 1, 'w_o_Tuning': 5, 'w_o_HitL': 4}
|
| 7 |
+
2025-07-31 11:18:40.255 | INFO | __main__:validate_and_navigate:429 - Saved rank for P:P99, Prompt:'sad', Criterion:Alignment, Ranks:{'LLM-based': 5, 'w_o_HitL': 4, 'Ours': 2, 'w_o_Proto_Loss': 2, 'w_o_Tuning': 1}
|
| 8 |
+
2025-07-31 11:18:53.544 | INFO | __main__:validate_and_navigate:429 - Saved rank for P:P99, Prompt:'sad', Criterion:Naturalness, Ranks:{'w_o_HitL': 4, 'LLM-based': 5, 'w_o_Tuning': 3, 'Ours': 1, 'w_o_Proto_Loss': 1}
|
| 9 |
+
2025-07-31 11:19:04.031 | INFO | __main__:validate_and_navigate:429 - Saved rank for P:P99, Prompt:'sad', Criterion:Attractiveness, Ranks:{'w_o_Tuning': 4, 'LLM-based': 5, 'w_o_HitL': 3, 'Ours': 1, 'w_o_Proto_Loss': 1}
|
| 10 |
+
2025-07-31 11:19:15.845 | INFO | __main__:export_results:511 - Successfully exported results to: ./results\P99\evaluation_results_P99_20250731_111915.json
|
results/P99/evaluation_results_P99_20250731_111915.json
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"metadata": {
|
| 3 |
+
"participant_id": "P99",
|
| 4 |
+
"export_timestamp": "2025-07-31T11:19:15.844354",
|
| 5 |
+
"total_prompts_evaluated": 2,
|
| 6 |
+
"evaluation_duration_seconds": 96.511394,
|
| 7 |
+
"reasoning": {
|
| 8 |
+
"alignment": "あいう",
|
| 9 |
+
"naturalness": "あいう",
|
| 10 |
+
"attractiveness": "あいう"
|
| 11 |
+
},
|
| 12 |
+
"optional_comment": "あいう"
|
| 13 |
+
},
|
| 14 |
+
"results": [
|
| 15 |
+
{
|
| 16 |
+
"prompt": "happy",
|
| 17 |
+
"prompt_category": "basic_emotion",
|
| 18 |
+
"image_order_alignment": [
|
| 19 |
+
"Ours",
|
| 20 |
+
"w_o_HitL",
|
| 21 |
+
"w_o_Proto_Loss",
|
| 22 |
+
"LLM-based",
|
| 23 |
+
"w_o_Tuning"
|
| 24 |
+
],
|
| 25 |
+
"image_order_naturalness": [
|
| 26 |
+
"Ours",
|
| 27 |
+
"w_o_Tuning",
|
| 28 |
+
"LLM-based",
|
| 29 |
+
"w_o_Proto_Loss",
|
| 30 |
+
"w_o_HitL"
|
| 31 |
+
],
|
| 32 |
+
"image_order_attractiveness": [
|
| 33 |
+
"LLM-based",
|
| 34 |
+
"Ours",
|
| 35 |
+
"w_o_Proto_Loss",
|
| 36 |
+
"w_o_Tuning",
|
| 37 |
+
"w_o_HitL"
|
| 38 |
+
],
|
| 39 |
+
"alignment_ranks": {
|
| 40 |
+
"Ours": 1,
|
| 41 |
+
"w_o_HitL": 5,
|
| 42 |
+
"w_o_Proto_Loss": 1,
|
| 43 |
+
"LLM-based": 1,
|
| 44 |
+
"w_o_Tuning": 4
|
| 45 |
+
},
|
| 46 |
+
"naturalness_ranks": {
|
| 47 |
+
"Ours": 1,
|
| 48 |
+
"w_o_Tuning": 5,
|
| 49 |
+
"LLM-based": 1,
|
| 50 |
+
"w_o_Proto_Loss": 1,
|
| 51 |
+
"w_o_HitL": 4
|
| 52 |
+
},
|
| 53 |
+
"attractiveness_ranks": {
|
| 54 |
+
"LLM-based": 1,
|
| 55 |
+
"Ours": 1,
|
| 56 |
+
"w_o_Proto_Loss": 1,
|
| 57 |
+
"w_o_Tuning": 5,
|
| 58 |
+
"w_o_HitL": 4
|
| 59 |
+
},
|
| 60 |
+
"alignment_absolute_score": 6,
|
| 61 |
+
"alignment_absolute_score_worst": 2
|
| 62 |
+
},
|
| 63 |
+
{
|
| 64 |
+
"prompt": "sad",
|
| 65 |
+
"prompt_category": "basic_emotion",
|
| 66 |
+
"image_order_alignment": [
|
| 67 |
+
"LLM-based",
|
| 68 |
+
"w_o_HitL",
|
| 69 |
+
"Ours",
|
| 70 |
+
"w_o_Proto_Loss",
|
| 71 |
+
"w_o_Tuning"
|
| 72 |
+
],
|
| 73 |
+
"image_order_naturalness": [
|
| 74 |
+
"w_o_HitL",
|
| 75 |
+
"LLM-based",
|
| 76 |
+
"w_o_Tuning",
|
| 77 |
+
"Ours",
|
| 78 |
+
"w_o_Proto_Loss"
|
| 79 |
+
],
|
| 80 |
+
"image_order_attractiveness": [
|
| 81 |
+
"w_o_Tuning",
|
| 82 |
+
"LLM-based",
|
| 83 |
+
"w_o_HitL",
|
| 84 |
+
"Ours",
|
| 85 |
+
"w_o_Proto_Loss"
|
| 86 |
+
],
|
| 87 |
+
"alignment_ranks": {
|
| 88 |
+
"LLM-based": 5,
|
| 89 |
+
"w_o_HitL": 4,
|
| 90 |
+
"Ours": 2,
|
| 91 |
+
"w_o_Proto_Loss": 2,
|
| 92 |
+
"w_o_Tuning": 1
|
| 93 |
+
},
|
| 94 |
+
"naturalness_ranks": {
|
| 95 |
+
"w_o_HitL": 4,
|
| 96 |
+
"LLM-based": 5,
|
| 97 |
+
"w_o_Tuning": 3,
|
| 98 |
+
"Ours": 1,
|
| 99 |
+
"w_o_Proto_Loss": 1
|
| 100 |
+
},
|
| 101 |
+
"attractiveness_ranks": {
|
| 102 |
+
"w_o_Tuning": 4,
|
| 103 |
+
"LLM-based": 5,
|
| 104 |
+
"w_o_HitL": 3,
|
| 105 |
+
"Ours": 1,
|
| 106 |
+
"w_o_Proto_Loss": 1
|
| 107 |
+
},
|
| 108 |
+
"alignment_absolute_score": 6,
|
| 109 |
+
"alignment_absolute_score_worst": 1
|
| 110 |
+
}
|
| 111 |
+
]
|
| 112 |
+
}
|