File size: 6,340 Bytes
6557c00
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
"""Gradio app for the text-to-HDR pairwise user study.

Each rater is shown 90 stacked-bracket comparisons (Method A on top vs
Method B on bottom, methods hidden) and asked which set looks more
natural. Votes are appended to a private HuggingFace dataset.

Deploy as a free HF Space:
  - app.py, requirements.txt, pairs.json, prompts.json, pairs/*.png

The app does NOT reveal which method is on top — the assignment is
recorded in pairs.json and joined post-hoc when scoring.
"""
import json
import os
import random
import uuid
from datetime import datetime, timezone
from pathlib import Path

import gradio as gr
from huggingface_hub import HfApi

# Directory containing this file; the JSON manifests and pair images are
# resolved relative to it.
ROOT = Path(__file__).resolve().parent

# Decode the manifests explicitly as UTF-8: read_text() otherwise falls back
# to the locale's default encoding, which can corrupt non-ASCII prompt text.
# PAIRS entries are read elsewhere in this file via the keys "image",
# "prompt_n", "pair_id", "top_method" and "bottom_method".
PAIRS = json.loads((ROOT / "pairs.json").read_text(encoding="utf-8"))
# Prompt records indexed by their "n" field for lookup from a pair's "prompt_n".
PROMPTS = {
    p["n"]: p
    for p in json.loads((ROOT / "prompts.json").read_text(encoding="utf-8"))
}

# Configure via Space secrets:
HF_TOKEN = os.environ.get("HF_TOKEN")              # write-scoped token
HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "")  # e.g. "naomi/t2hdr-user-study-votes"

# (label shown in the radio, value stored in the vote record)
CHOICES = [
    ("Top is much better",      "top_much"),
    ("Top is slightly better",  "top_slight"),
    ("Tie / cannot tell",       "tie"),
    ("Bottom is slightly better", "bot_slight"),
    ("Bottom is much better",   "bot_much"),
]


def _hf_upload(path: Path, repo_path: str) -> None:
    """Push one local file into the configured HF dataset repository.

    Args:
        path: Local file to upload.
        repo_path: Destination path inside the dataset repo.

    Does nothing when either ``HF_TOKEN`` or ``HF_DATASET_REPO`` is unset.
    """
    if not (HF_TOKEN and HF_DATASET_REPO):
        # Local development without credentials: skip the upload.
        return
    HfApi(token=HF_TOKEN).upload_file(
        path_or_fileobj=str(path),
        path_in_repo=repo_path,
        repo_id=HF_DATASET_REPO,
        repo_type="dataset",
    )


def new_session() -> dict:
    """Create the state for a fresh rater.

    Returns:
        A dict with a 12-hex-character ``rater_id``, ``order`` (a shuffled
        list of every index into ``PAIRS``), ``idx`` set to 0 and an empty
        ``votes`` list.
    """
    queue = [*range(len(PAIRS))]
    random.shuffle(queue)
    return {
        "rater_id": uuid.uuid4().hex[:12],
        "order": queue,
        "idx": 0,
        "votes": [],
    }


def _stim_for(pair_idx: int, position: int) -> tuple[str, str, str]:
    """Build the image path, caption and progress text for one comparison.

    Args:
        pair_idx: Index into ``PAIRS``.
        position: 0-based position in the rater's queue.

    Returns:
        ``(image_path, caption_markdown, progress_markdown)``.

    Raises:
        IndexError: If ``pair_idx`` is outside ``PAIRS``.
        KeyError: If the pair's ``prompt_n`` has no entry in ``PROMPTS``.
    """
    pair = PAIRS[pair_idx]
    prompt = PROMPTS[pair["prompt_n"]]
    img = str(ROOT / pair["image"])
    progress = f"**Pair {position + 1} / {len(PAIRS)}**"
    # The separator is a real em dash; the previous literal was a mis-decoded
    # byte sequence that rendered as garbage in the UI.
    caption = (
        f"**Prompt #{prompt['n']:03d} — {prompt['cat']}**\n\n"
        f"{prompt['text']}\n\n"
        "Which set of 3 exposures looks more natural and more like a "
        "high-quality photograph of the scene above?"
    )
    return img, caption, progress


def start_session():
    """Begin a new rater session and show the first comparison.

    Returns:
        A 6-tuple: the new session state, the first pair's image path,
        caption and progress text, then an update that hides the start
        panel and one that reveals the rating panel.
    """
    session = new_session()
    first_pair = session["order"][0]
    stimulus = _stim_for(first_pair, session["idx"])
    hide_start = gr.update(visible=False)
    show_rating = gr.update(visible=True)
    return (session, *stimulus, hide_start, show_rating)


def _done_outputs(state: dict) -> tuple:
    """Build the 7-tuple of outputs returned once every pair has been rated."""
    return (
        state, None,
        f"### ✅ Done — thank you!\n\n"
        f"Your rater id: `{state['rater_id']}`. "
        f"You can close this tab.",
        f"{len(state['votes'])} / {len(PAIRS)} complete",
        gr.update(visible=False), gr.update(visible=False), gr.update(value=None),
    )


def cast_vote(state: dict, choice_label: str):
    """Record the rater's vote for the current pair and advance the session.

    Args:
        state: Session dict as built by ``new_session`` (``rater_id``,
            ``order``, ``idx``, ``votes``), or ``None`` if no session has
            been started.
        choice_label: The selected radio label (first element of a
            ``CHOICES`` entry), or ``None`` when nothing is selected.

    Returns:
        A 7-tuple in the order (state, image, caption, progress,
        start-panel update, rate-panel update, radio update).

    Behaviour:
        * No session: prompts the rater to click Begin.
        * Session already complete: returns the completion outputs again
          without re-saving, instead of indexing past the end of the queue.
        * No valid selection: re-shows the current pair with a reminder and
          records nothing, so an unanswered pair is never stored as a tie.
        * Otherwise: appends the vote; after the last pair the votes are
          written to ``votes_<rater_id>.jsonl`` and uploaded best-effort.
    """
    # .get() keeps this debug line from raising on a malformed state before
    # the guard below has a chance to handle it.
    current_idx = state.get("idx") if state else None
    print(f"[cast_vote] state.idx={current_idx} choice={choice_label!r}", flush=True)
    if state is None or "order" not in state:
        return (state, None, "Click **Begin** to start.", "",
                gr.update(visible=True), gr.update(visible=False), gr.update(value=None))

    position = state["idx"]
    if position >= len(PAIRS):
        # A repeated click after the final pair must not index out of range
        # or write/upload the votes a second time.
        return _done_outputs(state)

    pair_idx = state["order"][position]
    choice_value = dict(CHOICES).get(choice_label)
    if choice_value is None:
        # The radio is reset after every pair so the rater has to choose
        # actively; refuse to advance rather than silently recording a tie.
        img, caption, progress = _stim_for(pair_idx, position)
        return (state, img,
                caption + "\n\n**Please select an option before continuing.**",
                progress,
                gr.update(visible=False), gr.update(visible=True), gr.update(value=None))

    pair = PAIRS[pair_idx]
    record = {
        "rater_id": state["rater_id"],
        "pair_id": pair["pair_id"],
        "prompt_n": pair["prompt_n"],
        "top_method": pair["top_method"],
        "bottom_method": pair["bottom_method"],
        "choice": choice_value,
        "ts": datetime.now(timezone.utc).isoformat(),
    }
    # Build a NEW state dict (immutable update) so Gradio detects the change.
    new_state = {
        "rater_id": state["rater_id"],
        "order": state["order"],
        "idx": position + 1,
        "votes": state["votes"] + [record],
    }
    print(f"[cast_vote] new_state.idx={new_state['idx']} votes_count={len(new_state['votes'])}", flush=True)

    if new_state["idx"] >= len(PAIRS):
        out = ROOT / f"votes_{new_state['rater_id']}.jsonl"
        out.write_text(
            "\n".join(json.dumps(v) for v in new_state["votes"]) + "\n",
            encoding="utf-8",
        )
        try:
            _hf_upload(out, f"votes/votes_{new_state['rater_id']}.jsonl")
        except Exception as e:
            # Best-effort: the local file is already written, so an upload
            # failure is logged rather than shown to the rater.
            print(f"upload failed: {e}")
        return _done_outputs(new_state)

    img, caption, progress = _stim_for(new_state["order"][new_state["idx"]], new_state["idx"])
    # Reset the radio so the rater has to actively choose for each pair.
    return (new_state, img, caption, progress,
            gr.update(visible=False), gr.update(visible=True), gr.update(value=None))


# UI layout and event wiring for the study.
with gr.Blocks(title="Text-to-HDR study") as demo:
    # The pair count comes from PAIRS so the intro always agrees with the
    # "Pair i / N" progress counter.
    gr.Markdown(
        f"""
        # Text-to-HDR — pairwise comparison

        You will see **{len(PAIRS)} image pairs**. Each pair has two stacked
        rows: top and bottom. Each row shows the same scene at three
        exposures (dark / normal / bright). Your task is to pick which
        row looks more natural and more like a real high-quality
        photograph of the prompt.

        Pick a choice in the radio, then click **Next pair**.
        Take your time. Total time ≈ 3–5 minutes.
        """
    )
    # Per-rater session dict; None until the rater clicks Begin.
    state = gr.State(None)

    start_panel = gr.Group(visible=True)
    with start_panel:
        start_btn = gr.Button("Begin", variant="primary")

    # Progress and caption sit outside rate_panel: cast_vote hides that panel
    # when the session ends, and text placed inside it (the completion
    # message and rater id) would be hidden along with it.
    progress = gr.Markdown("")
    caption = gr.Markdown("")

    rate_panel = gr.Group(visible=False)
    with rate_panel:
        image = gr.Image(label="", interactive=False, height=600)
        choice_radio = gr.Radio(
            choices=[label for label, _ in CHOICES],
            label="Your judgement",
            value=None,
        )
        submit_btn = gr.Button("Next pair", variant="primary")

    # Output order must match the tuple returned by cast_vote.
    submit_btn.click(
        cast_vote,
        inputs=[state, choice_radio],
        outputs=[state, image, caption, progress, start_panel, rate_panel, choice_radio],
    )

    # Output order must match the tuple returned by start_session.
    start_btn.click(
        start_session,
        outputs=[state, image, caption, progress, start_panel, rate_panel],
    )


# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()