jmcinern committed on
Commit
8a034d1
·
verified ·
1 Parent(s): 8eafe4d

create app to run on server

Browse files
Files changed (1) hide show
  1. app.py +244 -0
app.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ab_app_k4_two_page.py
2
+ # Two-page Gradio app for open-sourced annotation (Master’s thesis)
3
+ # Page 1: consent + annotator type (Learner/Native) + source (Wiki/Oireachtas)
4
+ # Page 2: task only (QUESTION_MD + A/B), deterministic K=4 per model pair per source
5
+ # Saves: annotator_type, source_type, item info, choice, timestamp
6
+
7
+ import gradio as gr
8
+ import pandas as pd
9
+ import time
10
+ from itertools import combinations
11
+ from pathlib import Path
12
+ import json
13
+ import hashlib
14
+
15
# Input CSV produced by the generation pipeline; expected columns:
#   run_id, model, source_type, instruction, response, text
PAIRS_CSV = "./outputs/pairs.csv" # columns: run_id, model, source_type, instruction, response, text

# --- Config ---
# Number of comparisons scheduled per (model pair, source) — deterministic.
K = 4
# Append-only CSV collecting one row per annotation.
OUT_FILE = "./annotations.csv"
# Column order of OUT_FILE. NOTE: save_row() appends rows with header=False,
# so its dict insertion order must match this list exactly.
SCHEMA = [
    "annotator_type", # Learner | Native
    "source_type", # Wiki | Oireachtas
    "text",
    "model_A",
    "model_B",
    "choice", # A | B
    "instruction_A",
    "response_A",
    "instruction_B",
    "response_B",
    "timestamp",
]
# Write the header row exactly once, so later header-less appends line up
# with SCHEMA.
if not Path(OUT_FILE).exists():
    pd.DataFrame(columns=SCHEMA).to_csv(OUT_FILE, index=False)

# Full pool of model outputs, loaded once at startup.
pairs_all = pd.read_csv(PAIRS_CSV)
37
+
38
+ # --- Helpers for deterministic schedule ---
39
+ def _shared_texts(df, m1, m2):
40
+ t1 = set(df[df["model"] == m1]["text"])
41
+ t2 = set(df[df["model"] == m2]["text"])
42
+ return list(t1 & t2)
43
+
44
+ def _stable_hash(s: str) -> int:
45
+ return int(hashlib.sha256(s.encode("utf-8")).hexdigest(), 16)
46
+
47
def build_comparisons_k(source_type: str, k: int):
    """Build a deterministic schedule of k A/B comparisons per model pair.

    For every unordered pair of models that share texts within *source_type*,
    k texts are picked via a stable-hash ordering (cycling through the shared
    texts when fewer than k exist), and the A/B sides alternate so each model
    appears on each side an equal number of times (2/2 at k=4).
    Returns a list of comparison dicts, sorted for reproducibility.
    """
    subset = pairs_all[pairs_all["source_type"] == source_type].copy()
    if subset.empty:
        return []

    model_names = sorted(subset["model"].unique().tolist())
    schedule = []

    for first, second in combinations(model_names, 2):
        common = _shared_texts(subset, first, second)
        if not common:
            continue

        # Deterministic ordering: rank shared texts by a stable hash of
        # (source, pair, text) so every run selects the same k texts.
        ordered = sorted(
            common,
            key=lambda t: _stable_hash(f"{source_type}|{first}|{second}|{t}"),
        )

        # Cycle through the ordered texts until k are chosen (texts repeat
        # when fewer than k are shared by this pair).
        picks = [ordered[j % len(ordered)] for j in range(k)]

        for j, text in enumerate(picks):
            row_first = subset[(subset["model"] == first) & (subset["text"] == text)].iloc[0]
            row_second = subset[(subset["model"] == second) & (subset["text"] == text)].iloc[0]
            # Alternate which model appears as "A" to balance position bias.
            if j % 2 == 0:
                side_a, side_b = (first, row_first), (second, row_second)
            else:
                side_a, side_b = (second, row_second), (first, row_first)
            schedule.append(
                {
                    "source_type": source_type,
                    "text": text,
                    "model_A": side_a[0],
                    "instruction_A": side_a[1]["instruction"],
                    "response_A": side_a[1]["response"],
                    "model_B": side_b[0],
                    "instruction_B": side_b[1]["instruction"],
                    "response_B": side_b[1]["response"],
                }
            )

    # Stable presentation order, independent of dict/set iteration order.
    schedule.sort(key=lambda d: (d["source_type"], d["model_A"], d["model_B"], d["text"]))
    return schedule
92
+
93
def save_row(annotator_type, item, choice):
    """Append one annotation to OUT_FILE.

    NOTE: insertion order matters — rows are appended without a header, so
    the keys below must line up with the SCHEMA header written at startup.
    """
    record = dict(
        annotator_type=annotator_type,
        source_type=item["source_type"],
        text=item["text"],
        model_A=item["model_A"],
        model_B=item["model_B"],
        choice=choice,
        instruction_A=item["instruction_A"],
        response_A=item["response_A"],
        instruction_B=item["instruction_B"],
        response_B=item["response_B"],
        timestamp=time.time(),
    )
    pd.DataFrame([record]).to_csv(OUT_FILE, mode="a", header=False, index=False)
108
+
109
# The single judging criterion shown on the task page.
QUESTION_MD = (
    "**Question:** Which Question–Answer pair exhibits a stronger command of Irish grammar and "
    "semantic coherence? Take the use of the reference text into account. If unsure, pick the one "
    "with a stronger display of Irish grammar. Choose A or B."
)

# Consent / instructions markdown for page 1.
# Fix: this was an f-string with no placeholders (ruff F541); a plain string
# is equivalent and avoids accidental brace interpolation in future edits.
CONSENT_MD = """
### Irish QA Pair Comparison (Master’s Thesis)

You are invited to take part in a study on Large Language Model Irish-language QA quality.
By continuing, you consent to the following:

- Your annotations will be **anonymised** (we only record whether you are a **Learner** or **Native speaker**).
- The dataset (reference text + model outputs + your choices) will be released **open-source** for both research and commercial purposes.
- No personal data is collected beyond your level of Irish. You may stop at any time before submission.

- You will answer the following question:

#### "Which Question–Answer pair exhibits a stronger command of Irish grammar and semantic coherence? Take the use of the reference text into account. If unsure, pick the one with a stronger display of Irish grammar. Choose A or B.".

- Only base your decision on this question and not other factors.



Please confirm consent, select your annotator type and the source to evaluate, then press **Begin**.
"""
135
+
136
# ---------------------------------------------------------------------------
# Gradio UI. Page 1 collects consent + annotator metadata; page 2 runs the
# A/B task. Every handler must return exactly one value per component in its
# `outputs=` list — the original code returned short tuples on several
# branches (14/15 in begin's error paths, 8/10 and 9/10 in choose), which
# makes Gradio raise at runtime; those arities are fixed below.
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    # ---------- PAGE 1: Consent + Role + Source ----------
    with gr.Group(visible=True) as page1:
        gr.Markdown(CONSENT_MD)
        consent_chk = gr.Checkbox(label="I consent to take part and for my anonymised annotations to be open-sourced.", value=False)
        role_dd = gr.Dropdown(["Learner", "Native"], label="Annotator Type (required)", value=None)
        source_dd = gr.Dropdown(["Wiki", "Oireachtas"], label="Source (required)", value=None)
        begin_btn = gr.Button("Begin")
        gate_msg = gr.Markdown()

    # ---------- PAGE 2: Task ----------
    with gr.Group(visible=False) as page2:
        crit = gr.Markdown(QUESTION_MD)
        counter = gr.Markdown()
        ref_text = gr.Textbox(label="Reference Text", interactive=False, lines=8)
        with gr.Row():
            with gr.Column():
                instA = gr.Textbox(label="Instruction A", interactive=False)
                respA = gr.Textbox(label="Response A", interactive=False, lines=8)
            with gr.Column():
                instB = gr.Textbox(label="Instruction B", interactive=False)
                respB = gr.Textbox(label="Response B", interactive=False, lines=8)
        with gr.Row():
            btnA = gr.Button("A is Better")
            btnB = gr.Button("B is Better")
        status = gr.Markdown()

    # ---------- State ----------
    annotator_type = gr.State("")  # Learner | Native
    source_state = gr.State(None)  # Wiki | Oireachtas
    comps_state = gr.State([])     # list of comparison dicts from build_comparisons_k
    idx_state = gr.State(0)        # index of the current comparison

    # ---------- Handlers ----------
    # begin() drives 15 outputs: gate_msg, page1, page2, counter, ref_text,
    # instA, respA, instB, respB, annotator_type, source_state, comps_state,
    # idx_state, btnA, btnB.
    def _stay_on_page1(msg):
        """Validation failure: show msg, keep page 1, leave the other 12 outputs untouched."""
        # BUGFIX: the original error branches returned 14 values for 15 outputs.
        return (msg, gr.update(visible=True), gr.update(visible=False)) + (gr.skip(),) * 12

    def begin(consent, role, source):
        """Validate consent/role/source, build the schedule, and open page 2 on the first item."""
        if not consent:
            return _stay_on_page1("**Please tick the consent checkbox to proceed.**")
        if role not in ["Learner", "Native"]:
            return _stay_on_page1("**Please select your annotator type.**")
        if source not in ["Wiki", "Oireachtas"]:
            return _stay_on_page1("**Please select a source (Wikipedia/Oireachtas).**")

        comp_list = build_comparisons_k(source, K)
        if not comp_list:
            return _stay_on_page1("**No items found for the selected source.**")

        i = 0
        item = comp_list[i]
        return ("",  # clear gate msg
                gr.update(visible=False), gr.update(visible=True),  # show page2
                f"{i+1} / {len(comp_list)}",
                item["text"], item["instruction_A"], item["response_A"],
                item["instruction_B"], item["response_B"],
                role, source, comp_list, i,
                gr.update(interactive=True), gr.update(interactive=True))

    begin_btn.click(
        begin,
        inputs=[consent_chk, role_dd, source_dd],
        outputs=[
            gate_msg, page1, page2,
            counter, ref_text, instA, respA, instB, respB,
            annotator_type, source_state, comps_state, idx_state,
            btnA, btnB
        ],
    )

    # choose() drives 10 outputs: status, counter, ref_text, instA, respA,
    # instB, respB, btnA, btnB, idx_state.
    def choose(choice, role, source, comp_list, i):
        """Persist the current choice, then advance to the next item or finish."""
        role = (role or "").strip()
        if not role or not comp_list:
            # BUGFIX: the original returned 8 values for 10 outputs here.
            return ("**No comparisons loaded.**",
                    gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(),
                    gr.update(interactive=False), gr.update(interactive=False), i)

        item = comp_list[i]
        save_row(role, item, choice)

        i += 1
        if i >= len(comp_list):
            # Done: disable buttons, clear fields, lock progress at max.
            # BUGFIX: the original returned 9 values for 10 outputs here
            # (one empty field short).
            return ("**Done — thank you!**",
                    f"{len(comp_list)} / {len(comp_list)}", "", "", "", "", "",
                    gr.update(interactive=False), gr.update(interactive=False), i)

        nxt = comp_list[i]
        return (f"Saved: {choice}",
                f"{i+1} / {len(comp_list)}",
                nxt["text"], nxt["instruction_A"], nxt["response_A"], nxt["instruction_B"], nxt["response_B"],
                gr.update(interactive=True), gr.update(interactive=True), i)

    btnA.click(
        lambda role, src, comps, i: choose("A", role, src, comps, i),
        inputs=[annotator_type, source_state, comps_state, idx_state],
        outputs=[status, counter, ref_text, instA, respA, instB, respB, btnA, btnB, idx_state],
    )
    btnB.click(
        lambda role, src, comps, i: choose("B", role, src, comps, i),
        inputs=[annotator_type, source_state, comps_state, idx_state],
        outputs=[status, counter, ref_text, instA, respA, instB, respB, btnA, btnB, idx_state],
    )

demo.launch()