akaburia committed on
Commit
fe635a0
·
verified ·
1 Parent(s): fd436ca

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +365 -0
app.py ADDED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import gradio as gr
3
+ import os
4
+ import io
5
+ import json
6
+ from google.colab import auth
7
+ import gspread
8
+ from google.auth import default
9
+ from huggingface_hub import HfApi, hf_hub_download
10
+
11
# ==========================================
# 1. AUTHENTICATION (GOOGLE SHEETS)
# ==========================================
# Runs at import time: authenticates the current user with Google, builds a
# gspread client from the default credentials and opens the source workbook.
# NOTE(review): `auth` comes from `google.colab`, which presumably only works
# inside a Colab runtime -- confirm before deploying this file elsewhere.
print("Authenticating with Google...")
auth.authenticate_user()
creds, _ = default()
gc = gspread.authorize(creds)

# Workbook holding one policy table per worksheet.
spreadsheet = gc.open_by_key('12JM3u10WSpshCcSUEmjhRP5i2bWe9MAK_jrbI56WOCU')
20
+
21
def get_worksheet_by_number(spreadsheet, worksheet_number, format=True):
    """Load one worksheet into a DataFrame with forward-filled grouping columns.

    Args:
        spreadsheet: Object exposing ``get_worksheet(index)``; the returned
            worksheet must expose ``get_all_values()`` (list of row lists).
        worksheet_number: Index passed to ``get_worksheet``.
        format: When true, the first sheet row and the two rows after it are
            treated as preamble, the next row becomes the header, and the
            remaining rows are the data. When false, the first sheet row is
            the header.

    Returns:
        A DataFrame in which empty strings are replaced by ``pd.NA`` and the
        ``Sector`` / ``Policy`` columns (when present) are forward-filled.

    Raises:
        ValueError: If the sheet is empty, or ``format`` is true and the sheet
            has too few rows to contain the header row.
    """
    worksheet = spreadsheet.get_worksheet(worksheet_number)
    rows = worksheet.get_all_values()
    if not rows:
        raise ValueError(f"Worksheet {worksheet_number} is empty.")

    df = pd.DataFrame.from_records(rows[1:], columns=rows[0])
    if format:
        if len(df) < 3:
            raise ValueError(
                f"Worksheet {worksheet_number} has too few rows for the "
                "expected layout (header expected on the 4th sheet row)."
            )
        # Drop the two preamble rows, promote the next row to header, and
        # keep only the rows below it as data.
        df = df.iloc[2:]
        df.columns = df.iloc[0].values
        df = df.iloc[1:]

    df = df.replace('', pd.NA)
    # Blank cells under a value inherit it via forward fill. Only touch
    # columns that exist so unformatted sheets do not raise KeyError.
    for column in ('Sector', 'Policy'):
        if column in df.columns:
            df[column] = df[column].ffill()
    return df
33
+
34
# Load the two policy tables once at start-up; worksheet indices 3 and 5 are
# the positions of the Land and Water tabs in the workbook.
print("Loading Data from Google Sheets...")
land_df = get_worksheet_by_number(spreadsheet, 3, format=True)
water_df = get_worksheet_by_number(spreadsheet, 5, format=True)

# Domain name -> policy table; the keys are the choices of the domain dropdowns.
DOMAIN_MAP = {"Land": land_df, "Water": water_df}
DOMAINS = list(DOMAIN_MAP.keys())
40
+
41
# ==========================================
# 2. CONFIGURATION & HUGGING FACE SETUP
# ==========================================
# Hugging Face Settings
# SECURITY: the access token is read from the environment only. A previous
# revision embedded a token literal in this file; that token must be treated
# as leaked and revoked. Never commit credentials to source control.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    print("⚠️ HF_TOKEN is not set; Hugging Face Hub requests will be unauthenticated.")
HF_DATASET_REPO = "akaburia/policy-evaluations"
HF_CSV_FILE = "policy_coherence_annotations.csv"

# Approved Emails Parsing
# APPROVED_EMAILS is expected to be a JSON object mapping email -> user tag.
# When the variable is unset the default "{}" yields an empty allow-list; the
# hard-coded fallback is used only when the value cannot be parsed as an object.
emails_env_string = os.environ.get("APPROVED_EMAILS", "{}")
try:
    parsed_emails = json.loads(emails_env_string)
    if not isinstance(parsed_emails, dict):
        # Valid JSON of the wrong shape (list, string, ...) has no .items();
        # route it to the same fallback path as malformed JSON.
        raise TypeError("APPROVED_EMAILS must be a JSON object")
    # Normalise keys the same way login input is normalised (strip + lower).
    APPROVED_EMAILS = {str(k).strip().lower(): v for k, v in parsed_emails.items()}
except (json.JSONDecodeError, TypeError) as e:
    print(f"⚠️ Error parsing APPROVED_EMAILS: {e}. Using fallback.")
    APPROVED_EMAILS = {
        "kaburiaaustin1@tahmo.org": "user1",
        "e.ramos@tudelft.nl": "user2",
        "eunice.pramos@gmail.com": "user3",
        "e.abraham@tudelft.nl": "user4",
        "dene.abv@gmail.com": "user5",
        "rafatoufofana.abv@gmail.com": "user6",
        "annorfrank@tahmo.org": "user7",
        "n.marley@tahmo.org": "user8",
        "h.f.hagenaars@tudelft.nl": "user9",
        "kaburiaaustin1@gmail.com": "user10",
        "faridakone@gmail.com": "user11",
    }
71
+
72
# Sheet columns offered in the "target" and "context" column dropdowns.
AVAILABLE_COLUMNS = [
    'Sector', 'Policy', 'General Vision', 'General policy objective',
    'Strategic objectives / directions', 'Focus Area / Policy Action Category',
    'Policy objectives (of the focus area)', 'Policy Actions and Measures (PAMs)',
    'Policy Targets / Indicators',
]

# Top-level coherence label -> fine-grained interaction scores offered in the
# drill-down dropdown once that label is chosen.
DRILL_DOWN_MAP = {
    "coherent": ["+3 Indivisible", "+2 Reinforcing", "+1 Enabling"],
    "neutral": ["0 Consistent"],
    "incoherent": ["-1 Constraining", "-2 Counteracting", "-3 Cancelling"],
}
84
+
85
def get_unique_items(df, policy_name, col_name):
    """Return the distinct, non-blank values of ``col_name`` for one policy.

    Args:
        df: Policy table containing a ``Policy`` column.
        policy_name: Value of ``Policy`` selecting the rows of interest.
        col_name: Column whose values are collected.

    Returns:
        Values as stripped strings in first-seen order, with missing and
        whitespace-only entries removed. An empty list if the policy or the
        column is not present in ``df``.
    """
    if col_name not in df.columns or policy_name not in df['Policy'].values:
        return []
    values = df.loc[df['Policy'] == policy_name, col_name].dropna()
    stripped = (str(value).strip() for value in values)
    # dict.fromkeys de-duplicates after stripping while preserving order, so
    # "x" and " x " collapse into a single entry.
    return list(dict.fromkeys(text for text in stripped if text))
89
+
90
def get_sector_for_policy(df, policy_name):
    """Return the sector recorded for ``policy_name``.

    Args:
        df: Policy table containing ``Policy`` and ``Sector`` columns.
        policy_name: Policy to look up.

    Returns:
        The first non-missing, non-blank ``Sector`` value among the policy's
        rows, stripped; ``"Unknown Sector"`` when the policy is absent or has
        no usable sector value.
    """
    if policy_name not in df['Policy'].values:
        return "Unknown Sector"
    sectors = df.loc[df['Policy'] == policy_name, 'Sector'].dropna()
    for sector in sectors:
        text = str(sector).strip()
        if text:
            return text
    # Avoid rendering a missing value as the literal text "None" / "<NA>".
    return "Unknown Sector"
93
+
94
def get_policy_list(domain_key):
    """Return the selectable policy names for a domain.

    Args:
        domain_key: Key into ``DOMAIN_MAP``; may be empty or ``None``.

    Returns:
        Distinct non-missing, non-blank values of the domain's ``Policy``
        column, or an empty list when the key is empty or unknown.
    """
    if not domain_key or domain_key not in DOMAIN_MAP:
        return []
    # dropna() keeps missing policies (rows above the first filled cell) out
    # of the dropdown; str(pd.NA) is truthy and would otherwise pass the filter.
    policies = DOMAIN_MAP[domain_key]['Policy'].dropna().unique()
    return [p for p in policies if str(p).strip()]
97
+
98
+ # HF Data Loader
99
def load_hf_dataset():
    """Download the annotation CSV from the Hugging Face dataset repo.

    Returns:
        The stored annotations as a DataFrame of strings. If the download or
        parse fails for any reason, an empty DataFrame with the annotation
        schema is returned instead (best effort; the error is printed).
    """
    try:
        path = hf_hub_download(repo_id=HF_DATASET_REPO, filename=HF_CSV_FILE, repo_type="dataset", token=HF_TOKEN)
        # Read every cell as text: default type inference would turn
        # numeric-looking targets into numbers and strings such as "NA" into
        # NaN, so they would no longer compare equal to the sheet values.
        return pd.read_csv(path, dtype=str, keep_default_na=False)
    except Exception as e:
        # Deliberately broad: a missing file on first run and any transport
        # error both fall back to an empty table.
        print(f"HF Dataset not found or error loading ({e}). Starting fresh.")
        return pd.DataFrame(columns=[
            "Domain_A", "Sector_A", "Policy_A_Name",
            "Domain_B", "Sector_B", "Policy_B_Name",
            "Target_Column", "Target_A_Row", "Target_B_Row",
            "Context_Column", "Context_A_Chunk", "Context_B_Chunk",
            "Coherence_Label", "Drill_Down_Label", "Justification", "AnnotatorUsername",
        ])
112
+
113
+ # ==========================================
114
+ # 3. GRADIO UI DESIGN
115
+ # ==========================================
116
# Layout of the annotator UI. Nesting below is reconstructed from the order of
# the statements; component names and arguments are unchanged.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🏛️ Collaborative Policy Coherence Annotator")

    # Global Application States (one value per browser session)
    hf_df_state = gr.State()            # annotations DataFrame loaded at login
    user_tag_state = gr.State()         # tag of the logged-in annotator
    target_pairs_state = gr.State([])   # list of (target_a, target_b) still to annotate
    current_index_state = gr.State(0)   # position within target_pairs_state

    # --- LOGIN PANEL ---
    with gr.Group() as login_box:
        gr.Markdown("### 🔐 User Login")
        with gr.Row():
            email_box = gr.Textbox(label="Authorized Email", placeholder="Enter your email to load the Hugging Face dataset...")
            login_btn = gr.Button("Login & Sync Dataset", variant="primary")
    # Kept outside login_box so the login result stays visible once the login
    # panel is hidden.
    login_status = gr.Markdown(value="Waiting for login...")

    # --- MAIN APPLICATION (Hidden until login) ---
    with gr.Group(visible=False) as app_box:

        with gr.Accordion("⚙️ 1. Workspace Configuration", open=True):
            gr.Markdown("Select your policies. The UI checks Hugging Face and **only loads unannotated pairs**.")

            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Origin A")
                    domain_a_dd = gr.Dropdown(choices=DOMAINS, value="Land", label="Domain A")
                    policy_a_dd = gr.Dropdown(choices=get_policy_list("Land"), label="Policy A")

                with gr.Column(scale=1):
                    gr.Markdown("### Origin B")
                    domain_b_dd = gr.Dropdown(choices=DOMAINS, value="Water", label="Domain B")
                    policy_b_dd = gr.Dropdown(choices=get_policy_list("Water"), label="Policy B")

            with gr.Row():
                target_col_dd = gr.Dropdown(choices=AVAILABLE_COLUMNS, value='Policy objectives (of the focus area)', label="Unified Target Column (Iterated Row-by-Row)")
                context_col_dd = gr.Dropdown(choices=AVAILABLE_COLUMNS, value='Policy Actions and Measures (PAMs)', label="Unified Context Column (Displayed as Chunk)")

            load_btn = gr.Button("Fetch & Filter Unlabelled Pairs 🚀", variant="primary")

        gr.Markdown("---")
        progress_text = gr.Markdown("**Progress:** Waiting for workspace load...")

        # Annotation workspace; shown only when there are pairs to annotate.
        with gr.Group(visible=False) as workspace_box:
            with gr.Row():
                with gr.Column(scale=1, variant="panel"):
                    meta_a = gr.Markdown("### 📄 Domain A Setup")
                    display_target_a = gr.Textbox(label="🎯 Target A (Current Row)", interactive=False, lines=6)
                    display_context_a = gr.Textbox(label="📚 Context A (Chunk Reference)", interactive=False, lines=8)

                with gr.Column(scale=1, variant="panel"):
                    meta_b = gr.Markdown("### 📄 Domain B Setup")
                    display_target_b = gr.Textbox(label="🎯 Target B (Current Row)", interactive=False, lines=6)
                    display_context_b = gr.Textbox(label="📚 Context B (Chunk Reference)", interactive=False, lines=8)

            with gr.Group():
                gr.Markdown("### ✍️ Annotation Decision")
                with gr.Row():
                    with gr.Column(scale=2):
                        label_radio = gr.Radio(choices=["coherent", "neutral", "incoherent"], label="1. Top-Level Coherence")
                        drill_down_dropdown = gr.Dropdown(choices=[], label="2. Drill-Down Interaction", interactive=True)
                    with gr.Column(scale=3):
                        justification_box = gr.Textbox(label="3. Justification", lines=3, placeholder="Explain your reasoning here...")

            with gr.Row():
                skip_btn = gr.Button("Skip This Pair", size="lg")
                save_btn = gr.Button("Save to HF & Next", variant="primary", size="lg")

        # Kept outside workspace_box so load warnings are visible while the
        # workspace itself is hidden.
        status_box = gr.Textbox(label="System Log", interactive=False)
185
+
186
+
187
+ # ==========================================
188
+ # 4. EVENT CONTROLLERS
189
+ # ==========================================
190
+
191
+ # --- Auth & Initialization ---
192
def authenticate(email):
    """Check an email against the allow-list and load the shared dataset.

    Args:
        email: Text typed into the login box; may be empty or ``None``.

    Returns:
        A 5-tuple matching the login outputs: status message update,
        login-box visibility update, app-box visibility update, the user tag
        and the annotations DataFrame. On rejection the last two are ``None``
        and the login box stays visible.
    """
    clean_email = (email or "").strip().lower()
    if clean_email not in APPROVED_EMAILS:
        return (
            gr.update(value="<font color='red'>Error: Unauthorized email.</font>"),
            gr.update(visible=True),
            gr.update(visible=False),
            None,
            None,
        )

    user_tag = APPROVED_EMAILS[clean_email]
    hf_df = load_hf_dataset()
    status_msg = f"✅ Logged in as **{user_tag}**. Loaded {len(hf_df)} existing annotations from Hugging Face."

    return (
        gr.update(value=status_msg),
        gr.update(visible=False),
        gr.update(visible=True),
        user_tag,
        hf_df,
    )
202
+
203
def _refresh_policy_choices(domain):
    """Return a dropdown update listing the policies of ``domain``, unselected."""
    return gr.update(choices=get_policy_list(domain), value=None)


# Event listeners must be registered while a Blocks context is active, so the
# existing `demo` context is re-entered here.
with demo:
    login_btn.click(fn=authenticate, inputs=[email_box], outputs=[login_status, login_box, app_box, user_tag_state, hf_df_state])

    # --- UI Dynamics ---
    # Changing a domain repopulates the matching policy dropdown.
    domain_a_dd.change(fn=_refresh_policy_choices, inputs=domain_a_dd, outputs=policy_a_dd)
    domain_b_dd.change(fn=_refresh_policy_choices, inputs=domain_b_dd, outputs=policy_b_dd)
208
+
209
def render_target_pair(pairs, idx):
    """Build the display values for the pair at position ``idx``.

    Args:
        pairs: List of ``(target_a, target_b)`` pairs; may be empty or ``None``.
        idx: Zero-based position of the pair to show.

    Returns:
        A 6-tuple: progress text, target A text, target B text, and three
        updates that clear the label radio, the drill-down dropdown and the
        justification box.
    """
    # Every branch clears the annotation form the same way.
    form_reset = (
        gr.update(value=None),
        gr.update(choices=[], value=None),
        gr.update(value=""),
    )
    if not pairs:
        return ("**Progress:** No unannotated pairs found.", "N/A", "N/A", *form_reset)
    if idx >= len(pairs):
        return ("**🎉 Completed all pairs in this configuration!**", "End of list.", "End of list.", *form_reset)

    prog = f"**Progress:** Annotating Pair {idx + 1} of {len(pairs)}"
    return (prog, pairs[idx][0], pairs[idx][1], *form_reset)
217
+
218
+ # --- Load & Filter Workspace ---
219
def load_workspace(dom_a, pol_a, dom_b, pol_b, tar_col, ctx_col, hf_df):
    """Build the list of not-yet-annotated target pairs for two policies.

    Args:
        dom_a, dom_b: Keys into ``DOMAIN_MAP`` for side A and side B.
        pol_a, pol_b: Selected policy names; both are required.
        tar_col: Column whose distinct values are paired row by row.
        ctx_col: Column whose distinct values are shown as a context chunk.
        hf_df: Annotations loaded at login, or ``None``.

    Returns:
        A list of 14 values in the order of the load button's outputs:
        pairs, index, progress text, meta A, target A, context A, meta B,
        target B, context B, label reset, drill-down reset, justification
        reset, status message, workspace visibility.
    """
    if not pol_a or not pol_b:
        # Leave the first 12 outputs untouched, put the warning in the status
        # box (13th) and hide the workspace (14th).
        return [gr.skip()] * 12 + [
            gr.update(value="⚠️ Select both policies first!"),
            gr.update(visible=False),
        ]

    df_a = DOMAIN_MAP[dom_a]
    df_b = DOMAIN_MAP[dom_b]

    sec_a = get_sector_for_policy(df_a, pol_a)
    sec_b = get_sector_for_policy(df_b, pol_b)
    meta_a_str = f"### 📄 Domain A\n**Sector:** {sec_a} | **Policy:** {pol_a}"
    meta_b_str = f"### 📄 Domain B\n**Sector:** {sec_b} | **Policy:** {pol_b}"

    targets_a = get_unique_items(df_a, pol_a, tar_col)
    targets_b = get_unique_items(df_b, pol_b, tar_col)
    all_pairs = [(a, b) for a in targets_a for b in targets_b]

    # FILTER OUT ALREADY ANNOTATED PAIRS
    # Collect the annotated (target A, target B) combinations for this policy
    # pair once, instead of scanning the whole table for every candidate.
    if hf_df is None or hf_df.empty:
        annotated = set()
    else:
        same_policies = hf_df[
            (hf_df["Policy_A_Name"] == pol_a) &
            (hf_df["Policy_B_Name"] == pol_b)
        ]
        annotated = set(zip(same_policies["Target_A_Row"], same_policies["Target_B_Row"]))
    unannotated_pairs = [pair for pair in all_pairs if pair not in annotated]

    contexts_a = get_unique_items(df_a, pol_a, ctx_col)
    contexts_b = get_unique_items(df_b, pol_b, ctx_col)
    ctx_a_chunk = "\n\n".join([f"• {c}" for c in contexts_a]) if contexts_a else "No context data."
    ctx_b_chunk = "\n\n".join([f"• {c}" for c in contexts_b]) if contexts_b else "No context data."

    prog, target_a_display, target_b_display, reset_lbl, reset_drill, reset_just = render_target_pair(unannotated_pairs, 0)

    status_msg = f"Workspace loaded. Filtered {len(all_pairs) - len(unannotated_pairs)} already annotated items. {len(unannotated_pairs)} remaining."

    return [
        unannotated_pairs, 0,
        prog, meta_a_str, target_a_display, ctx_a_chunk,
        meta_b_str, target_b_display, ctx_b_chunk,
        reset_lbl, reset_drill, reset_just,
        status_msg,
        gr.update(visible=len(unannotated_pairs) > 0),  # Show workspace if items exist
    ]
264
+
265
def update_drill(label):
    """Return a dropdown update offering the drill-down choices for ``label``.

    A label with exactly one choice gets it preselected; a label with no
    choices (including ``None``) leaves the dropdown empty and disabled.
    """
    choices = DRILL_DOWN_MAP.get(label, [])
    preselected = choices[0] if len(choices) == 1 else None
    return gr.update(choices=choices, value=preselected, interactive=bool(choices))


# Registered inside the Blocks context, as event listeners require.
with demo:
    label_radio.change(fn=update_drill, inputs=label_radio, outputs=drill_down_dropdown)
269
+
270
+ # --- Save to Hugging Face ---
271
def save_action(idx, pairs, ctx_a_chunk, ctx_b_chunk, dom_a, pol_a, dom_b, pol_b, tar_col, ctx_col, label, drill_down, justification, user_tag, hf_df):
    """Record the annotation for the current pair and push the CSV to the Hub.

    Args:
        idx: Position of the current pair in ``pairs``.
        pairs: List of ``(target_a, target_b)`` pairs; may be empty or ``None``.
        ctx_a_chunk, ctx_b_chunk: Context text currently displayed per side.
        dom_a, pol_a, dom_b, pol_b: Selected domains and policies.
        tar_col, ctx_col: Selected target and context column names.
        label, drill_down: Required annotation choices.
        justification: Optional free text.
        user_tag: Tag of the logged-in annotator.
        hf_df: This session's copy of the annotations table.

    Returns:
        A 3-tuple: status message update, next index, updated annotations
        table. On a validation failure the index and table are returned
        unchanged. The index advances even if the upload fails; the new row
        stays in the returned table and is included in the next upload.
    """
    if not label or not drill_down:
        return gr.update(value="⚠️ Error: Label and Drill-Down are required."), idx, hf_df
    if not pairs or idx >= len(pairs):
        return gr.update(value="⚠️ End of list."), idx, hf_df

    new_row = {
        "Domain_A": dom_a,
        "Sector_A": get_sector_for_policy(DOMAIN_MAP[dom_a], pol_a),
        "Policy_A_Name": pol_a,
        "Domain_B": dom_b,
        "Sector_B": get_sector_for_policy(DOMAIN_MAP[dom_b], pol_b),
        "Policy_B_Name": pol_b,
        "Target_Column": tar_col,
        "Target_A_Row": pairs[idx][0],
        "Target_B_Row": pairs[idx][1],
        "Context_Column": ctx_col,
        "Context_A_Chunk": ctx_a_chunk,
        "Context_B_Chunk": ctx_b_chunk,
        "Coherence_Label": label,
        "Drill_Down_Label": drill_down,
        "Justification": justification.strip() if justification else "",
        "AnnotatorUsername": user_tag,
    }

    # 1. Update State DF
    session_df = pd.concat([hf_df, pd.DataFrame([new_row])], ignore_index=True)

    # 2. Push to Hugging Face
    try:
        # Each session holds a snapshot taken at login. Uploading it as-is
        # would erase rows other annotators saved since then, so re-read the
        # current file and merge before writing. This narrows, but does not
        # eliminate, the window for concurrent overwrites.
        merged = pd.concat([load_hf_dataset(), session_df], ignore_index=True)
        merged = merged.drop_duplicates(
            subset=[
                "Domain_A", "Policy_A_Name", "Domain_B", "Policy_B_Name",
                "Target_Column", "Target_A_Row", "Target_B_Row", "AnnotatorUsername",
            ],
            keep="last",  # the session's copy of a row wins over the stored one
            ignore_index=True,
        )
        csv_bytes = merged.to_csv(index=False).encode('utf-8')

        api = HfApi()
        api.upload_file(
            path_or_fileobj=io.BytesIO(csv_bytes),
            path_in_repo=HF_CSV_FILE,
            repo_id=HF_DATASET_REPO,
            token=HF_TOKEN,
            repo_type="dataset",
        )
        session_df = merged
        log_msg = f"✅ Pair {idx + 1} saved to Hub by {user_tag}."
    except Exception as e:
        # Best effort: report the failure in the UI log instead of raising.
        log_msg = f"❌ Error saving to Hub: {e}"

    return gr.update(value=log_msg), idx + 1, session_df
319
+
320
def skip_action(idx):
    """Advance past the current pair without saving anything.

    Returns:
        A 2-tuple: status message update naming the skipped pair (1-based),
        and the next index.
    """
    message = f"⏭️ Skipped Pair {idx + 1}."
    return gr.update(value=message), idx + 1
322
+
323
+ # ==========================================
324
+ # 5. BUTTON WIRING
325
+ # ==========================================
326
+
327
# Event listeners must be registered while a Blocks context is active, so the
# existing `demo` context is re-entered here.
with demo:
    # Load: rebuild the pair list and refresh every workspace component.
    load_btn.click(
        fn=load_workspace,
        inputs=[
            domain_a_dd, policy_a_dd,
            domain_b_dd, policy_b_dd,
            target_col_dd, context_col_dd, hf_df_state,
        ],
        outputs=[
            target_pairs_state, current_index_state,
            progress_text, meta_a, display_target_a, display_context_a,
            meta_b, display_target_b, display_context_b,
            label_radio, drill_down_dropdown, justification_box, status_box, workspace_box,
        ],
    )

    # Save: store the annotation, then render whichever pair the index now
    # points at.
    save_btn.click(
        fn=save_action,
        inputs=[
            current_index_state, target_pairs_state, display_context_a, display_context_b,
            domain_a_dd, policy_a_dd, domain_b_dd, policy_b_dd,
            target_col_dd, context_col_dd,
            label_radio, drill_down_dropdown, justification_box, user_tag_state, hf_df_state,
        ],
        outputs=[status_box, current_index_state, hf_df_state],
    ).then(
        fn=render_target_pair,
        inputs=[target_pairs_state, current_index_state],
        outputs=[progress_text, display_target_a, display_target_b, label_radio, drill_down_dropdown, justification_box],
    )

    # Skip: advance the index, then render the next pair.
    skip_btn.click(
        fn=skip_action, inputs=[current_index_state], outputs=[status_box, current_index_state]
    ).then(
        fn=render_target_pair,
        inputs=[target_pairs_state, current_index_state],
        outputs=[progress_text, display_target_a, display_target_b, label_radio, drill_down_dropdown, justification_box],
    )

demo.launch(debug=True)