Spaces:

akaburia
/

policy-coherence-annotations

Sleeping

App Files Files Community

akaburia committed on Apr 7

Commit

fe635a0

verified ·

1 Parent(s): fd436ca

Create app.py

Browse files

Files changed (1) hide show

app.py +365 -0

app.py ADDED Viewed

	@@ -0,0 +1,365 @@

+import pandas as pd
+import gradio as gr
+import os
+import io
+import json
+from google.colab import auth
+import gspread
+from google.auth import default
+from huggingface_hub import HfApi, hf_hub_download
+# ==========================================
+# 1. AUTHENTICATION (GOOGLE SHEETS)
+# ==========================================
+# NOTE(review): `auth` comes from `google.colab`, which is presumably only importable inside a
+# Colab runtime — confirm this step works where this app.py is actually deployed.
+print("Authenticating with Google...")
+# Authenticate the current user through google.colab, then pick up the default credentials.
+auth.authenticate_user()
+creds, _ = default()
+# Authorise a gspread client with those credentials and open the source spreadsheet by key.
+gc = gspread.authorize(creds)
+spreadsheet = gc.open_by_key('12JM3u10WSpshCcSUEmjhRP5i2bWe9MAK_jrbI56WOCU')
+def get_worksheet_by_number(spreadsheet, worksheet_number, format=True):
+    """Read one worksheet of ``spreadsheet`` into a pandas DataFrame.
+
+    The sheet's first row always supplies the initial column names. With
+    ``format=True`` the frame is then reshaped: the third data row becomes the
+    header, everything up to and including it is dropped, empty strings become
+    ``pd.NA`` and the 'Sector' and 'Policy' columns are forward-filled.
+
+    Args:
+        spreadsheet: object exposing ``get_worksheet(index)``.
+        worksheet_number: index passed to ``get_worksheet``.
+        format: when false, return the raw frame without any reshaping.
+
+    Returns:
+        The resulting DataFrame.
+    """
+    sheet = spreadsheet.get_worksheet(worksheet_number)
+    all_rows = sheet.get_all_values()
+    table = pd.DataFrame.from_records(all_rows[1:], columns=all_rows[0])
+    if not format:
+        return table
+    # Data row index 2 holds the real column titles; the rows above it are discarded.
+    real_header = table.iloc[2].values
+    table = table.iloc[3:]
+    table.columns = real_header
+    table = table.replace('', pd.NA)
+    # Blank cells in these two columns inherit the closest value above them.
+    for column in ('Sector', 'Policy'):
+        table[column] = table[column].ffill()
+    return table
+print("Loading Data from Google Sheets...")
+# Worksheet indices 3 and 5 are read as the Land and Water tables respectively
+# (indices are hard-coded; they must be updated if the spreadsheet's tabs are reordered).
+land_df = get_worksheet_by_number(spreadsheet, 3, format=True)
+water_df = get_worksheet_by_number(spreadsheet, 5, format=True)
+# Domain name -> DataFrame lookup used by the helpers and UI callbacks below.
+DOMAIN_MAP = {"Land": land_df, "Water": water_df}
+# Domain names offered in the two domain dropdowns.
+DOMAINS = list(DOMAIN_MAP.keys())
+# ==========================================
+# 2. CONFIGURATION & HUGGING FACE SETUP
+# ==========================================
+# Hugging Face Settings
+# The access token is read only from the HF_TOKEN environment variable (for example a
+# Space secret). SECURITY: never embed a token literal in this file, not even split into
+# pieces; any token that was ever committed must be treated as leaked and revoked.
+HF_TOKEN = os.environ.get("HF_TOKEN")
+if not HF_TOKEN:
+    print("⚠️ HF_TOKEN is not set; Hugging Face requests will be made without an explicit token.")
+# Dataset repository and file that hold the shared annotation CSV.
+HF_DATASET_REPO = "akaburia/policy-evaluations"
+HF_CSV_FILE = "policy_coherence_annotations.csv"
+# Approved Emails Parsing
+# APPROVED_EMAILS is expected to hold a JSON object mapping email address -> annotator tag.
+# Keys are lower-cased so the login check can compare case-insensitively.
+emails_env_string = os.environ.get("APPROVED_EMAILS", "{}")
+try:
+    APPROVED_EMAILS = json.loads(emails_env_string)
+    # Valid JSON that is not an object (a list, string, number, ...) has no .items(); the
+    # resulting AttributeError is handled below like any other malformed value.
+    APPROVED_EMAILS = {k.lower(): v for k, v in APPROVED_EMAILS.items()}
+except (json.JSONDecodeError, TypeError, AttributeError) as e:
+    print(f"⚠️ Error parsing APPROVED_EMAILS: {e}. Using fallback.")
+    # NOTE: this fallback is used only when the variable is set but malformed; when it is
+    # unset the default "{}" parses successfully and yields an empty allow-list.
+    APPROVED_EMAILS = {
+        "kaburiaaustin1@tahmo.org": "user1",
+        "e.ramos@tudelft.nl" : "user2",
+        "eunice.pramos@gmail.com" : "user3",
+        "e.abraham@tudelft.nl" : "user4",
+        "dene.abv@gmail.com" : "user5",
+        "rafatoufofana.abv@gmail.com" : "user6",
+        "annorfrank@tahmo.org" : "user7",
+        "n.marley@tahmo.org" : "user8",
+        "h.f.hagenaars@tudelft.nl" : "user9",
+        "kaburiaaustin1@gmail.com" : "user10",
+        "faridakone@gmail.com": "user11"
+    }
+# Sheet column names offered as choices in the target and context column dropdowns.
+AVAILABLE_COLUMNS = [
+    'Sector', 'Policy', 'General Vision', 'General policy objective',
+    'Strategic objectives / directions', 'Focus Area / Policy Action Category',
+    'Policy objectives (of the focus area)', 'Policy Actions and Measures (PAMs)',
+    'Policy Targets / Indicators'
+]
+# Top-level coherence label -> drill-down options offered once that label is selected.
+DRILL_DOWN_MAP = {
+    "coherent": ["+3 Indivisible", "+2 Reinforcing", "+1 Enabling"],
+    "neutral": ["0 Consistent"],
+    "incoherent": ["-1 Constraining", "-2 Counteracting", "-3 Cancelling"]
+}
+def get_unique_items(df, policy_name, col_name):
+    """Return the distinct, non-blank values of ``col_name`` for one policy.
+
+    Args:
+        df: frame with a 'Policy' column and the requested column.
+        policy_name: value to match in the 'Policy' column.
+        col_name: column whose values are collected.
+
+    Returns:
+        Stripped string values in first-seen order; an empty list when the
+        policy does not occur in ``df``.
+    """
+    if policy_name not in df['Policy'].values:
+        return []
+    rows_for_policy = df[df['Policy'] == policy_name]
+    distinct_values = rows_for_policy[col_name].dropna().unique().tolist()
+    cleaned = []
+    for raw_value in distinct_values:
+        text = str(raw_value).strip()
+        # Whitespace-only cells carry no content and are left out.
+        if text:
+            cleaned.append(text)
+    return cleaned
+def get_sector_for_policy(df, policy_name):
+    """Return the stripped 'Sector' of the first row whose 'Policy' matches.
+
+    Falls back to the string "Unknown Sector" when ``policy_name`` does not
+    occur in the 'Policy' column of ``df``.
+    """
+    if policy_name not in df['Policy'].values:
+        return "Unknown Sector"
+    matching_sectors = df.loc[df['Policy'] == policy_name, 'Sector']
+    first_sector = matching_sectors.iloc[0]
+    return str(first_sector).strip()
+def get_policy_list(domain_key):
+    """Return the distinct policy names available for a domain.
+
+    Args:
+        domain_key: key into ``DOMAIN_MAP``; a falsy value yields an empty list.
+
+    Returns:
+        Policy values in first-seen order, excluding missing and blank entries.
+
+    Raises:
+        KeyError: if ``domain_key`` is truthy but not a key of ``DOMAIN_MAP``.
+    """
+    if not domain_key:
+        return []
+    # Missing values must be dropped explicitly: str(pd.NA) is '<NA>', which is truthy and
+    # would otherwise slip through the blank check below.
+    policies = DOMAIN_MAP[domain_key]['Policy'].dropna().unique()
+    return [p for p in policies if str(p).strip()]
+# HF Data Loader
+def load_hf_dataset():
+    """Download the shared annotation CSV from the Hugging Face Hub as a DataFrame.
+
+    Returns:
+        The stored annotations, or an empty DataFrame with the annotation columns when
+        the CSV does not exist in the dataset repo yet (or exists but is empty).
+
+    Raises:
+        Exception: any other failure (network, authentication, missing repo, parsing)
+            propagates to the caller. Treating such failures as "no annotations yet"
+            would hand callers an empty frame that a later save uploads over the
+            existing CSV.
+    """
+    # Imported here so the block is self-contained; the class lives in `errors` in newer
+    # huggingface_hub releases and in `utils` in older ones.
+    try:
+        from huggingface_hub.errors import EntryNotFoundError
+    except ImportError:
+        from huggingface_hub.utils import EntryNotFoundError
+    annotation_columns = [
+        "Domain_A", "Sector_A", "Policy_A_Name",
+        "Domain_B", "Sector_B", "Policy_B_Name",
+        "Target_Column", "Target_A_Row", "Target_B_Row",
+        "Context_Column", "Context_A_Chunk", "Context_B_Chunk",
+        "Coherence_Label", "Drill_Down_Label", "Justification", "AnnotatorUsername"
+    ]
+    try:
+        path = hf_hub_download(repo_id=HF_DATASET_REPO, filename=HF_CSV_FILE, repo_type="dataset", token=HF_TOKEN)
+        return pd.read_csv(path)
+    except (EntryNotFoundError, pd.errors.EmptyDataError) as e:
+        # Only a missing or empty file means there is genuinely nothing to load.
+        print(f"HF Dataset not found or error loading ({e}). Starting fresh.")
+        return pd.DataFrame(columns=annotation_columns)
+# ==========================================
+# 3. GRADIO UI DESIGN
+# ==========================================
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🏛️ Collaborative Policy Coherence Annotator")
+    # Global Application States
+    # hf_df_state: annotations DataFrame loaded at login and extended on each save.
+    # user_tag_state: annotator tag resolved from the login email.
+    # target_pairs_state / current_index_state: queue of (target A, target B) pairs and the position in it.
+    hf_df_state = gr.State()
+    user_tag_state = gr.State()
+    target_pairs_state = gr.State([])
+    current_index_state = gr.State(0)
+    # --- LOGIN PANEL ---
+    with gr.Group() as login_box:
+        gr.Markdown("### 🔐 User Login")
+        with gr.Row():
+            email_box = gr.Textbox(label="Authorized Email", placeholder="Enter your email to load the Hugging Face dataset...")
+        login_btn = gr.Button("Login & Sync Dataset", variant="primary")
+        login_status = gr.Markdown(value="Waiting for login...")
+    # --- MAIN APPLICATION (Hidden until login) ---
+    with gr.Group(visible=False) as app_box:
+        with gr.Accordion("⚙️ 1. Workspace Configuration", open=True):
+            gr.Markdown("Select your policies. The UI checks Hugging Face and **only loads unannotated pairs**.")
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.Markdown("### Origin A")
+                    domain_a_dd = gr.Dropdown(choices=DOMAINS, value="Land", label="Domain A")
+                    policy_a_dd = gr.Dropdown(choices=get_policy_list("Land"), label="Policy A")
+                with gr.Column(scale=1):
+                    gr.Markdown("### Origin B")
+                    domain_b_dd = gr.Dropdown(choices=DOMAINS, value="Water", label="Domain B")
+                    policy_b_dd = gr.Dropdown(choices=get_policy_list("Water"), label="Policy B")
+            with gr.Row():
+                # The same target/context column is applied to both sides A and B.
+                target_col_dd = gr.Dropdown(choices=AVAILABLE_COLUMNS, value='Policy objectives (of the focus area)', label="Unified Target Column (Iterated Row-by-Row)")
+                context_col_dd = gr.Dropdown(choices=AVAILABLE_COLUMNS, value='Policy Actions and Measures (PAMs)', label="Unified Context Column (Displayed as Chunk)")
+            load_btn = gr.Button("Fetch & Filter Unlabelled Pairs 🚀", variant="primary")
+        gr.Markdown("---")
+        # progress_text sits outside workspace_box, so it is shown even while the workspace is hidden.
+        progress_text = gr.Markdown("**Progress:** Waiting for workspace load...")
+        # Annotation workspace; created hidden and toggled by the load_btn handler.
+        with gr.Group(visible=False) as workspace_box:
+            with gr.Row():
+                with gr.Column(scale=1, variant="panel"):
+                    meta_a = gr.Markdown("### 📄 Domain A Setup")
+                    display_target_a = gr.Textbox(label="🎯 Target A (Current Row)", interactive=False, lines=6)
+                    display_context_a = gr.Textbox(label="📚 Context A (Chunk Reference)", interactive=False, lines=8)
+                with gr.Column(scale=1, variant="panel"):
+                    meta_b = gr.Markdown("### 📄 Domain B Setup")
+                    display_target_b = gr.Textbox(label="🎯 Target B (Current Row)", interactive=False, lines=6)
+                    display_context_b = gr.Textbox(label="📚 Context B (Chunk Reference)", interactive=False, lines=8)
+            with gr.Group():
+                gr.Markdown("### ✍️ Annotation Decision")
+                with gr.Row():
+                    with gr.Column(scale=2):
+                        label_radio = gr.Radio(choices=["coherent", "neutral", "incoherent"], label="1. Top-Level Coherence")
+                        # Choices start empty and are filled in when a top-level label is picked.
+                        drill_down_dropdown = gr.Dropdown(choices=[], label="2. Drill-Down Interaction", interactive=True)
+                    with gr.Column(scale=3):
+                        justification_box = gr.Textbox(label="3. Justification", lines=3, placeholder="Explain your reasoning here...")
+                with gr.Row():
+                    skip_btn = gr.Button("Skip This Pair", size="lg")
+                    save_btn = gr.Button("Save to HF & Next", variant="primary", size="lg")
+                # status_box lives inside workspace_box, so it is only visible once a workspace is shown.
+                status_box = gr.Textbox(label="System Log", interactive=False)
+    # ==========================================
+    # 4. EVENT CONTROLLERS
+    # ==========================================
+    # --- Auth & Initialization ---
+    def authenticate(email):
+        """Check the entered email against the allow-list and load the annotations.
+
+        Returns a 5-tuple for (login_status, login_box, app_box, user_tag_state,
+        hf_df_state). An unknown email shows an error, keeps the login panel
+        visible and the app hidden, and clears both states. A known email swaps
+        the panels and stores the annotator tag and the loaded DataFrame.
+        """
+        normalized_email = email.strip().lower()
+        try:
+            user_tag = APPROVED_EMAILS[normalized_email]
+        except KeyError:
+            rejection = "<font color='red'>Error: Unauthorized email.</font>"
+            return (
+                gr.update(value=rejection),
+                gr.update(visible=True),
+                gr.update(visible=False),
+                None,
+                None,
+            )
+        hf_df = load_hf_dataset()
+        status_msg = f"✅ Logged in as **{user_tag}**. Loaded {len(hf_df)} existing annotations from Hugging Face."
+        return (
+            gr.update(value=status_msg),
+            gr.update(visible=False),
+            gr.update(visible=True),
+            user_tag,
+            hf_df,
+        )
+    login_btn.click(fn=authenticate, inputs=[email_box], outputs=[login_status, login_box, app_box, user_tag_state, hf_df_state])
+    # --- UI Dynamics ---
+    # Changing a domain reloads that side's policy choices and clears the selected policy.
+    domain_a_dd.change(fn=lambda d: gr.update(choices=get_policy_list(d), value=None), inputs=domain_a_dd, outputs=policy_a_dd)
+    domain_b_dd.change(fn=lambda d: gr.update(choices=get_policy_list(d), value=None), inputs=domain_b_dd, outputs=policy_b_dd)
+    def render_target_pair(pairs, idx):
+        """Produce the display values for the pair at position ``idx``.
+
+        Returns a 6-tuple: progress markdown, target A text, target B text, and
+        three updates that clear the label radio, the drill-down dropdown and the
+        justification box. Placeholder texts are returned when ``pairs`` is
+        empty or ``idx`` has moved past the last pair.
+        """
+        # The annotation inputs are reset in every case.
+        cleared_inputs = (
+            gr.update(value=None),
+            gr.update(choices=[], value=None),
+            gr.update(value=""),
+        )
+        if not pairs:
+            headline = "**Progress:** No unannotated pairs found."
+            left_text = right_text = "N/A"
+        elif idx >= len(pairs):
+            headline = "**🎉 Completed all pairs in this configuration!**"
+            left_text = right_text = "End of list."
+        else:
+            headline = f"**Progress:** Annotating Pair {idx + 1} of {len(pairs)}"
+            left_text, right_text = pairs[idx][0], pairs[idx][1]
+        return (headline, left_text, right_text, *cleared_inputs)
+    # --- Load & Filter Workspace ---
+    def load_workspace(dom_a, pol_a, dom_b, pol_b, tar_col, ctx_col, hf_df):
+        """Build the queue of not-yet-annotated target pairs for the selected policies.
+
+        Args:
+            dom_a, dom_b: keys into ``DOMAIN_MAP`` for sides A and B.
+            pol_a, pol_b: selected policy names; both are required.
+            tar_col: column whose distinct values are paired up for annotation.
+            ctx_col: column whose distinct values are shown as context.
+            hf_df: DataFrame of existing annotations used to filter the pairs.
+
+        Returns:
+            A 14-item list, one entry per output wired to ``load_btn.click``:
+            pair queue, start index, progress text, meta/target/context for A,
+            meta/target/context for B, three input resets, status message and
+            the workspace visibility update.
+        """
+        if not pol_a or not pol_b:
+            # One value per wired output (14 in total). The warning goes to progress_text
+            # (always visible) and to status_box; everything else, including the pair
+            # queue and index states, is left untouched and the workspace stays hidden.
+            warning = "⚠️ Select both policies first!"
+            return (
+                [gr.skip(), gr.skip(), gr.update(value=warning)]
+                + [gr.skip()] * 9
+                + [gr.update(value=warning), gr.update(visible=False)]
+            )
+        df_a = DOMAIN_MAP[dom_a]
+        df_b = DOMAIN_MAP[dom_b]
+        sec_a = get_sector_for_policy(df_a, pol_a)
+        sec_b = get_sector_for_policy(df_b, pol_b)
+        meta_a_str = f"### 📄 Domain A\n**Sector:** {sec_a} | **Policy:** {pol_a}"
+        meta_b_str = f"### 📄 Domain B\n**Sector:** {sec_b} | **Policy:** {pol_b}"
+        targets_a = get_unique_items(df_a, pol_a, tar_col)
+        targets_b = get_unique_items(df_b, pol_b, tar_col)
+        all_pairs = [(a, b) for a in targets_a for b in targets_b]
+        # FILTER OUT ALREADY ANNOTATED PAIRS
+        # Narrow the annotations to this policy combination once, then test each candidate
+        # pair against a set instead of re-scanning the whole frame per pair.
+        same_policies = hf_df[
+            (hf_df["Policy_A_Name"] == pol_a) &
+            (hf_df["Policy_B_Name"] == pol_b)
+        ]
+        annotated = set(zip(same_policies["Target_A_Row"], same_policies["Target_B_Row"]))
+        unannotated_pairs = [pair for pair in all_pairs if pair not in annotated]
+        contexts_a = get_unique_items(df_a, pol_a, ctx_col)
+        contexts_b = get_unique_items(df_b, pol_b, ctx_col)
+        ctx_a_chunk = "\n\n".join([f"• {c}" for c in contexts_a]) if contexts_a else "No context data."
+        ctx_b_chunk = "\n\n".join([f"• {c}" for c in contexts_b]) if contexts_b else "No context data."
+        prog, target_a_display, target_b_display, reset_lbl, reset_drill, reset_just = render_target_pair(unannotated_pairs, 0)
+        status_msg = f"Workspace loaded. Filtered {len(all_pairs) - len(unannotated_pairs)} already annotated items. {len(unannotated_pairs)} remaining."
+        return [
+            unannotated_pairs, 0,
+            prog, meta_a_str, target_a_display, ctx_a_chunk,
+            meta_b_str, target_b_display, ctx_b_chunk,
+            reset_lbl, reset_drill, reset_just,
+            status_msg,
+            gr.update(visible=len(unannotated_pairs) > 0) # Show workspace if items exist
+        ]
+    def update_drill(label):
+        """Refresh the drill-down dropdown for the chosen top-level label.
+
+        The dropdown receives the options mapped to ``label`` in
+        ``DRILL_DOWN_MAP`` (none for an unmapped label), pre-selects the option
+        when there is exactly one, and is interactive only when options exist.
+        """
+        options = DRILL_DOWN_MAP.get(label, [])
+        if len(options) == 1:
+            preselected = options[0]
+        else:
+            preselected = None
+        has_options = len(options) > 0
+        return gr.update(choices=options, value=preselected, interactive=has_options)
+    label_radio.change(fn=update_drill, inputs=label_radio, outputs=drill_down_dropdown)
+    # --- Save to Hugging Face ---
+    def save_action(idx, pairs, ctx_a_chunk, ctx_b_chunk, dom_a, pol_a, dom_b, pol_b, tar_col, ctx_col, label, drill_down, justification, user_tag, hf_df):
+        """Append the current annotation to the dataset and upload the CSV to the Hub.
+
+        Args:
+            idx: position of the current pair in ``pairs``.
+            pairs: queue of (target A, target B) pairs.
+            ctx_a_chunk, ctx_b_chunk: context texts currently displayed.
+            dom_a, pol_a, dom_b, pol_b: selected domains and policies.
+            tar_col, ctx_col: selected target and context column names.
+            label, drill_down: required annotation choices.
+            justification: optional free text; stored stripped.
+            user_tag: annotator tag recorded with the row.
+            hf_df: DataFrame of annotations held in session state.
+
+        Returns:
+            A 3-tuple (status update, next index, annotations DataFrame). The index
+            advances and the new row is kept only when the upload succeeded; on a
+            validation error or a failed upload both are returned unchanged so the
+            same pair can be saved again.
+        """
+        if not label or not drill_down:
+            return gr.update(value="⚠️ Error: Label and Drill-Down are required."), idx, hf_df
+        if idx >= len(pairs):
+            return gr.update(value="⚠️ End of list."), idx, hf_df
+        new_row = {
+            "Domain_A": dom_a,
+            "Sector_A": get_sector_for_policy(DOMAIN_MAP[dom_a], pol_a),
+            "Policy_A_Name": pol_a,
+            "Domain_B": dom_b,
+            "Sector_B": get_sector_for_policy(DOMAIN_MAP[dom_b], pol_b),
+            "Policy_B_Name": pol_b,
+            "Target_Column": tar_col,
+            "Target_A_Row": pairs[idx][0],
+            "Target_B_Row": pairs[idx][1],
+            "Context_Column": ctx_col,
+            "Context_A_Chunk": ctx_a_chunk,
+            "Context_B_Chunk": ctx_b_chunk,
+            "Coherence_Label": label,
+            "Drill_Down_Label": drill_down,
+            "Justification": justification.strip() if justification else "",
+            "AnnotatorUsername": user_tag
+        }
+        # 1. Stage the new row in a separate frame; session state is only replaced once the
+        #    upload has succeeded, so a retry after a failure cannot store the row twice.
+        updated_df = pd.concat([hf_df, pd.DataFrame([new_row])], ignore_index=True)
+        # 2. Push to Hugging Face
+        # NOTE(review): the uploaded file is built from this session's copy of the data, so
+        # rows saved by other annotators since this session logged in are presumably
+        # overwritten — confirm, and consider re-fetching the CSV before uploading.
+        try:
+            csv_bytes = updated_df.to_csv(index=False).encode('utf-8')
+            api = HfApi()
+            api.upload_file(
+                path_or_fileobj=io.BytesIO(csv_bytes),
+                path_in_repo=HF_CSV_FILE,
+                repo_id=HF_DATASET_REPO,
+                token=HF_TOKEN,
+                repo_type="dataset"
+            )
+        except Exception as e:
+            # Stay on the current pair and keep the previous state: nothing was persisted.
+            return gr.update(value=f"❌ Error saving to Hub: {e}"), idx, hf_df
+        return gr.update(value=f"✅ Pair {idx + 1} saved to Hub by {user_tag}."), idx + 1, updated_df
+    def skip_action(idx):
+        """Move past the current pair without saving anything.
+
+        Returns a status update naming the skipped pair (1-based) and the next index.
+        """
+        following_idx = idx + 1
+        notice = f"⏭️ Skipped Pair {following_idx}."
+        return gr.update(value=notice), following_idx
+    # ==========================================
+    # 5. BUTTON WIRING
+    # ==========================================
+    # load_workspace must return one value per entry of `outputs` (14), in this order.
+    load_btn.click(
+        fn=load_workspace,
+        inputs=[
+            domain_a_dd, policy_a_dd,
+            domain_b_dd, policy_b_dd,
+            target_col_dd, context_col_dd, hf_df_state
+        ],
+        outputs=[
+            target_pairs_state, current_index_state,
+            progress_text, meta_a, display_target_a, display_context_a,
+            meta_b, display_target_b, display_context_b,
+            label_radio, drill_down_dropdown, justification_box, status_box, workspace_box
+        ]
+    )
+    # Save: store the annotation first, then re-render using the index save_action returned.
+    save_btn.click(
+        fn=save_action,
+        inputs=[
+            current_index_state, target_pairs_state, display_context_a, display_context_b,
+            domain_a_dd, policy_a_dd, domain_b_dd, policy_b_dd,
+            target_col_dd, context_col_dd,
+            label_radio, drill_down_dropdown, justification_box, user_tag_state, hf_df_state
+        ],
+        outputs=[status_box, current_index_state, hf_df_state]
+    ).then(
+        fn=render_target_pair,
+        inputs=[target_pairs_state, current_index_state],
+        outputs=[progress_text, display_target_a, display_target_b, label_radio, drill_down_dropdown, justification_box]
+    )
+    # Skip: advance the index without saving, then re-render the same way.
+    skip_btn.click(
+        fn=skip_action, inputs=[current_index_state], outputs=[status_box, current_index_state]
+    ).then(
+        fn=render_target_pair,
+        inputs=[target_pairs_state, current_index_state],
+        outputs=[progress_text, display_target_a, display_target_b, label_radio, drill_down_dropdown, justification_box]
+    )
+# Start the Gradio server for the interface defined above.
+demo.launch(debug=True)