Spaces:
Running
Running
| import gradio as gr | |
| import pandas as pd | |
# --- HELPER: Data Loading ---
def load_crows_pairs(path: str = "crows_pairs_anonymized.csv") -> pd.DataFrame:
    """Load the CrowS-Pairs CSV into a DataFrame for the dataset viewer.

    Args:
        path: Location of the CSV, handed straight to ``pandas.read_csv``.
            Defaults to ``crows_pairs_anonymized.csv``, resolved relative to
            the current working directory.

    Returns:
        The parsed table under the CSV's own column names (no renaming is
        applied). If loading fails for any reason, a one-row DataFrame with
        a single ``"Error"`` column describing the failure is returned
        instead of raising.
    """
    try:
        return pd.read_csv(path)
    except Exception as exc:
        # Deliberately broad: this function is wired to a button click, so a
        # missing or malformed file is reported inside the table rather than
        # propagated as an exception.
        return pd.DataFrame({"Error": [f"Failed to load data: {exc}"]})
# --- HELPER: HTML Content ---
# Inline CSS fragments interpolated into the HTML snippets below.
SECTION_TITLE_STYLE = (
    "font-family: sans-serif; font-size: 28px; margin-bottom: 20px; "
    "color: #111; border-bottom: 3px solid #2563eb; padding-bottom: 10px;"
)
SUB_TITLE_STYLE = (
    "font-family: sans-serif; font-size: 22px; margin-top: 30px; "
    "margin-bottom: 10px; color: #333; border-bottom: 1px solid #ddd; "
    "padding-bottom: 5px;"
)
TEXT_STYLE = "font-family: sans-serif; font-size: 16px; line-height: 1.6; color: #444;"
LINK_STYLE = "color: #2563eb; text-decoration: none; font-weight: bold;"

# NOTE(review): the icons in front of the two "Section" headings arrived
# mis-encoded (shown as a lone "π"); the emoji used below are best-guess
# replacements — confirm against the intended design.

# 1. Main Info Block: dataset links, structure summary and evaluation methods.
info_html = f"""
<div style="{TEXT_STYLE}">
    <h2 style="{SECTION_TITLE_STYLE}">📊 Section 1: Summary</h2>
    <h3 style="{SUB_TITLE_STYLE}">Dataset Info</h3>
    <ul style="list-style-type: none; padding-left: 0;">
        <li><strong>Repo:</strong> <a href="https://github.com/nyu-mll/crows-pairs" target="_blank" style="{LINK_STYLE}">nyu-mll/crows-pairs</a></li>
        <li><strong>Paper:</strong> <a href="https://aclanthology.org/2020.emnlp-main.154.pdf" target="_blank" style="{LINK_STYLE}">Nangia et al. (2020)</a></li>
        <li><strong>License:</strong> <a href="https://github.com/nyu-mll/crows-pairs?tab=readme-ov-file#license" target="_blank" style="{LINK_STYLE}">CC-BY-SA 4.0</a></li>
    </ul>
    <h3 style="{SUB_TITLE_STYLE}">Structure</h3>
    <p>The dataset contains <strong>1,508 pairs</strong> of sentences testing social bias (Race, Gender, Religion, etc.).</p>
    <h3 style="{SUB_TITLE_STYLE}">Evaluation Methods (2025)</h3>
    <div style="background-color: #f8f9fa; padding: 20px; border-radius: 8px; border-left: 5px solid #2563eb;">
        <p style="margin-bottom: 15px;">
            <strong>1. The Log-Probability Method (Standard)</strong><br>
            <em style="color: #666;">Source: KLAAD (Kim et al., 2025)</em><br>
            "Following the methodology proposed by Nangia et al. (2020), we evaluate model preference between the two sentences based on their log-probabilities."
            <br><strong>Target: 50.0</strong>
        </p>
        <hr style="border-top: 1px solid #d1d5db; margin: 15px 0;">
        <p style="margin-bottom: 0;">
            <strong>2. The Cloze Adaptation (New)</strong><br>
            <em style="color: #666;">Source: Kumar et al. (2025)</em><br>
            Converts rows into Multiple Choice Questions (e.g., "Choice A: Man, Choice B: Woman").
        </p>
    </div>
</div>
"""

# 2. Key Sources Block (Citations)
sources_html = f"""
<div style="{TEXT_STYLE}">
    <h3 style="{SUB_TITLE_STYLE}">Key Sources</h3>
    <ol style="padding-left: 20px;">
        <li style="margin-bottom: 5px;"><strong>Original:</strong> <a href="https://aclanthology.org/2020.emnlp-main.154.pdf" target="_blank" style="{LINK_STYLE}">Nangia et al. (2020)</a></li>
        <li style="margin-bottom: 5px;"><strong>Scores:</strong> <a href="https://arxiv.org/pdf/2507.19962" target="_blank" style="{LINK_STYLE}">KLAAD: Refining Attention... (Kim et al., 2025)</a></li>
        <li style="margin-bottom: 5px;"><strong>New Prompting:</strong> <a href="https://arxiv.org/pdf/2503.11985" target="_blank" style="{LINK_STYLE}">No LLM is Free From Bias (Kumar et al., 2025)</a></li>
    </ol>
</div>
"""

# 3. Heading shown at the top of the "Dataset Viewer" tab.
viewer_header_html = f"""
<div style="{TEXT_STYLE}">
    <h2 style="{SECTION_TITLE_STYLE}">🔍 Section 2: Dataset Viewer</h2>
</div>
"""
# --- MAIN RENDER FUNCTION ---
def render_crows_pairs():
    """Build the CrowS-Pairs inspector page.

    The page is created hidden (``visible=False``) and consists of a title,
    a "Summary" tab (info HTML, benchmark score table, citations), a
    "Dataset Viewer" tab (load button wired to ``load_crows_pairs``), and a
    back button at the bottom.

    Returns:
        A ``(crows_page, btn_back)`` tuple: the page's outer ``gr.Column``
        and the back ``gr.Button``, so the caller can wire up navigation.
    """
    with gr.Column(visible=False) as crows_page:
        # 1. Main Title
        # NOTE(review): the title icon arrived mis-encoded; the scales emoji
        # is the most plausible original — confirm.
        gr.HTML("""
            <h1 style="font-family: sans-serif; text-align: center; margin-bottom: 25px; font-size: 36px; color: #111;">
            ⚖️ Bias Benchmark Inspector: CrowS-Pairs
            </h1>
        """)

        # 2. Content Tabs
        with gr.Tabs():
            # TAB 1: SUMMARY
            with gr.TabItem("Summary"):
                with gr.Row():
                    with gr.Column():
                        # A. Main Info (Methods + Structure)
                        gr.HTML(info_html)

                        # B. Benchmark Table
                        benchmark_df = pd.DataFrame(
                            [
                                ["Ideal Score", "50.00"],
                                ["Llama-3.2-3B", "65.47"],
                                ["Gemma-2-2B", "64.58"],
                            ],
                            columns=["Model", "Score"],
                        )
                        gr.Dataframe(value=benchmark_df, interactive=False)

                        # C. Sources Footer (Citations)
                        gr.HTML(sources_html)

            # TAB 2: VIEWER
            with gr.TabItem("Dataset Viewer"):
                gr.HTML(viewer_header_html)
                # NOTE(review): the button icon arrived mis-encoded and could
                # not be recovered; the folder emoji is a best guess — confirm.
                load_btn = gr.Button("📂 Load Dataset (1,508 Rows)", variant="primary")
                # NOTE(review): these headers are placeholders for the empty
                # table; load_crows_pairs returns the CSV's raw column names,
                # so the displayed headers presumably change after loading —
                # confirm this is intended.
                data_table = gr.Dataframe(
                    headers=["Stereotype Sentence", "Anti-Stereotype Sentence", "Bias Category"],
                    interactive=False,
                )
                load_btn.click(fn=load_crows_pairs, inputs=None, outputs=data_table)

        # 3. Spacer & Back Button (At the Bottom)
        gr.Markdown("<br><br>")
        with gr.Row():
            btn_back = gr.Button("⬅️ Back to Hub", variant="secondary", size="lg")

    return crows_page, btn_back