# BYO-community-v2 / crows_pairs.py
# sefif's picture
# Update crows_pairs.py
# 0e3e61e verified
import gradio as gr
import pandas as pd
# --- HELPER: Data Loading ---
def load_crows_pairs(source: str = "crows_pairs_anonymized.csv") -> pd.DataFrame:
    """Load the CrowS-Pairs CSV into a DataFrame.

    By default this reads ``crows_pairs_anonymized.csv`` relative to the
    current working directory; nothing is downloaded unless ``source`` is
    itself a URL that ``pandas.read_csv`` can open.

    Args:
        source: Path or URL handed to ``pandas.read_csv``.

    Returns:
        The parsed table with the CSV's own column names (no renaming is
        applied). If loading fails for any reason, a one-row DataFrame with
        a single ``"Error"`` column describing the failure is returned
        instead, so the UI callback can always display something.
    """
    try:
        return pd.read_csv(source)
    except Exception as exc:
        # Deliberately broad: this runs as a UI callback, and the failure is
        # surfaced in the table rather than raised to the caller.
        return pd.DataFrame({"Error": [f"Failed to load data: {exc}"]})
# --- HELPER: HTML Content ---
# Inline CSS snippets interpolated into the HTML blocks of this page.
# Each constant is a single "prop: value; ..." string, split one declaration
# per line purely for readability.

# Top-level section heading (<h2>): large text with a blue underline.
SECTION_TITLE_STYLE = (
    "font-family: sans-serif; "
    "font-size: 28px; "
    "margin-bottom: 20px; "
    "color: #111; "
    "border-bottom: 3px solid #2563eb; "
    "padding-bottom: 10px;"
)

# Sub-section heading (<h3>): medium text with a thin grey underline.
SUB_TITLE_STYLE = (
    "font-family: sans-serif; "
    "font-size: 22px; "
    "margin-top: 30px; "
    "margin-bottom: 10px; "
    "color: #333; "
    "border-bottom: 1px solid #ddd; "
    "padding-bottom: 5px;"
)

# Body text applied to the wrapping <div> of each HTML block.
TEXT_STYLE = (
    "font-family: sans-serif; "
    "font-size: 16px; "
    "line-height: 1.6; "
    "color: #444;"
)

# Hyperlinks: blue, bold, no underline.
LINK_STYLE = (
    "color: #2563eb; "
    "text-decoration: none; "
    "font-weight: bold;"
)
# 1. Main info block for the Summary tab: dataset links, a one-line
#    description of the dataset structure, and the two evaluation methods.
#    The heading emoji was mis-encoded (UTF-8 bytes shown as single-byte
#    characters); it is restored to the intended page glyph here.
info_html = f"""
<div style="{TEXT_STYLE}">
<h2 style="{SECTION_TITLE_STYLE}">📄 Section 1: Summary</h2>
<h3 style="{SUB_TITLE_STYLE}">Dataset Info</h3>
<ul style="list-style-type: none; padding-left: 0;">
<li><strong>Repo:</strong> <a href="https://github.com/nyu-mll/crows-pairs" target="_blank" style="{LINK_STYLE}">nyu-mll/crows-pairs</a></li>
<li><strong>Paper:</strong> <a href="https://aclanthology.org/2020.emnlp-main.154.pdf" target="_blank" style="{LINK_STYLE}">Nangia et al. (2020)</a></li>
<li><strong>License:</strong> <a href="https://github.com/nyu-mll/crows-pairs?tab=readme-ov-file#license" target="_blank" style="{LINK_STYLE}">CC-BY-SA 4.0</a></li>
</ul>
<h3 style="{SUB_TITLE_STYLE}">Structure</h3>
<p>The dataset contains <strong>1,508 pairs</strong> of sentences testing social bias (Race, Gender, Religion, etc.).</p>
<h3 style="{SUB_TITLE_STYLE}">Evaluation Methods (2025)</h3>
<div style="background-color: #f8f9fa; padding: 20px; border-radius: 8px; border-left: 5px solid #2563eb;">
<p style="margin-bottom: 15px;">
<strong>1. The Log-Probability Method (Standard)</strong><br>
<em style="color: #666;">Source: KLAAD (Kim et al., 2025)</em><br>
"Following the methodology proposed by Nangia et al. (2020), we evaluate model preference between the two sentences based on their log-probabilities."
<br><strong>Target: 50.0</strong>
</p>
<hr style="border-top: 1px solid #d1d5db; margin: 15px 0;">
<p style="margin-bottom: 0;">
<strong>2. The Cloze Adaptation (New)</strong><br>
<em style="color: #666;">Source: Kumar et al. (2025)</em><br>
Converts rows into Multiple Choice Questions (e.g., "Choice A: Man, Choice B: Woman").
</p>
</div>
</div>
"""
# 2. Key sources block (citations), shown as an ordered list under the
#    benchmark table. Each entry is (label, link target, link text); the
#    lines are joined with newlines, with an empty first and last element so
#    the result starts and ends with a newline.
sources_html = "\n".join(
    [
        "",
        f'<div style="{TEXT_STYLE}">',
        f'<h3 style="{SUB_TITLE_STYLE}">Key Sources</h3>',
        '<ol style="padding-left: 20px;">',
        *(
            f'<li style="margin-bottom: 5px;"><strong>{label}:</strong> '
            f'<a href="{href}" target="_blank" style="{LINK_STYLE}">{text}</a></li>'
            for label, href, text in (
                (
                    "Original",
                    "https://aclanthology.org/2020.emnlp-main.154.pdf",
                    "Nangia et al. (2020)",
                ),
                (
                    "Scores",
                    "https://arxiv.org/pdf/2507.19962",
                    "KLAAD: Refining Attention... (Kim et al., 2025)",
                ),
                (
                    "New Prompting",
                    "https://arxiv.org/pdf/2503.11985",
                    "No LLM is Free From Bias (Kumar et al., 2025)",
                ),
            )
        ),
        "</ol>",
        "</div>",
        "",
    ]
)
# Heading shown at the top of the "Dataset Viewer" tab. The emoji was
# mis-encoded (UTF-8 bytes shown as single-byte characters); it is restored
# to the intended magnifying-glass glyph here.
viewer_header_html = f"""
<div style="{TEXT_STYLE}">
<h2 style="{SECTION_TITLE_STYLE}">🔍 Section 2: Dataset Viewer</h2>
</div>
"""
# --- MAIN RENDER FUNCTION ---
def render_crows_pairs():
    """Build the CrowS-Pairs page and return it with its back button.

    The page is a ``gr.Column`` created with ``visible=False``; showing it
    and wiring the back button are left to the caller. It must be called
    inside an active Gradio layout context.

    Returns:
        A ``(crows_page, btn_back)`` tuple: the page column and the
        "Back to Hub" button.
    """
    with gr.Column(visible=False) as crows_page:
        # 1. Main title. The emoji here was mis-encoded and is restored.
        gr.HTML(
            "\n"
            '<h1 style="font-family: sans-serif; text-align: center; '
            'margin-bottom: 25px; font-size: 36px; color: #111;">\n'
            "⚖️ Bias Benchmark Inspector: CrowS-Pairs\n"
            "</h1>\n"
        )

        # 2. Content tabs
        with gr.Tabs():
            # TAB 1: SUMMARY
            with gr.TabItem("Summary"):
                with gr.Row():
                    with gr.Column():
                        # A. Main info (methods + structure)
                        gr.HTML(info_html)

                        # B. Benchmark table; scores are kept as strings so
                        #    they display exactly as written.
                        benchmark_df = pd.DataFrame(
                            [
                                ["Ideal Score", "50.00"],
                                ["Llama-3.2-3B", "65.47"],
                                ["Gemma-2-2B", "64.58"],
                            ],
                            columns=["Model", "Score"],
                        )
                        gr.Dataframe(value=benchmark_df, interactive=False)

                        # C. Sources footer (citations)
                        gr.HTML(sources_html)

            # TAB 2: VIEWER
            with gr.TabItem("Dataset Viewer"):
                gr.HTML(viewer_header_html)
                # Button label emoji was mis-encoded and is restored.
                load_btn = gr.Button("📂 Load Dataset (1,508 Rows)", variant="primary")
                # NOTE(review): these headers only label the empty table.
                # load_crows_pairs returns the CSV's own column names, which
                # presumably replace them once data is loaded. Confirm.
                data_table = gr.Dataframe(
                    headers=["Stereotype Sentence", "Anti-Stereotype Sentence", "Bias Category"],
                    interactive=False,
                )
                # The dataset is loaded only when the user clicks the button.
                load_btn.click(fn=load_crows_pairs, inputs=None, outputs=data_table)

        # 3. Spacer & back button (at the bottom of the page column)
        gr.Markdown("<br><br>")
        with gr.Row():
            btn_back = gr.Button("⬅️ Back to Hub", variant="secondary", size="lg")

    return crows_page, btn_back