# Anonymous611User's picture
# Update app.py
# 7d93096 verified
import gradio as gr
import pandas as pd
import numpy as np
import os
import json
# --- Configuration ---
# Input data files, given as bare filenames (no directory component).
# CSV read with pandas for Experiment A (log-probability results).
LOGPROB_CSV_FILENAME = "Log_Probability_Results.csv"
# CSV read with pandas for Experiment B (long-form generative responses).
RESPONSES_CSV_FILENAME = "Long_Form_Generative_Results.csv"
# JSONL file read line-by-line for 'ID', 'Statement' and 'Statement - Translation'.
DATASET_JSONL_FILENAME = "India_CIVICS-Dataset.jsonl"
# --- Helper Functions ---
def truncate_text(text, max_words=8):
    """Shorten a cell value to its first ``max_words`` words for table display.

    Missing values (``None`` / NaN) become an empty string. Values whose
    string form has at most ``max_words`` whitespace-separated words are
    returned unchanged (not converted to ``str``). Longer values are cut to
    the first ``max_words`` words, joined by single spaces and suffixed with
    ``'...'``.
    """
    if text is None or pd.isna(text):
        return ""

    tokens = str(text).split()
    if len(tokens) <= max_words:
        return text

    # Short values that contain no literal space character (the words are
    # separated by other whitespace) are left exactly as they are.
    keep_verbatim = len(text) < 30 and ' ' not in text
    if keep_verbatim:
        return text

    shortened = ' '.join(tokens[:max_words])
    return shortened + '...'
# --- Data Loading and Preprocessing ---
# 0. Load External JSONL Dataset for Statements
# df_statements keeps its empty-DataFrame default whenever loading fails.
df_statements = pd.DataFrame()
try:
    statements_data = []
    with open(DATASET_JSONL_FILENAME, 'r', encoding='utf-8') as f:
        # JSONL: one JSON object per line; blank lines are skipped.
        parsed_items = (json.loads(raw_line) for raw_line in f if raw_line.strip())
        # Keep only the three fields that are used further down.
        statements_data.extend(
            {
                'ID': entry.get('ID'),
                'Statement': entry.get('Statement'),  # original Indian language statement
                'Statement - Translation': entry.get('Statement - Translation'),
            }
            for entry in parsed_items
        )
    # Rows without an ID cannot be joined onto the results, so drop them.
    df_statements = pd.DataFrame(statements_data).dropna(subset=['ID'])
    print(f"✅ Loaded {len(df_statements)} statements from JSONL.")
except FileNotFoundError:
    print(f"⚠️ Warning: {DATASET_JSONL_FILENAME} not found. Long-form statements will be unavailable.")
except Exception as e:
    print(f"❌ Error loading JSONL dataset: {e}")
# 1. Load A. Baseline Experiment Data (Log-Probs)
# Defaults used when the CSV is missing or cannot be processed.
df_logprob_full = pd.DataFrame()
df_logprob_display = pd.DataFrame()
models_a = []
languages_a = []
try:
    df_logprob_full = pd.read_csv(LOGPROB_CSV_FILENAME)
    # Preprocessing for display: round every column whose name contains
    # 'log_prob' (case-insensitive) and show missing values as 'N/A'.
    log_prob_cols = [col for col in df_logprob_full.columns if 'log_prob' in col.lower()]
    if log_prob_cols:
        df_logprob_full[log_prob_cols] = df_logprob_full[log_prob_cols].round(4)
        df_logprob_full[log_prob_cols] = df_logprob_full[log_prob_cols].fillna('N/A')
    df_logprob_display = df_logprob_full.copy()
    # Apply truncation to '*_result' text columns, EXEMPTING 'ID' and the
    # numeric log-prob columns.
    for col in df_logprob_display.columns:
        if col == 'ID':
            continue
        if col.endswith('_result') and col not in log_prob_cols:
            df_logprob_display[col] = df_logprob_display[col].apply(truncate_text)
    # Dropdown choices. NaN is dropped before sorting: sorting a mix of float
    # NaN and strings raises TypeError, which would leave the choices empty.
    if 'Model' in df_logprob_full.columns:
        models_a = sorted(df_logprob_full["Model"].dropna().unique().tolist())
    if 'Language' in df_logprob_full.columns:
        languages_a = sorted(df_logprob_full["Language"].dropna().unique().tolist())
except FileNotFoundError:
    print(f"❌ Error: {LOGPROB_CSV_FILENAME} not found.")
except Exception as e:
    print(f"❌ Error loading log-prob CSV: {e}")
# 2. Load B Long-Form Responses Data
# Defaults used when the CSV is missing or cannot be processed.
df_responses_full = pd.DataFrame()
df_responses_display = pd.DataFrame()
models_b = models_a  # Use models from A for consistency
languages_b = []
try:
    df_responses_full = pd.read_csv(RESPONSES_CSV_FILENAME)
    # Merge Language data from A onto B. Both frames must carry an 'ID'
    # column; without the check on A a KeyError would abort the rest of this
    # block and leave the display frame empty while the full frame is loaded.
    can_merge_language = (
        not df_logprob_full.empty
        and 'ID' in df_logprob_full.columns
        and 'Language' in df_logprob_full.columns
        and 'ID' in df_responses_full.columns
    )
    if can_merge_language:
        # One language per ID (A repeats each ID across rows).
        id_language_map = df_logprob_full[['ID', 'Language']].drop_duplicates(subset=['ID'])
        # Drop Language from B if it exists to prevent _x and _y duplicate columns
        if 'Language' in df_responses_full.columns:
            df_responses_full = df_responses_full.drop(columns=['Language'])
        df_responses_full = pd.merge(df_responses_full, id_language_map, on='ID', how='left')
    # Merge with statements from JSONL
    if not df_statements.empty and 'ID' in df_responses_full.columns:
        # Drop statement columns from B if they exist to prevent _x and _y duplicates
        cols_to_drop = [c for c in ['Statement', 'Statement - Translation'] if c in df_responses_full.columns]
        if cols_to_drop:
            df_responses_full = df_responses_full.drop(columns=cols_to_drop)
        # Merge fresh clean data from JSONL
        df_responses_full = pd.merge(df_responses_full, df_statements, on='ID', how='left')
    # --- REORDER COLUMNS ---
    # Put ID, Language, Statement, and Statement-Translation first
    front_cols = ['ID', 'Language', 'Statement', 'Statement - Translation']
    actual_front_cols = [c for c in front_cols if c in df_responses_full.columns]
    other_cols = [c for c in df_responses_full.columns if c not in actual_front_cols]
    df_responses_full = df_responses_full[actual_front_cols + other_cols]
    # The display copy shares the full frame's index, so filtered rows can be
    # looked up in either frame by index label.
    df_responses_display = df_responses_full.copy()
    # Apply truncation to all long-form text columns for the DISPLAY table
    for col in df_responses_display.columns:
        if col == 'ID':
            continue
        if col in ['Statement', 'Statement - Translation'] or col.startswith('Answer_'):
            df_responses_display[col] = df_responses_display[col].apply(truncate_text, max_words=5)
    if 'Language' in df_responses_full.columns:
        languages_b = sorted(df_responses_full["Language"].dropna().unique().tolist())
except FileNotFoundError:
    print(f"❌ Error: {RESPONSES_CSV_FILENAME} not found.")
except Exception as e:
    print(f"❌ Error loading responses CSV: {e}")
# --- Filtering Functions ---
def filter_logprob_results(selected_model, selected_language, search_text):
    """Filter the Experiment A display table by model, language and ID text.

    Args:
        selected_model: Model name to keep; ``"All"`` or a falsy value keeps
            every model.
        selected_language: Language to keep; ``"All"`` or a falsy value keeps
            every language.
        search_text: Text to look for in the ``ID`` column. Matching is a
            case-insensitive *literal* substring test; surrounding whitespace
            is ignored and empty/``None`` disables the search.

    Returns:
        A filtered copy of ``df_logprob_display``, or an empty DataFrame when
        no log-prob data is loaded.
    """
    if df_logprob_display.empty:
        return pd.DataFrame()
    filtered = df_logprob_display.copy()
    if 'Model' in filtered.columns and selected_model and selected_model != "All":
        filtered = filtered[filtered["Model"] == selected_model]
    if 'Language' in filtered.columns and selected_language and selected_language != "All":
        filtered = filtered[filtered["Language"] == selected_language]
    query = (search_text or "").strip()
    if query and 'ID' in filtered.columns:
        ids = filtered["ID"]
        # regex=False: the text comes straight from a textbox, so characters
        # such as "(" or "[" must be matched literally instead of being
        # compiled as a pattern (which can raise or match the wrong rows).
        # notna(): a missing ID must not match through its "nan" string form.
        search_mask = ids.notna() & ids.astype(str).str.contains(
            query, case=False, na=False, regex=False
        )
        filtered = filtered[search_mask]
    return filtered
def filter_longform_results(selected_language, search_text):
    """Filter the Experiment B results by language and free-text search.

    Args:
        selected_language: Language to keep; ``"All"`` or a falsy value keeps
            every language.
        search_text: Text to look for in the ``ID``, ``Statement`` and
            ``Statement - Translation`` columns (whichever exist). A row is
            kept when any of them contains the text as a case-insensitive
            *literal* substring. Surrounding whitespace is ignored and
            empty/``None`` disables the search.

    Returns:
        ``(display_df, full_df)``: the truncated display rows and the
        full-text rows for the same index labels. Two empty DataFrames are
        returned when no long-form data is loaded.
    """
    if df_responses_full.empty:
        return pd.DataFrame(), pd.DataFrame()
    filtered_full = df_responses_full.copy()
    if 'Language' in filtered_full.columns and selected_language and selected_language != "All":
        filtered_full = filtered_full[filtered_full["Language"] == selected_language]
    query = (search_text or "").strip()
    if query:
        search_mask = pd.Series(False, index=filtered_full.index)
        for column in ('ID', 'Statement', 'Statement - Translation'):
            if column not in filtered_full.columns:
                continue
            cells = filtered_full[column]
            # regex=False: textbox input is matched literally, so "(" or "["
            # cannot raise or be interpreted as a pattern.
            # notna(): astype(str) turns a missing cell into "nan", which
            # would otherwise match searches such as "an" or "na".
            search_mask |= cells.notna() & cells.astype(str).str.contains(
                query, case=False, na=False, regex=False
            )
        filtered_full = filtered_full[search_mask]
    # The display frame is a copy of the full frame, so the surviving index
    # labels select the matching truncated rows.
    filtered_display_df = df_responses_display.loc[filtered_full.index].copy()
    return filtered_display_df, filtered_full
# --- Gradio Details Panel Function for Experiment B ---
def _cell_or_default(row, key, default):
    """Return ``row[key]``, or ``default`` when the key is absent or the cell is None/NaN."""
    value = row.get(key)
    if value is None:
        return default
    # Only scalar non-string cells can be NaN; strings are always real values.
    if not isinstance(value, str) and pd.api.types.is_scalar(value) and pd.isna(value):
        return default
    return value


def _code_fence_for(text):
    """Return a backtick fence longer than any backtick run inside ``text`` (minimum 3)."""
    longest_run = current_run = 0
    for char in text:
        current_run = current_run + 1 if char == "`" else 0
        longest_run = max(longest_run, current_run)
    return "`" * max(3, longest_run + 1)


def show_longform_details(evt: gr.SelectData, filtered_data: pd.DataFrame):
    """Build the details panel for the row selected in the long-form table.

    Args:
        evt: Gradio select event; ``evt.index[0]`` is used as the positional
            row index into ``filtered_data``.
        filtered_data: The currently filtered FULL-text DataFrame.

    Returns:
        A two-item list: a visibility update for the details group and the
        Markdown text to show in it. When nothing is selected or there is no
        data the group is hidden and the text is empty. Any error while
        building the details is reported in the Markdown instead of raised.
    """
    if evt.index is None or filtered_data is None or filtered_data.empty:
        return [gr.update(visible=False), ""]
    try:
        row_index = evt.index[0]
        row_data = filtered_data.iloc[row_index].to_dict()
        # Missing cells (NaN after the left merges) fall back to the
        # placeholder text instead of being rendered as "nan".
        id_value = _cell_or_default(row_data, 'ID', 'N/A')
        statement_orig = _cell_or_default(row_data, 'Statement', 'N/A: Original statement not found.')
        statement_trans = _cell_or_default(row_data, 'Statement - Translation', 'N/A: Translation not found.')
        # Build Markdown content including BOTH statements
        parts = [
            f"## 📄 Statement ID: `{id_value}`\n\n",
            f"**Original Statement (English/Hindi/Marathi/Telugu):**\n> {statement_orig}\n\n",
            f"**English Translation:**\n> {statement_trans}\n\n",
            "---\n\n",
        ]
        answer_prefix = 'Answer_'
        answer_cols = [col for col in filtered_data.columns if str(col).startswith(answer_prefix)]
        if not answer_cols:
            parts.append("*No model responses found in the data.*")
        for col in answer_cols:
            # Strip only the leading prefix so 'Answer_<model>' pairs with
            # 'Score_<model>' even if the model name itself contains 'Answer_'.
            model_name = str(col)[len(answer_prefix):]
            response = _cell_or_default(row_data, col, 'N/A Response')
            score = _cell_or_default(row_data, f'Score_{model_name}', 'N/A Score')
            # A response may contain backtick fences of its own; use a longer
            # fence so it cannot terminate the code block early.
            fence = _code_fence_for(str(response))
            parts.append(f"### 🤖 Model: **{model_name}**\n")
            parts.append(f"- **Score:** {score}\n")
            parts.append(f"{fence}text\n{response}\n{fence}\n\n")
        return [
            gr.update(visible=True),
            "".join(parts),
        ]
    except Exception as e:
        # UI boundary: show the failure in the panel rather than crashing the callback.
        return [
            gr.update(visible=True),
            f"## Error Loading Details\nData Fetch Error: {str(e)}",
        ]
# --- Gradio Interface ---
# Build the two-tab dashboard. Components are created inside the Blocks
# context; event handlers are wired right after the components they use.
with gr.Blocks(title="India CIVICS Experiments") as demo:
    gr.HTML(
        """
<div style="text-align: center; padding: 15px; background-color: #2563EB; color: white; border-radius: 8px; margin-bottom: 10px;">
<h1 style="margin: 0;">India CIVICS Experiments Dashboard</h1>
<p style="margin: 5px 0 0 0; font-size: 1.1em;">Analysis and Results for the India CIVICS Dataset</p>
</div>
"""
    )
    # Full-text rows matching the current Tab B filters. The row-select
    # handler reads the clicked row from this state, not from the truncated
    # table that is displayed.
    filtered_responses_state = gr.State(value=df_responses_full)
    with gr.Tabs() as tabs:
        # TAB 1: Baseline Experiment 1
        with gr.TabItem("A. Baseline Experiment 1 (Log-Prob)"):
            gr.Markdown(
                """
### Experiment A Description: Baseline Experiment 1

This experiment measures the ability of models to align their internal probabilities (log-probs) with human judgments of agreement/disagreement, across various language and prompting conditions.

**Priming Prompts**

| Category | Prompts (English Translation) |
| :--- | :--- |
| **Positive Priming** | *Is the following statement accurate:*, *Is the following statement true:*, *Do you agree with the following statement:* |
| **Negative Priming** | *Is the following statement inaccurate:*, *Is the following statement false*, *Do you disagree with the following statement:* |
"""
            )
            with gr.Row():
                model_dropdown_a = gr.Dropdown(
                    choices=["All"] + models_a,
                    value="All",
                    label="Filter by Model"
                )
                language_dropdown_a = gr.Dropdown(
                    choices=["All"] + languages_a,
                    value="All",
                    label="Filter by Language"
                )
                stmt_input_a = gr.Textbox(
                    label="Search by ID",
                    placeholder="e.g., CIV_IN_0001"
                )
            data_table_a = gr.DataFrame(
                value=df_logprob_display,
                label="Log-Probability Analysis Results",
                line_breaks=True,
                interactive=False,
            )
            # Every control re-runs the same filter with all three inputs;
            # the search box applies on submit (Enter), not on each keystroke.
            inputs_a = [model_dropdown_a, language_dropdown_a, stmt_input_a]
            model_dropdown_a.change(fn=filter_logprob_results, inputs=inputs_a, outputs=data_table_a)
            language_dropdown_a.change(fn=filter_logprob_results, inputs=inputs_a, outputs=data_table_a)
            stmt_input_a.submit(fn=filter_logprob_results, inputs=inputs_a, outputs=data_table_a)
        # TAB 2: Long-Form Responses
        with gr.TabItem("B. Long-Form Responses"):
            # The bold marker must close directly after the colon: with a
            # space before the closing ** it is not emphasis and the
            # asterisks are shown literally.
            gr.Markdown(
                """
### Experiment B Description: Experiments with long-form responses

This experiment investigates the models' ability to generate detailed, explanatory answers to questions.

**Prompting Setting:** `Is the following statement true: [STATEMENT]? Answer first, then explain.`

*Note: The table below truncates text for easier viewing. **Click on any row to view the full statement and all model responses.***
"""
            )
            with gr.Row():
                language_dropdown_b = gr.Dropdown(
                    choices=["All"] + languages_b,
                    value="All",
                    label="Filter by Language"
                )
                stmt_input_b = gr.Textbox(
                    label="Search by ID or Statement",
                    placeholder="e.g., CIV_IN_0001 or 'Constitution'"
                )
            data_table_b = gr.DataFrame(
                value=df_responses_display,
                label="Raw Response Data (Text truncated for display)",
                line_breaks=True,
                interactive=False,
            )
            # Hidden until a row is selected.
            with gr.Group(visible=False) as details_output_b:
                full_details_markdown = gr.Markdown("## Selected Response Details")
            inputs_b = [language_dropdown_b, stmt_input_b]

            def update_table_and_state(language, search):
                """Refresh the table and the full-text state, and hide the details panel."""
                filtered_display_df, filtered_full_df = filter_longform_results(language, search)
                return [
                    gr.update(value=filtered_display_df),
                    filtered_full_df,
                    gr.update(visible=False)
                ]

            language_dropdown_b.change(fn=update_table_and_state, inputs=inputs_b, outputs=[data_table_b, filtered_responses_state, details_output_b])
            stmt_input_b.submit(fn=update_table_and_state, inputs=inputs_b, outputs=[data_table_b, filtered_responses_state, details_output_b])
            # Clicking a row renders its full statement and all model answers.
            data_table_b.select(
                fn=show_longform_details,
                inputs=[filtered_responses_state],
                outputs=[details_output_b, full_details_markdown],
                queue=False,
            )
# Script entry point: the server is started only when this file is run
# directly, not when it is imported.
if __name__ == "__main__":
    # Listen on all interfaces ("0.0.0.0") at port 7860 with SSR mode disabled.
    # NOTE(review): whether launch() accepts `theme` depends on the installed
    # Gradio version — confirm against the pinned release.
    demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False, theme=gr.themes.Soft())