| import gradio as gr |
| import pandas as pd |
| import numpy as np |
| import os |
| import json |
|
|
| |
# Input data files. They are opened by bare name, i.e. relative to the
# current working directory of the process.
# Experiment A: log-probability results (CSV).
LOGPROB_CSV_FILENAME = "Log_Probability_Results.csv"
# Experiment B: long-form model answers (CSV).
RESPONSES_CSV_FILENAME = "Long_Form_Generative_Results.csv"
# Source statements, read line by line as JSON objects (JSON Lines).
DATASET_JSONL_FILENAME = "India_CIVICS-Dataset.jsonl"
|
|
| |
def truncate_text(text, max_words=8):
    """Shorten *text* to at most *max_words* words for compact table display.

    Args:
        text: Cell value to shorten. ``None``/NaN yield ``""``; any other
            value is inspected through its ``str()`` form.
        max_words: Maximum number of whitespace-separated words to keep.

    Returns:
        ``""`` for missing values; the first *max_words* words joined by
        single spaces and followed by ``"..."`` when the value is longer;
        otherwise the original value unchanged (it is not stringified).
    """
    if text is None or pd.isna(text):
        return ""

    # Work on the string form throughout so non-string cells (numbers, ...)
    # can neither fail on len() nor be judged by container semantics.
    as_text = str(text)
    words = as_text.split()
    if len(words) <= max_words:
        return text

    # split() breaks on any whitespace, so a short value whose words are
    # separated only by tabs/newlines reaches this point; it is kept as-is.
    if len(as_text) < 30 and ' ' not in as_text:
        return text

    return ' '.join(words[:max_words]) + '...'
|
|
| |
|
|
| |
def load_statements(path):
    """Read the statements dataset from a JSON Lines file.

    Blank lines are ignored. Lines that are not valid JSON objects are
    skipped (and counted in a warning) instead of aborting the whole load.
    Rows without an ``ID`` are dropped, and only the first row per ``ID`` is
    kept so that a later left-merge on ``ID`` cannot multiply rows.

    Args:
        path: Path of the ``.jsonl`` file to read (UTF-8).

    Returns:
        A DataFrame with the columns ``ID``, ``Statement`` and
        ``Statement - Translation``; empty (but with those columns) when the
        file holds no usable records.

    Raises:
        FileNotFoundError: If *path* does not exist.
    """
    columns = ['ID', 'Statement', 'Statement - Translation']
    records = []
    skipped = 0

    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue
            try:
                item = json.loads(line)
            except json.JSONDecodeError:
                skipped += 1
                continue
            if not isinstance(item, dict):
                # Valid JSON but not an object (e.g. a bare list or number).
                skipped += 1
                continue
            records.append({col: item.get(col) for col in columns})

    if skipped:
        print(f"⚠️ Warning: skipped {skipped} malformed line(s) in {path}.")

    # Passing `columns` keeps the schema stable even when `records` is empty.
    frame = pd.DataFrame(records, columns=columns)
    return frame.dropna(subset=['ID']).drop_duplicates(subset=['ID'], keep='first')


# Statement texts keyed by ID; stays an empty DataFrame when loading fails so
# that the rest of the module can still run without the long-form statements.
df_statements = pd.DataFrame()
try:
    df_statements = load_statements(DATASET_JSONL_FILENAME)
    print(f"✅ Loaded {len(df_statements)} statements from JSONL.")
except FileNotFoundError:
    print(f"⚠️ Warning: {DATASET_JSONL_FILENAME} not found. Long-form statements will be unavailable.")
except Exception as e:
    print(f"❌ Error loading JSONL dataset: {e}")
|
|
|
|
| |
def build_logprob_tables(df):
    """Derive the Experiment A tables and filter choices from raw results.

    Args:
        df: Raw log-probability results as read from the CSV. It is not
            modified; the function works on a copy.

    Returns:
        A tuple ``(full, display, models, languages)``:

        * ``full`` - copy of *df* in which every column whose name contains
          ``log_prob`` (case-insensitive) is rounded to 4 decimals and has
          missing values replaced by ``'N/A'``.
        * ``display`` - copy of ``full`` in which the remaining ``*_result``
          columns are shortened with ``truncate_text``.
        * ``models`` / ``languages`` - sorted distinct non-missing values of
          the ``Model`` / ``Language`` columns (``[]`` if a column is absent).
    """
    full = df.copy()

    log_prob_cols = [col for col in full.columns if 'log_prob' in col.lower()]
    if log_prob_cols:
        full[log_prob_cols] = full[log_prob_cols].round(4).fillna('N/A')

    display = full.copy()
    for col in display.columns:
        if col.endswith('_result') and col not in log_prob_cols:
            display[col] = display[col].apply(truncate_text)

    # dropna(): a missing value (float NaN) cannot be ordered against the
    # string labels and would make sorted() raise TypeError.
    models = []
    if 'Model' in full.columns:
        models = sorted(full["Model"].dropna().unique().tolist())
    languages = []
    if 'Language' in full.columns:
        languages = sorted(full["Language"].dropna().unique().tolist())

    return full, display, models, languages


# Experiment A data. All four names keep these empty defaults unless the CSV
# is read AND processed successfully, so they can never end up half-updated.
df_logprob_full = pd.DataFrame()
df_logprob_display = pd.DataFrame()
models_a = []
languages_a = []

try:
    (df_logprob_full, df_logprob_display,
     models_a, languages_a) = build_logprob_tables(pd.read_csv(LOGPROB_CSV_FILENAME))
except FileNotFoundError:
    print(f"❌ Error: {LOGPROB_CSV_FILENAME} not found.")
except Exception as e:
    print(f"❌ Error loading log-prob CSV: {e}")
|
|
|
|
| |
def build_response_tables(df, logprob_df, statements_df):
    """Derive the Experiment B tables and language choices from raw responses.

    Args:
        df: Raw long-form responses as read from the CSV (not modified).
        logprob_df: Log-probability results; when it has both ``ID`` and
            ``Language`` columns, its first ``Language`` per ``ID`` replaces
            any ``Language`` column of *df*.
        statements_df: Statement texts keyed by ``ID``; when non-empty, its
            columns replace any ``Statement`` / ``Statement - Translation``
            columns of *df*.

    Returns:
        A tuple ``(full, display, languages)``:

        * ``full`` - responses with the merged columns, ordered so that
          ``ID``, ``Language``, ``Statement`` and ``Statement - Translation``
          (those that exist) come first.
        * ``display`` - copy of ``full`` in which the statement columns and
          every ``Answer_*`` column are shortened to 5 words.
        * ``languages`` - sorted distinct non-missing ``Language`` values
          (``[]`` if the column is absent).
    """
    full = df.copy()
    has_id = 'ID' in full.columns

    # Language comes from the log-prob results (one language per ID). Both
    # frames must carry the join key, otherwise the merge is skipped.
    if has_id and not logprob_df.empty and {'ID', 'Language'}.issubset(logprob_df.columns):
        id_language_map = logprob_df[['ID', 'Language']].drop_duplicates(subset=['ID'])
        full = full.drop(columns=['Language'], errors='ignore')
        full = pd.merge(full, id_language_map, on='ID', how='left')

    # Statement texts come from the JSONL dataset; drop same-named columns
    # first so the merge does not create suffixed duplicates.
    if has_id and not statements_df.empty and 'ID' in statements_df.columns:
        full = full.drop(columns=['Statement', 'Statement - Translation'], errors='ignore')
        full = pd.merge(full, statements_df, on='ID', how='left')

    front_cols = [c for c in ['ID', 'Language', 'Statement', 'Statement - Translation']
                  if c in full.columns]
    other_cols = [c for c in full.columns if c not in front_cols]
    full = full[front_cols + other_cols]

    display = full.copy()
    for col in display.columns:
        if col in ('Statement', 'Statement - Translation') or col.startswith('Answer_'):
            display[col] = display[col].apply(truncate_text, max_words=5)

    languages = []
    if 'Language' in full.columns:
        languages = sorted(full["Language"].dropna().unique().tolist())

    return full, display, languages


# Experiment B data. The full and display frames are assigned together, so
# they always share the same rows and index even if processing fails.
df_responses_full = pd.DataFrame()
df_responses_display = pd.DataFrame()
models_b = models_a
languages_b = []

try:
    df_responses_full, df_responses_display, languages_b = build_response_tables(
        pd.read_csv(RESPONSES_CSV_FILENAME), df_logprob_full, df_statements
    )
except FileNotFoundError:
    print(f"❌ Error: {RESPONSES_CSV_FILENAME} not found.")
except Exception as e:
    print(f"❌ Error loading responses CSV: {e}")
| |
|
|
| |
|
|
def filter_logprob_results(selected_model, selected_language, search_text):
    """Filter the Experiment A display table by model, language and ID text.

    Args:
        selected_model: Model to keep; ``"All"`` or an empty value disables
            the model filter.
        selected_language: Language to keep; ``"All"`` or an empty value
            disables the language filter.
        search_text: Text that must occur in the ``ID`` column. It is matched
            literally and case-insensitively; an empty value disables it.

    Returns:
        A new DataFrame with the matching rows of ``df_logprob_display``, or
        an empty DataFrame when no log-prob data is loaded.
    """
    if df_logprob_display.empty:
        return pd.DataFrame()

    filtered = df_logprob_display
    for column, choice in (("Model", selected_model), ("Language", selected_language)):
        if column in filtered.columns and choice and choice != "All":
            filtered = filtered[filtered[column] == choice]

    if search_text and 'ID' in filtered.columns:
        ids = filtered["ID"]
        # regex=False: the search box holds plain text, so characters such as
        # "(" or "[" must not be parsed as a regular expression.
        # notna(): a missing ID must not match through its "nan" string form.
        search_mask = ids.notna() & ids.astype(str).str.contains(
            search_text, case=False, na=False, regex=False
        )
        filtered = filtered[search_mask]

    # Always hand back a copy, never the module-level table itself.
    return filtered.copy()
|
|
|
|
def filter_longform_results(selected_language, search_text):
    """Filter the Experiment B results by language and free-text search.

    Args:
        selected_language: Language to keep; ``"All"`` or an empty value
            disables the language filter.
        search_text: Text that must occur in the ``ID``, ``Statement`` or
            ``Statement - Translation`` column (whichever exist). It is
            matched literally and case-insensitively; an empty value disables
            the search.

    Returns:
        A tuple ``(display_df, full_df)`` holding the same rows in the same
        order: the truncated rows from ``df_responses_display`` and the
        full-text rows from ``df_responses_full``. Both are empty DataFrames
        when no response data is loaded.
    """
    if df_responses_full.empty:
        return pd.DataFrame(), pd.DataFrame()

    filtered_full = df_responses_full

    if 'Language' in filtered_full.columns and selected_language and selected_language != "All":
        filtered_full = filtered_full[filtered_full["Language"] == selected_language]

    if search_text:
        search_mask = pd.Series(False, index=filtered_full.index)
        for column in ('ID', 'Statement', 'Statement - Translation'):
            if column not in filtered_full.columns:
                continue
            values = filtered_full[column]
            # regex=False: treat the query as plain text, not as a pattern.
            # notna(): a missing cell must not match via its "nan" string
            # form (a query such as "an" would otherwise hit every gap).
            search_mask |= values.notna() & values.astype(str).str.contains(
                search_text, case=False, na=False, regex=False
            )
        filtered_full = filtered_full[search_mask]

    filtered_full = filtered_full.copy()
    # The display frame shares its index with the full frame, so the same
    # labels select the truncated counterparts of the surviving rows.
    filtered_display_df = df_responses_display.loc[filtered_full.index].copy()

    return filtered_display_df, filtered_full
|
|
|
|
| |
def show_longform_details(evt: gr.SelectData, filtered_data: pd.DataFrame):
    """Build the details panel for the table row that was selected.

    Args:
        evt: Selection event; ``evt.index[0]`` is used as the row position.
        filtered_data: Full-text DataFrame currently backing the table; the
            row is looked up in it by position.

    Returns:
        A two-item list ``[panel_update, markdown]``: a ``gr.update`` that
        shows the panel (or hides it when nothing is selected or there is no
        data) and the Markdown text to render. If building the text fails,
        the panel is shown with an error message instead of raising.
    """
    if evt.index is None or filtered_data is None or filtered_data.empty:
        return [gr.update(visible=False), ""]

    def cell(row, key, fallback):
        """Return row[key], or *fallback* if the key is absent or the cell is None/NaN."""
        value = row.get(key, fallback)
        if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
            return fallback
        return value

    try:
        row_index = evt.index[0]
        row_data = filtered_data.iloc[row_index].to_dict()

        id_value = cell(row_data, 'ID', 'N/A')
        statement_orig = cell(row_data, 'Statement', 'N/A: Original statement not found.')
        statement_trans = cell(row_data, 'Statement - Translation', 'N/A: Translation not found.')

        parts = [
            f"## 📄 Statement ID: `{id_value}`\n\n",
            f"**Original Statement (English/Hindi/Marathi/Telugu):**\n> {statement_orig}\n\n",
            f"**English Translation:**\n> {statement_trans}\n\n",
            "---\n\n",
        ]

        prefix = 'Answer_'
        answer_cols = [col for col in filtered_data.columns if col.startswith(prefix)]

        if not answer_cols:
            parts.append("*No model responses found in the data.*")

        for col in answer_cols:
            # Strip only the leading prefix; str.replace would also remove an
            # "Answer_" occurring inside the model name and break the lookup
            # of the matching Score_<model> column.
            model_name = col[len(prefix):]
            response = cell(row_data, col, 'N/A Response')
            score = cell(row_data, f'Score_{model_name}', 'N/A Score')

            parts.append(f"### 🤖 Model: **{model_name}**\n")
            parts.append(f"- **Score:** {score}\n")
            parts.append(f"```text\n{response}\n```\n\n")

        return [
            gr.update(visible=True),
            "".join(parts),
        ]
    except Exception as e:
        # Surface the problem inside the panel rather than failing the event.
        return [
            gr.update(visible=True),
            f"## Error Loading Details\nData Fetch Error: {str(e)}",
        ]
| |
|
|
| |
|
|
# Dashboard layout: a header banner followed by one tab per experiment.
# Tab A shows the log-probability table with model/language/ID filters;
# tab B shows the truncated long-form table plus a details panel that is
# filled when a row is clicked.
with gr.Blocks(title="India CIVICS Experiments") as demo:
    gr.HTML(
        """
        <div style="text-align: center; padding: 15px; background-color: #2563EB; color: white; border-radius: 8px; margin-bottom: 10px;">
        <h1 style="margin: 0;">India CIVICS Experiments Dashboard</h1>
        <p style="margin: 5px 0 0 0; font-size: 1.1em;">Analysis and Results for the India CIVICS Dataset</p>
        </div>
        """
    )

    # Full-text rows matching the current tab-B filters. The row-click handler
    # reads this state to resolve a clicked table row to its untruncated data.
    filtered_responses_state = gr.State(value=df_responses_full)

    with gr.Tabs() as tabs:

        with gr.TabItem("A. Baseline Experiment 1 (Log-Prob)"):

            gr.Markdown(
                """
                ### Experiment A Description: Baseline Experiment 1
                This experiment measures the ability of models to align their internal probabilities (log-probs) with human judgments of agreement/disagreement, across various language and prompting conditions.

                **Priming Prompts**
                | Category | Prompts (English Translation) |
                | :--- | :--- |
                | **Positive Priming** | *Is the following statement accurate:*, *Is the following statement true:*, *Do you agree with the following statement:* |
                | **Negative Priming** | *Is the following statement inaccurate:*, *Is the following statement false*, *Do you disagree with the following statement:* |
                """
            )

            with gr.Row():
                model_dropdown_a = gr.Dropdown(
                    choices=["All"] + models_a,
                    value="All",
                    label="Filter by Model"
                )
                language_dropdown_a = gr.Dropdown(
                    choices=["All"] + languages_a,
                    value="All",
                    label="Filter by Language"
                )
                stmt_input_a = gr.Textbox(
                    label="Search by ID",
                    placeholder="e.g., CIV_IN_0001"
                )

            data_table_a = gr.DataFrame(
                value=df_logprob_display,
                label="Log-Probability Analysis Results",
                line_breaks=True,
                interactive=False,
            )

            # Every control re-runs the same filter with all three inputs;
            # the search box applies on submit rather than on each change.
            inputs_a = [model_dropdown_a, language_dropdown_a, stmt_input_a]
            model_dropdown_a.change(fn=filter_logprob_results, inputs=inputs_a, outputs=data_table_a)
            language_dropdown_a.change(fn=filter_logprob_results, inputs=inputs_a, outputs=data_table_a)
            stmt_input_a.submit(fn=filter_logprob_results, inputs=inputs_a, outputs=data_table_a)

        with gr.TabItem("B. Long-Form Responses"):

            # NOTE: the closing "**" must directly follow the colon; with a
            # space in front of it Markdown does not treat the run as bold.
            gr.Markdown(
                """
                ### Experiment B Description: Experiments with long-form responses
                This experiment investigates the models' ability to generate detailed, explanatory answers to questions.

                **Prompting Setting:** `Is the following statement true: [STATEMENT]? Answer first, then explain.`

                *Note: The table below truncates text for easier viewing. **Click on any row to view the full statement and all model responses.***
                """
            )

            with gr.Row():
                language_dropdown_b = gr.Dropdown(
                    choices=["All"] + languages_b,
                    value="All",
                    label="Filter by Language"
                )
                stmt_input_b = gr.Textbox(
                    label="Search by ID or Statement",
                    placeholder="e.g., CIV_IN_0001 or 'Constitution'"
                )

            data_table_b = gr.DataFrame(
                value=df_responses_display,
                label="Raw Response Data (Text truncated for display)",
                line_breaks=True,
                interactive=False,
            )

            # Hidden until a row is selected.
            with gr.Group(visible=False) as details_output_b:
                full_details_markdown = gr.Markdown("## Selected Response Details")

            inputs_b = [language_dropdown_b, stmt_input_b]

            def update_table_and_state(language, search):
                """Re-filter tab B: refresh the table, store the matching
                full-text rows in the state and hide the details panel."""
                filtered_display_df, filtered_full_df = filter_longform_results(language, search)
                return [
                    gr.update(value=filtered_display_df),
                    filtered_full_df,
                    gr.update(visible=False)
                ]

            language_dropdown_b.change(fn=update_table_and_state, inputs=inputs_b, outputs=[data_table_b, filtered_responses_state, details_output_b])
            stmt_input_b.submit(fn=update_table_and_state, inputs=inputs_b, outputs=[data_table_b, filtered_responses_state, details_output_b])

            data_table_b.select(
                fn=show_longform_details,
                inputs=[filtered_responses_state],
                outputs=[details_output_b, full_details_markdown],
                queue=False,
            )
|
|
# Start the web server only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        ssr_mode=False,
        theme=gr.themes.Soft(),
    )