Spaces:
Sleeping
Sleeping
updated app
Browse files
app.py
CHANGED
|
@@ -77,11 +77,11 @@ def load_response_data() -> Dict[str, pd.DataFrame]:
|
|
| 77 |
def get_questions_for_domain(domain):
|
| 78 |
"""Get list of questions for selected domain."""
|
| 79 |
if "responses" not in DB or domain not in DB["responses"]:
|
| 80 |
-
return []
|
| 81 |
|
| 82 |
df = DB["responses"][domain]
|
| 83 |
questions = df['question'].unique().tolist()
|
| 84 |
-
return questions
|
| 85 |
|
| 86 |
def get_response_details(domain, question):
|
| 87 |
"""Get LLM answer, gold answer, and metrics for selected question."""
|
|
@@ -139,28 +139,63 @@ def get_dataset_choices():
|
|
| 139 |
return []
|
| 140 |
|
| 141 |
def get_data_preview():
|
| 142 |
-
"""Returns separate dataframes for each domain."""
|
| 143 |
if "data" not in DB:
|
| 144 |
-
return {}, {}, {}, {}
|
| 145 |
|
| 146 |
df = DB["data"]
|
| 147 |
# Remove failed_samples column if it exists
|
| 148 |
if 'failed_samples' in df.columns:
|
| 149 |
df = df.drop(columns=['failed_samples'])
|
| 150 |
|
| 151 |
-
|
|
|
|
| 152 |
|
| 153 |
-
#
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
results[ds] = df[df['dataset_name'] == ds]
|
| 157 |
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
|
| 163 |
-
return domain_dfs[0], domain_dfs[1], domain_dfs[2], domain_dfs[3]
|
| 164 |
|
| 165 |
def get_domain_state(dataset):
|
| 166 |
empty_update = gr.update(visible=False, value=None, choices=[])
|
|
@@ -433,14 +468,28 @@ with gr.Blocks(title="RAG Analytics Pro") as demo:
|
|
| 433 |
# TAB 2: Data Inspector
|
| 434 |
with gr.TabItem("Data Preview"):
|
| 435 |
gr.Markdown("### All Test Configurations by Domain")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 436 |
gr.Markdown("**Biomedical (PubMedQA)**")
|
| 437 |
preview_table_1 = gr.Dataframe(interactive=False, wrap=True)
|
| 438 |
-
gr.Markdown("**
|
| 439 |
preview_table_2 = gr.Dataframe(interactive=False, wrap=True)
|
| 440 |
-
gr.Markdown("**
|
| 441 |
preview_table_3 = gr.Dataframe(interactive=False, wrap=True)
|
| 442 |
-
gr.Markdown("**
|
| 443 |
preview_table_4 = gr.Dataframe(interactive=False, wrap=True)
|
|
|
|
|
|
|
| 444 |
preview_btn = gr.Button("Refresh Data Preview")
|
| 445 |
|
| 446 |
# TAB 3: Comparison
|
|
@@ -534,7 +583,7 @@ with gr.Blocks(title="RAG Analytics Pro") as demo:
|
|
| 534 |
)
|
| 535 |
|
| 536 |
# Debug Preview Events
|
| 537 |
-
preview_btn.click(get_data_preview, inputs=None, outputs=[preview_table_1, preview_table_2, preview_table_3, preview_table_4])
|
| 538 |
|
| 539 |
refresh_btn.click(
|
| 540 |
generate_inter_domain_comparison,
|
|
|
|
| 77 |
def get_questions_for_domain(domain):
|
| 78 |
"""Get list of questions for selected domain."""
|
| 79 |
if "responses" not in DB or domain not in DB["responses"]:
|
| 80 |
+
return gr.update(choices=[], value=None)
|
| 81 |
|
| 82 |
df = DB["responses"][domain]
|
| 83 |
questions = df['question'].unique().tolist()
|
| 84 |
+
return gr.update(choices=questions, value=None)
|
| 85 |
|
| 86 |
def get_response_details(domain, question):
|
| 87 |
"""Get LLM answer, gold answer, and metrics for selected question."""
|
|
|
|
| 139 |
return []
|
| 140 |
|
| 141 |
def get_data_preview():
|
| 142 |
+
"""Returns separate dataframes for each domain with columns reordered by type."""
|
| 143 |
if "data" not in DB:
|
| 144 |
+
return {}, {}, {}, {}, {}
|
| 145 |
|
| 146 |
df = DB["data"]
|
| 147 |
# Remove failed_samples column if it exists
|
| 148 |
if 'failed_samples' in df.columns:
|
| 149 |
df = df.drop(columns=['failed_samples'])
|
| 150 |
|
| 151 |
+
# Define explicit domain order matching the UI
|
| 152 |
+
domain_order = ['pubmedqa', 'techqa', 'finqa', 'msmarco', 'cuad']
|
| 153 |
|
| 154 |
+
# Metric columns (Results)
|
| 155 |
+
result_cols = ['rmse_relevance', 'rmse_utilization', 'rmse_completeness', 'f1_score', 'aucroc']
|
| 156 |
+
metadata_cols = ['test_id', 'config_purpose', 'dataset_name']
|
|
|
|
| 157 |
|
| 158 |
+
domain_dfs = []
|
| 159 |
+
for ds in domain_order:
|
| 160 |
+
domain_df = df[df['dataset_name'] == ds].copy()
|
| 161 |
+
|
| 162 |
+
if domain_df.empty:
|
| 163 |
+
domain_dfs.append(pd.DataFrame())
|
| 164 |
+
continue
|
| 165 |
+
|
| 166 |
+
# Analyze constants and variables
|
| 167 |
+
consts, variables = analyze_domain_configs(domain_df)
|
| 168 |
+
|
| 169 |
+
# Reorder columns: Metadata -> Constants -> Variables -> Results
|
| 170 |
+
ordered_cols = []
|
| 171 |
+
|
| 172 |
+
# Add metadata columns first
|
| 173 |
+
for col in metadata_cols:
|
| 174 |
+
if col in domain_df.columns:
|
| 175 |
+
ordered_cols.append(col)
|
| 176 |
+
|
| 177 |
+
# Add constant columns (sorted)
|
| 178 |
+
const_cols = sorted([col for col in consts.keys() if col in domain_df.columns])
|
| 179 |
+
ordered_cols.extend(const_cols)
|
| 180 |
+
|
| 181 |
+
# Add variable columns (sorted)
|
| 182 |
+
var_cols = sorted([col for col in variables if col in domain_df.columns])
|
| 183 |
+
ordered_cols.extend(var_cols)
|
| 184 |
+
|
| 185 |
+
# Add result columns
|
| 186 |
+
for col in result_cols:
|
| 187 |
+
if col in domain_df.columns:
|
| 188 |
+
ordered_cols.append(col)
|
| 189 |
+
|
| 190 |
+
# Add any remaining columns
|
| 191 |
+
remaining = [col for col in domain_df.columns if col not in ordered_cols]
|
| 192 |
+
ordered_cols.extend(remaining)
|
| 193 |
+
|
| 194 |
+
# Reorder dataframe
|
| 195 |
+
domain_df = domain_df[ordered_cols]
|
| 196 |
+
domain_dfs.append(domain_df)
|
| 197 |
|
| 198 |
+
return domain_dfs[0], domain_dfs[1], domain_dfs[2], domain_dfs[3], domain_dfs[4]
|
| 199 |
|
| 200 |
def get_domain_state(dataset):
|
| 201 |
empty_update = gr.update(visible=False, value=None, choices=[])
|
|
|
|
| 468 |
# TAB 2: Data Inspector
|
| 469 |
with gr.TabItem("Data Preview"):
|
| 470 |
gr.Markdown("### All Test Configurations by Domain")
|
| 471 |
+
|
| 472 |
+
# Add column type legend
|
| 473 |
+
gr.Markdown("""
|
| 474 |
+
**Column Organization Guide:**
|
| 475 |
+
- **Metadata** (Test ID, Config Purpose, Dataset Name) - Test identifiers
|
| 476 |
+
- **Constants** - Same value across all tests in this domain (e.g., embedding model)
|
| 477 |
+
- **Variables** - Experimental parameters that change across tests (e.g., chunk size, reranker)
|
| 478 |
+
- **Results** - Performance metrics (RMSE Relevance, RMSE Utilization, RMSE Completeness, F1-Score, AUC-ROC)
|
| 479 |
+
|
| 480 |
+
*Columns are automatically reordered: Metadata → Constants → Variables → Results*
|
| 481 |
+
""")
|
| 482 |
+
|
| 483 |
gr.Markdown("**Biomedical (PubMedQA)**")
|
| 484 |
preview_table_1 = gr.Dataframe(interactive=False, wrap=True)
|
| 485 |
+
gr.Markdown("**Customer Support (TechQA)**")
|
| 486 |
preview_table_2 = gr.Dataframe(interactive=False, wrap=True)
|
| 487 |
+
gr.Markdown("**Finance (FinQA)**")
|
| 488 |
preview_table_3 = gr.Dataframe(interactive=False, wrap=True)
|
| 489 |
+
gr.Markdown("**General (MS MARCO)**")
|
| 490 |
preview_table_4 = gr.Dataframe(interactive=False, wrap=True)
|
| 491 |
+
gr.Markdown("**Legal (CUAD)**")
|
| 492 |
+
preview_table_5 = gr.Dataframe(interactive=False, wrap=True)
|
| 493 |
preview_btn = gr.Button("Refresh Data Preview")
|
| 494 |
|
| 495 |
# TAB 3: Comparison
|
|
|
|
| 583 |
)
|
| 584 |
|
| 585 |
# Debug Preview Events
|
| 586 |
+
preview_btn.click(get_data_preview, inputs=None, outputs=[preview_table_1, preview_table_2, preview_table_3, preview_table_4, preview_table_5])
|
| 587 |
|
| 588 |
refresh_btn.click(
|
| 589 |
generate_inter_domain_comparison,
|