npuliga committed on
Commit
cd642cf
·
1 Parent(s): 68679f1

updated app

Browse files
Files changed (1) hide show
  1. app.py +67 -18
app.py CHANGED
@@ -77,11 +77,11 @@ def load_response_data() -> Dict[str, pd.DataFrame]:
77
  def get_questions_for_domain(domain):
78
  """Get list of questions for selected domain."""
79
  if "responses" not in DB or domain not in DB["responses"]:
80
- return []
81
 
82
  df = DB["responses"][domain]
83
  questions = df['question'].unique().tolist()
84
- return questions
85
 
86
  def get_response_details(domain, question):
87
  """Get LLM answer, gold answer, and metrics for selected question."""
@@ -139,28 +139,63 @@ def get_dataset_choices():
139
  return []
140
 
141
  def get_data_preview():
142
- """Returns separate dataframes for each domain."""
143
  if "data" not in DB:
144
- return {}, {}, {}, {}
145
 
146
  df = DB["data"]
147
  # Remove failed_samples column if it exists
148
  if 'failed_samples' in df.columns:
149
  df = df.drop(columns=['failed_samples'])
150
 
151
- datasets = df['dataset_name'].unique()
 
152
 
153
- # Create separate dataframes for each domain
154
- results = {}
155
- for ds in datasets:
156
- results[ds] = df[df['dataset_name'] == ds]
157
 
158
- # Return up to 4 domains (adjust if you have more)
159
- domain_dfs = list(results.values())
160
- while len(domain_dfs) < 4:
161
- domain_dfs.append(pd.DataFrame())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
 
163
- return domain_dfs[0], domain_dfs[1], domain_dfs[2], domain_dfs[3]
164
 
165
  def get_domain_state(dataset):
166
  empty_update = gr.update(visible=False, value=None, choices=[])
@@ -433,14 +468,28 @@ with gr.Blocks(title="RAG Analytics Pro") as demo:
433
  # TAB 2: Data Inspector
434
  with gr.TabItem("Data Preview"):
435
  gr.Markdown("### All Test Configurations by Domain")
 
 
 
 
 
 
 
 
 
 
 
 
436
  gr.Markdown("**Biomedical (PubMedQA)**")
437
  preview_table_1 = gr.Dataframe(interactive=False, wrap=True)
438
- gr.Markdown("**Finance (FinQA)**")
439
  preview_table_2 = gr.Dataframe(interactive=False, wrap=True)
440
- gr.Markdown("**General (MS MARCO)**")
441
  preview_table_3 = gr.Dataframe(interactive=False, wrap=True)
442
- gr.Markdown("**Legal (CUAD)**")
443
  preview_table_4 = gr.Dataframe(interactive=False, wrap=True)
 
 
444
  preview_btn = gr.Button("Refresh Data Preview")
445
 
446
  # TAB 3: Comparison
@@ -534,7 +583,7 @@ with gr.Blocks(title="RAG Analytics Pro") as demo:
534
  )
535
 
536
  # Debug Preview Events
537
- preview_btn.click(get_data_preview, inputs=None, outputs=[preview_table_1, preview_table_2, preview_table_3, preview_table_4])
538
 
539
  refresh_btn.click(
540
  generate_inter_domain_comparison,
 
77
def get_questions_for_domain(domain):
    """Build a Gradio dropdown update listing the unique questions for *domain*.

    When no response data has been loaded for the requested domain, the
    dropdown is emptied; in both cases the current selection is cleared.
    """
    has_domain_data = "responses" in DB and domain in DB["responses"]
    if not has_domain_data:
        return gr.update(choices=[], value=None)

    unique_questions = DB["responses"][domain]['question'].unique().tolist()
    return gr.update(choices=unique_questions, value=None)
85
 
86
  def get_response_details(domain, question):
87
  """Get LLM answer, gold answer, and metrics for selected question."""
 
139
  return []
140
 
141
def get_data_preview():
    """Return one DataFrame per domain with columns reordered by role.

    The five return values match the Data Preview tab order:
    PubMedQA, TechQA, FinQA, MS MARCO, CUAD.  Within each frame the
    columns are arranged Metadata -> Constants -> Variables -> Results,
    with any unclassified columns appended at the end so no data is
    dropped.  A domain with no rows yields an empty DataFrame, so every
    Gradio output component always receives a frame.
    """
    if "data" not in DB:
        # Keep the return type uniform with the populated path: five empty
        # DataFrames (the original returned dicts here, which gr.Dataframe
        # outputs cannot render consistently).
        return (pd.DataFrame(),) * 5

    df = DB["data"]
    # failed_samples is internal bookkeeping; hide it from the preview.
    if 'failed_samples' in df.columns:
        df = df.drop(columns=['failed_samples'])

    # Explicit domain order matching the UI layout of the preview tables.
    domain_order = ['pubmedqa', 'techqa', 'finqa', 'msmarco', 'cuad']

    # Metric columns (Results)
    result_cols = ['rmse_relevance', 'rmse_utilization', 'rmse_completeness', 'f1_score', 'aucroc']
    metadata_cols = ['test_id', 'config_purpose', 'dataset_name']

    domain_dfs = []
    for ds in domain_order:
        domain_df = df[df['dataset_name'] == ds].copy()

        if domain_df.empty:
            domain_dfs.append(pd.DataFrame())
            continue

        # Split columns into constants (same value in every row) and
        # experimental variables; helper is defined elsewhere in this file.
        consts, variables = analyze_domain_configs(domain_df)

        # Reorder columns: Metadata -> Constants -> Variables -> Results.
        ordered_cols = [col for col in metadata_cols if col in domain_df.columns]
        ordered_cols.extend(sorted(col for col in consts.keys() if col in domain_df.columns))
        ordered_cols.extend(sorted(col for col in variables if col in domain_df.columns))
        ordered_cols.extend(col for col in result_cols if col in domain_df.columns)

        # Append anything not classified above so no column is lost.
        remaining = [col for col in domain_df.columns if col not in ordered_cols]
        ordered_cols.extend(remaining)

        domain_dfs.append(domain_df[ordered_cols])

    return domain_dfs[0], domain_dfs[1], domain_dfs[2], domain_dfs[3], domain_dfs[4]
199
 
200
  def get_domain_state(dataset):
201
  empty_update = gr.update(visible=False, value=None, choices=[])
 
468
  # TAB 2: Data Inspector
469
  with gr.TabItem("Data Preview"):
470
  gr.Markdown("### All Test Configurations by Domain")
471
+
472
+ # Add column type legend
473
+ gr.Markdown("""
474
+ **Column Organization Guide:**
475
+ - **Metadata** (Test ID, Config Purpose, Dataset Name) - Test identifiers
476
+ - **Constants** - Same value across all tests in this domain (e.g., embedding model)
477
+ - **Variables** - Experimental parameters that change across tests (e.g., chunk size, reranker)
478
+ - **Results** - Performance metrics (RMSE Relevance, RMSE Utilization, RMSE Completeness, F1-Score, AUC-ROC)
479
+
480
+ *Columns are automatically reordered: Metadata → Constants → Variables → Results*
481
+ """)
482
+
483
  gr.Markdown("**Biomedical (PubMedQA)**")
484
  preview_table_1 = gr.Dataframe(interactive=False, wrap=True)
485
+ gr.Markdown("**Customer Support (TechQA)**")
486
  preview_table_2 = gr.Dataframe(interactive=False, wrap=True)
487
+ gr.Markdown("**Finance (FinQA)**")
488
  preview_table_3 = gr.Dataframe(interactive=False, wrap=True)
489
+ gr.Markdown("**General (MS MARCO)**")
490
  preview_table_4 = gr.Dataframe(interactive=False, wrap=True)
491
+ gr.Markdown("**Legal (CUAD)**")
492
+ preview_table_5 = gr.Dataframe(interactive=False, wrap=True)
493
  preview_btn = gr.Button("Refresh Data Preview")
494
 
495
  # TAB 3: Comparison
 
583
  )
584
 
585
  # Debug Preview Events
586
+ preview_btn.click(get_data_preview, inputs=None, outputs=[preview_table_1, preview_table_2, preview_table_3, preview_table_4, preview_table_5])
587
 
588
  refresh_btn.click(
589
  generate_inter_domain_comparison,