npuliga commited on
Commit
28e1133
·
1 Parent(s): 42b25fb

updated files

Browse files
app.py CHANGED
@@ -2,6 +2,7 @@ import pandas as pd
2
  import gradio as gr
3
  import plotly.express as px
4
  from typing import Dict
 
5
 
6
  from config import METADATA_COLUMNS, DATA_FOLDER
7
  from data_loader import load_csv_from_folder, get_available_datasets
@@ -30,18 +31,101 @@ def analyze_domain_configs(df_subset):
30
  return constants, variables
31
 
32
  def load_data() -> str:
33
- """Loads data from the configured data folder."""
34
  try:
 
35
  df, status_msg = load_csv_from_folder(DATA_FOLDER)
36
  if not df.empty:
37
  # Remove failed_samples column if it exists
38
  if 'failed_samples' in df.columns:
39
  df = df.drop(columns=['failed_samples'])
40
  DB["data"] = df
41
- return status_msg
 
 
 
 
 
42
  except Exception as e:
43
  return f"Error loading data: {str(e)}"
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  # --- 2. UI LOGIC ---
46
 
47
  def get_dataset_choices():
@@ -380,6 +464,52 @@ with gr.Blocks(title="RAG Analytics Pro") as demo:
380
  gr.Markdown("### Peak Performance")
381
  global_plot = gr.Plot()
382
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383
  # EVENTS
384
  refresh_data_btn.click(
385
  load_data, inputs=None, outputs=[status]
@@ -413,6 +543,22 @@ with gr.Blocks(title="RAG Analytics Pro") as demo:
413
  inputs=[metric_dropdown],
414
  outputs=[comp_table, global_plot]
415
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
416
 
417
  # Auto-load data on startup
418
  print(f"Loading data from {DATA_FOLDER}...")
 
2
  import gradio as gr
3
  import plotly.express as px
4
  from typing import Dict
5
+ from pathlib import Path
6
 
7
  from config import METADATA_COLUMNS, DATA_FOLDER
8
  from data_loader import load_csv_from_folder, get_available_datasets
 
31
  return constants, variables
32
 
33
def load_data() -> str:
    """Load aggregate metrics and per-question responses into the DB cache.

    Reads the aggregate metrics CSVs from DATA_FOLDER into DB["data"], then
    the per-domain response CSVs (via load_response_data) into DB["responses"].

    Returns:
        A human-readable status string for the UI status box; on failure an
        error message (the broad catch is deliberate — this feeds a Textbox,
        not a caller that can handle exceptions).
    """
    try:
        # Load aggregate metrics data.
        df, status_msg = load_csv_from_folder(DATA_FOLDER)
        if not df.empty:
            # Remove failed_samples column if it exists — bookkeeping noise
            # that should not appear in the dashboard tables.
            if 'failed_samples' in df.columns:
                df = df.drop(columns=['failed_samples'])
            DB["data"] = df

        # Load per-question response data (one DataFrame per domain).
        DB["responses"] = load_response_data()
        # Distinct loop name so the generator does not shadow `df` above.
        response_count = sum(len(domain_df) for domain_df in DB["responses"].values())

        return (
            f"{status_msg}\nLoaded {len(DB['responses'])} response datasets "
            f"with {response_count} total responses."
        )
    except Exception as e:
        return f"Error loading data: {str(e)}"
51
 
52
def load_response_data() -> Dict[str, pd.DataFrame]:
    """Read the per-domain response CSVs from the ./responses folder.

    Returns:
        Mapping of display-ready domain name -> response DataFrame. Absent
        checkpoint files are skipped silently; the trace_* metric columns are
        coerced to numeric, with unparsable cells replaced by 0.0.
    """
    base_dir = Path("./responses")
    # Metric columns that must be numeric for plotting/aggregation.
    metric_columns = (
        'trace_relevance',
        'trace_utilization',
        'trace_completeness',
        'trace_adherence',
    )
    # Checkpoint CSV filename -> display name shown in the domain dropdown.
    file_to_domain = {
        'Biomedical_pubmedqa_checkpoint_100.csv': 'Biomedical (PubMedQA)',
        'Customer_Support_techqa_checkpoint_100.csv': 'Customer Support (TechQA)',
        'Finance_finqa_checkpoint_100.csv': 'Finance (FinQA)',
        'General_msmarco_checkpoint_100.csv': 'General (MS MARCO)',
        'Legal_cuad_checkpoint_100.csv': 'Legal (CUAD)',
    }

    loaded: Dict[str, pd.DataFrame] = {}
    for csv_name, display_name in file_to_domain.items():
        csv_path = base_dir / csv_name
        if not csv_path.exists():
            continue  # checkpoint not present; skip quietly
        frame = pd.read_csv(csv_path)
        # Scores may arrive as strings; force numeric so downstream math works.
        for metric in metric_columns:
            if metric in frame.columns:
                frame[metric] = pd.to_numeric(frame[metric], errors='coerce').fillna(0.0)
        loaded[display_name] = frame
    return loaded
76
+
77
def get_questions_for_domain(domain):
    """Repopulate the question dropdown for the selected domain.

    Args:
        domain: Display name of the domain (a key of DB["responses"]).

    Returns:
        A gr.update payload replacing the question dropdown's choices.
        Returning a plain list here would be interpreted by Gradio as the
        dropdown's new *value*, leaving its choices empty — hence gr.update.
    """
    if "responses" not in DB or domain not in DB["responses"]:
        # Unknown or not-yet-loaded domain: clear the dropdown.
        return gr.update(choices=[], value=None)

    df = DB["responses"][domain]
    questions = df['question'].unique().tolist()
    return gr.update(choices=questions, value=None)
85
+
86
def get_response_details(domain, question):
    """Get the LLM answer, gold answer, and a metrics chart for one question.

    Args:
        domain: Display name of the domain (a key of DB["responses"]).
        question: Exact question text to look up in that domain's DataFrame.

    Returns:
        Tuple (llm_answer, gold_answer, plotly figure). Empty strings and
        None are returned when the domain or question cannot be resolved,
        which also clears the UI components wired to these outputs.
    """
    if "responses" not in DB or domain not in DB["responses"]:
        return "", "", None

    df = DB["responses"][domain]
    # Separate name for the filtered frame so `row` is only ever a Series.
    matches = df[df['question'] == question]
    if matches.empty:
        return "", "", None
    row = matches.iloc[0]

    llm_answer = str(row.get('answer', 'N/A'))
    gold_answer = str(row.get('gold_answer', 'N/A'))

    # Four-metric score table; missing columns default to 0.0 (columns were
    # already coerced numeric at load time).
    metrics_df = pd.DataFrame({
        'Metric': ['Relevance', 'Utilization', 'Completeness', 'Adherence'],
        'Score': [
            row.get('trace_relevance', 0.0),
            row.get('trace_utilization', 0.0),
            row.get('trace_completeness', 0.0),
            row.get('trace_adherence', 0.0),
        ],
    })

    # Bar chart clamped to [0, 1]; metrics are normalized scores.
    fig = px.bar(
        metrics_df,
        x='Metric',
        y='Score',
        title='Quality Metrics for Selected Response',  # was an f-string with no placeholders
        text_auto='.3f',
        color='Metric',
        range_y=[0, 1],
    )
    fig.update_traces(textposition='outside')

    return llm_answer, gold_answer, fig
128
+
129
  # --- 2. UI LOGIC ---
130
 
131
  def get_dataset_choices():
 
464
  gr.Markdown("### Peak Performance")
465
  global_plot = gr.Plot()
466
 
467
        # TAB 4: Response Preview & Metrics
        # Drill-down view for a single generated answer: pick a domain, then a
        # question, and compare the LLM answer with the gold answer alongside
        # a bar chart of the four trace_* quality metrics.
        with gr.TabItem("Response Preview & Metrics"):
            gr.Markdown("### Preview LLM Responses and Quality Metrics")
            gr.Markdown("Select a domain and question to view the generated answer, gold answer, and quality metrics.")

            with gr.Row():
                with gr.Column(scale=1):
                    # Choices mirror the keys produced by load_response_data().
                    domain_selector = gr.Dropdown(
                        label="Select Domain",
                        choices=[
                            'Biomedical (PubMedQA)',
                            'Customer Support (TechQA)',
                            'Finance (FinQA)',
                            'General (MS MARCO)',
                            'Legal (CUAD)'
                        ],
                        interactive=True
                    )
                    # Starts empty; populated when a domain is selected.
                    question_selector = gr.Dropdown(
                        label="Select Question",
                        choices=[],
                        interactive=True
                    )

                with gr.Column(scale=2):
                    # Bar chart of quality metrics for the selected response.
                    metrics_plot = gr.Plot(label="Quality Metrics")

            with gr.Row():
                with gr.Column():
                    gr.Markdown("#### LLM Generated Answer")
                    llm_answer_box = gr.Textbox(
                        label="LLM Answer",
                        lines=12,
                        interactive=False,
                        show_copy_button=True
                    )

                with gr.Column():
                    gr.Markdown("#### Gold Standard Answer")
                    gold_answer_box = gr.Textbox(
                        label="Gold Answer",
                        lines=12,
                        interactive=False,
                        show_copy_button=True
                    )
513
  # EVENTS
514
  refresh_data_btn.click(
515
  load_data, inputs=None, outputs=[status]
 
543
  inputs=[metric_dropdown],
544
  outputs=[comp_table, global_plot]
545
  )
546
+
547
    # Response Preview Events
    # Changing the domain repopulates the question dropdown, then clears the
    # previously shown answers/plot so stale content never lingers.
    domain_selector.change(
        fn=get_questions_for_domain,
        inputs=[domain_selector],
        outputs=[question_selector]
    ).then(
        fn=lambda: ("", "", None),
        outputs=[llm_answer_box, gold_answer_box, metrics_plot]
    )

    # Selecting a question renders its answers and metrics chart.
    question_selector.change(
        fn=get_response_details,
        inputs=[domain_selector, question_selector],
        outputs=[llm_answer_box, gold_answer_box, metrics_plot]
    )
562
 
563
  # Auto-load data on startup
564
  print(f"Loading data from {DATA_FOLDER}...")
responses/Biomedical_pubmedqa_checkpoint_100.csv ADDED
The diff for this file is too large to render. See raw diff
 
responses/Customer_Support_techqa_checkpoint_100.csv ADDED
The diff for this file is too large to render. See raw diff
 
responses/Finance_finqa_checkpoint_100.csv ADDED
The diff for this file is too large to render. See raw diff
 
responses/General_msmarco_checkpoint_100.csv ADDED
The diff for this file is too large to render. See raw diff
 
responses/Legal_cuad_checkpoint_100.csv ADDED
The diff for this file is too large to render. See raw diff