Spaces:
Sleeping
Sleeping
updated files
Browse files
app.py
CHANGED
|
@@ -2,6 +2,7 @@ import pandas as pd
|
|
| 2 |
import gradio as gr
|
| 3 |
import plotly.express as px
|
| 4 |
from typing import Dict
|
|
|
|
| 5 |
|
| 6 |
from config import METADATA_COLUMNS, DATA_FOLDER
|
| 7 |
from data_loader import load_csv_from_folder, get_available_datasets
|
|
@@ -30,18 +31,101 @@ def analyze_domain_configs(df_subset):
|
|
| 30 |
return constants, variables
|
| 31 |
|
| 32 |
def load_data() -> str:
|
| 33 |
-
"""Loads data from the configured data folder."""
|
| 34 |
try:
|
|
|
|
| 35 |
df, status_msg = load_csv_from_folder(DATA_FOLDER)
|
| 36 |
if not df.empty:
|
| 37 |
# Remove failed_samples column if it exists
|
| 38 |
if 'failed_samples' in df.columns:
|
| 39 |
df = df.drop(columns=['failed_samples'])
|
| 40 |
DB["data"] = df
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
except Exception as e:
|
| 43 |
return f"Error loading data: {str(e)}"
|
| 44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
# --- 2. UI LOGIC ---
|
| 46 |
|
| 47 |
def get_dataset_choices():
|
|
@@ -380,6 +464,52 @@ with gr.Blocks(title="RAG Analytics Pro") as demo:
|
|
| 380 |
gr.Markdown("### Peak Performance")
|
| 381 |
global_plot = gr.Plot()
|
| 382 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 383 |
# EVENTS
|
| 384 |
refresh_data_btn.click(
|
| 385 |
load_data, inputs=None, outputs=[status]
|
|
@@ -413,6 +543,22 @@ with gr.Blocks(title="RAG Analytics Pro") as demo:
|
|
| 413 |
inputs=[metric_dropdown],
|
| 414 |
outputs=[comp_table, global_plot]
|
| 415 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 416 |
|
| 417 |
# Auto-load data on startup
|
| 418 |
print(f"Loading data from {DATA_FOLDER}...")
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
import plotly.express as px
|
| 4 |
from typing import Dict
|
| 5 |
+
from pathlib import Path
|
| 6 |
|
| 7 |
from config import METADATA_COLUMNS, DATA_FOLDER
|
| 8 |
from data_loader import load_csv_from_folder, get_available_datasets
|
|
|
|
| 31 |
return constants, variables
|
| 32 |
|
| 33 |
def load_data() -> str:
    """Load the aggregate metrics table and per-domain responses into DB.

    Runs as a Gradio event handler, so failures are reported in the returned
    status string instead of being raised.

    Returns:
        A human-readable status message describing what was loaded.
    """
    try:
        # Aggregate metrics table from the configured data folder.
        df, status_msg = load_csv_from_folder(DATA_FOLDER)
        if not df.empty:
            # failed_samples is run bookkeeping, not a metric — drop it.
            if 'failed_samples' in df.columns:
                df = df.drop(columns=['failed_samples'])
            DB["data"] = df

        # Per-domain response CSVs.
        responses = load_response_data()
        DB["responses"] = responses
        response_count = sum(len(frame) for frame in responses.values())

        return (
            f"{status_msg}\nLoaded {len(DB['responses'])} response datasets "
            f"with {response_count} total responses."
        )
    except Exception as e:
        return f"Error loading data: {str(e)}"
|
| 51 |
|
| 52 |
+
def load_response_data(responses_folder="./responses") -> Dict[str, pd.DataFrame]:
    """Load per-domain response CSV files into a dict of DataFrames.

    Args:
        responses_folder: Directory containing the checkpoint CSV files.
            Defaults to ``"./responses"`` (backward compatible with the
            previous hard-coded path).

    Returns:
        Mapping of human-readable domain name -> DataFrame of responses.
        Files missing on disk are silently skipped, so a partial deployment
        still loads whatever is available.
    """
    responses_folder = Path(responses_folder)
    response_db = {}

    # Maps on-disk checkpoint filenames to the display names used in the UI
    # dropdown (must stay in sync with the Response Preview tab's choices).
    domain_mapping = {
        'Biomedical_pubmedqa_checkpoint_100.csv': 'Biomedical (PubMedQA)',
        'Customer_Support_techqa_checkpoint_100.csv': 'Customer Support (TechQA)',
        'Finance_finqa_checkpoint_100.csv': 'Finance (FinQA)',
        'General_msmarco_checkpoint_100.csv': 'General (MS MARCO)',
        'Legal_cuad_checkpoint_100.csv': 'Legal (CUAD)'
    }

    # Quality-score columns that may arrive as text in the CSVs.
    metric_columns = (
        'trace_relevance', 'trace_utilization',
        'trace_completeness', 'trace_adherence',
    )

    for filename, domain_name in domain_mapping.items():
        filepath = responses_folder / filename
        if filepath.exists():
            df = pd.read_csv(filepath)
            # Coerce metrics to numeric; unparseable cells become 0.0 so
            # downstream plots never receive NaN.
            for col in metric_columns:
                if col in df.columns:
                    df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0.0)
            response_db[domain_name] = df

    return response_db
|
| 76 |
+
|
| 77 |
+
def get_questions_for_domain(domain):
    """Populate the question dropdown for the selected domain.

    Returns a Gradio component update whose ``choices`` are the domain's
    unique questions (empty when the domain has no loaded responses).

    NOTE(review): the previous version returned a bare list; Gradio
    interprets a list returned to a Dropdown output as the dropdown *value*,
    not its choices, so the question selector never populated. Returning
    ``gr.update(choices=...)`` fixes the wiring at
    ``domain_selector.change(..., outputs=[question_selector])``.
    """
    if "responses" not in DB or domain not in DB["responses"]:
        return gr.update(choices=[], value=None)

    questions = DB["responses"][domain]['question'].unique().tolist()
    return gr.update(choices=questions, value=None)
|
| 85 |
+
|
| 86 |
+
def get_response_details(domain, question):
    """Fetch the LLM answer, gold answer, and a metrics chart for one question.

    Args:
        domain: Display name of the domain dataset (key into DB["responses"]).
        question: Exact question text to look up.

    Returns:
        Tuple of (llm_answer, gold_answer, plotly_figure). Returns
        ("", "", None) when the domain is not loaded or the question
        is not found.
    """
    if "responses" not in DB or domain not in DB["responses"]:
        return "", "", None

    matches = DB["responses"][domain]
    matches = matches[matches['question'] == question]
    if matches.empty:
        return "", "", None

    record = matches.iloc[0]
    llm_answer = str(record.get('answer', 'N/A'))
    gold_answer = str(record.get('gold_answer', 'N/A'))

    # Bar chart of the four trace quality scores for this response.
    metric_labels = ['Relevance', 'Utilization', 'Completeness', 'Adherence']
    metric_scores = [
        record.get(f'trace_{label.lower()}', 0.0) for label in metric_labels
    ]
    metrics_df = pd.DataFrame({'Metric': metric_labels, 'Score': metric_scores})

    fig = px.bar(
        metrics_df,
        x='Metric',
        y='Score',
        title='Quality Metrics for Selected Response',
        text_auto='.3f',
        color='Metric',
        range_y=[0, 1],
    )
    fig.update_traces(textposition='outside')

    return llm_answer, gold_answer, fig
|
| 128 |
+
|
| 129 |
# --- 2. UI LOGIC ---
|
| 130 |
|
| 131 |
def get_dataset_choices():
|
|
|
|
| 464 |
gr.Markdown("### Peak Performance")
|
| 465 |
global_plot = gr.Plot()
|
| 466 |
|
| 467 |
+
# TAB 4: Response Preview & Metrics
|
| 468 |
+
with gr.TabItem("Response Preview & Metrics"):
|
| 469 |
+
gr.Markdown("### Preview LLM Responses and Quality Metrics")
|
| 470 |
+
gr.Markdown("Select a domain and question to view the generated answer, gold answer, and quality metrics.")
|
| 471 |
+
|
| 472 |
+
with gr.Row():
|
| 473 |
+
with gr.Column(scale=1):
|
| 474 |
+
domain_selector = gr.Dropdown(
|
| 475 |
+
label="Select Domain",
|
| 476 |
+
choices=[
|
| 477 |
+
'Biomedical (PubMedQA)',
|
| 478 |
+
'Customer Support (TechQA)',
|
| 479 |
+
'Finance (FinQA)',
|
| 480 |
+
'General (MS MARCO)',
|
| 481 |
+
'Legal (CUAD)'
|
| 482 |
+
],
|
| 483 |
+
interactive=True
|
| 484 |
+
)
|
| 485 |
+
question_selector = gr.Dropdown(
|
| 486 |
+
label="Select Question",
|
| 487 |
+
choices=[],
|
| 488 |
+
interactive=True
|
| 489 |
+
)
|
| 490 |
+
|
| 491 |
+
with gr.Column(scale=2):
|
| 492 |
+
metrics_plot = gr.Plot(label="Quality Metrics")
|
| 493 |
+
|
| 494 |
+
with gr.Row():
|
| 495 |
+
with gr.Column():
|
| 496 |
+
gr.Markdown("#### LLM Generated Answer")
|
| 497 |
+
llm_answer_box = gr.Textbox(
|
| 498 |
+
label="LLM Answer",
|
| 499 |
+
lines=12,
|
| 500 |
+
interactive=False,
|
| 501 |
+
show_copy_button=True
|
| 502 |
+
)
|
| 503 |
+
|
| 504 |
+
with gr.Column():
|
| 505 |
+
gr.Markdown("#### Gold Standard Answer")
|
| 506 |
+
gold_answer_box = gr.Textbox(
|
| 507 |
+
label="Gold Answer",
|
| 508 |
+
lines=12,
|
| 509 |
+
interactive=False,
|
| 510 |
+
show_copy_button=True
|
| 511 |
+
)
|
| 512 |
+
|
| 513 |
# EVENTS
|
| 514 |
refresh_data_btn.click(
|
| 515 |
load_data, inputs=None, outputs=[status]
|
|
|
|
| 543 |
inputs=[metric_dropdown],
|
| 544 |
outputs=[comp_table, global_plot]
|
| 545 |
)
|
| 546 |
+
|
| 547 |
+
# Response Preview Events
|
| 548 |
+
domain_selector.change(
|
| 549 |
+
fn=get_questions_for_domain,
|
| 550 |
+
inputs=[domain_selector],
|
| 551 |
+
outputs=[question_selector]
|
| 552 |
+
).then(
|
| 553 |
+
fn=lambda: ("", "", None),
|
| 554 |
+
outputs=[llm_answer_box, gold_answer_box, metrics_plot]
|
| 555 |
+
)
|
| 556 |
+
|
| 557 |
+
question_selector.change(
|
| 558 |
+
fn=get_response_details,
|
| 559 |
+
inputs=[domain_selector, question_selector],
|
| 560 |
+
outputs=[llm_answer_box, gold_answer_box, metrics_plot]
|
| 561 |
+
)
|
| 562 |
|
| 563 |
# Auto-load data on startup
|
| 564 |
print(f"Loading data from {DATA_FOLDER}...")
|
responses/Biomedical_pubmedqa_checkpoint_100.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
responses/Customer_Support_techqa_checkpoint_100.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
responses/Finance_finqa_checkpoint_100.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
responses/General_msmarco_checkpoint_100.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
responses/Legal_cuad_checkpoint_100.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|