Spaces:

Supastrikas-004
/

evaluation-framework

Runtime error

App Files Files Community

Update app.py

by manayporwal07 - opened Sep 13, 2025

base: refs/heads/main

←

from: refs/pr/6

Discussion Files changed

+110

-0

Files changed (1) hide show

app.py +110 -0

app.py CHANGED Viewed

@@ -228,3 +228,113 @@
 #                 "- `reference` optional.\n- Download CSV report after evaluation.")
 # demo.launch()

 #                 "- `reference` optional.\n- Download CSV report after evaluation.")
 # demo.launch()
+"""
+Gradio application entrypoint for Hugging Face Spaces.
+"""
+import os
+import tempfile
+import pandas as pd
+import gradio as gr
+from evaluator import evaluate_dataframe
+from synthetic_data import generate_synthetic_dataset
+# -----------------------------
+# File Handling
+# -----------------------------
+def save_uploaded(file_obj):
+    """Return a filesystem path for an uploaded file.
+
+    Args:
+        file_obj: The upload value. Accepted forms are a path string or
+            ``os.PathLike``, an object whose ``.name`` is a path on disk,
+            or a readable file-like object whose content must be copied
+            to a temporary file first.
+
+    Returns:
+        A path to the uploaded data, or ``None`` when ``file_obj`` is falsy.
+
+    Raises:
+        AttributeError: If ``file_obj`` has neither ``.name`` nor ``.read``.
+    """
+    if not file_obj:
+        return None
+    # NOTE(review): some Gradio versions/configurations hand the callback a
+    # plain path string instead of a file wrapper -- accept both.
+    if isinstance(file_obj, (str, os.PathLike)):
+        return os.fspath(file_obj)
+    if not hasattr(file_obj, "read"):
+        # Nothing to copy from; hand back the wrapper's path as before.
+        return file_obj.name
+    name = getattr(file_obj, "name", None)
+    if isinstance(name, str) and os.path.exists(name):
+        return name
+    # In-memory upload: persist the content so the loaders can open it by path.
+    data = file_obj.read()
+    if isinstance(data, str):
+        data = data.encode("utf-8")
+    # The loader treats anything that is not ".csv" as JSON/JSONL.
+    is_csv = isinstance(name, str) and name.lower().endswith(".csv")
+    suffix = ".csv" if is_csv else ".json"
+    fd, tmp = tempfile.mkstemp(suffix=suffix)
+    with os.fdopen(fd, "wb") as f:
+        f.write(data)
+    return tmp
+def load_file_to_df(path):
+    """Load a CSV, JSON Lines, or JSON file into a pandas DataFrame.
+
+    Args:
+        path: Path of the file to read, or ``None``.
+
+    Returns:
+        The parsed DataFrame, or ``None`` when ``path`` is ``None``.
+
+    Raises:
+        ValueError: If a non-CSV file parses neither as JSON Lines nor as
+            a regular JSON document. Other pandas/OS errors propagate
+            unchanged.
+    """
+    if path is None:
+        return None
+    # Match the extension case-insensitively so "DATA.CSV" is read as CSV.
+    if str(path).lower().endswith(".csv"):
+        return pd.read_csv(path)
+    # Try JSON Lines first, then fall back to a single JSON document.
+    try:
+        return pd.read_json(path, lines=True)
+    except ValueError:
+        return pd.read_json(path)
+# -----------------------------
+# Evaluation Pipeline
+# -----------------------------
+def run_evaluation(file_obj):
+    """Evaluate an uploaded dataset, or a synthetic one if nothing was uploaded.
+
+    Args:
+        file_obj: Upload value passed to ``save_uploaded``; ``None`` selects
+            a generated synthetic dataset (3 agents, 12 samples).
+
+    Returns:
+        A 3-tuple ``((gallery_items, captions), metrics_df, leaderboard)``.
+        When no data could be loaded the tuple is
+        ``(([], []), "No data loaded", None)``, so the first element can
+        always be unpacked as a pair.
+
+    Side effects:
+        Writes the per-example metrics to ``/tmp/eval_results.csv``.
+    """
+    if file_obj is None:
+        df = generate_synthetic_dataset(num_agents=3, num_samples=12)
+    else:
+        df = load_file_to_df(save_uploaded(file_obj))
+    if df is None:
+        return ([], []), "No data loaded", None
+    # Normalize column names: map e.g. "Prompt" / "PROMPT" / " prompt " to
+    # the canonical lowercase name, unless that exact column already exists.
+    canonical = ("task_id", "prompt", "response", "agent", "reference")
+    present = set(df.columns)
+    by_lower = {}
+    for col in df.columns:
+        # First match wins so two columns are never renamed to one name.
+        by_lower.setdefault(str(col).strip().lower(), col)
+    rename_map = {
+        by_lower[key]: key
+        for key in canonical
+        if key not in present and key in by_lower
+    }
+    if rename_map:
+        df = df.rename(columns=rename_map)
+    metrics_df, images, leaderboard = evaluate_dataframe(df)
+    # `images` is unpacked as (item, caption) pairs.
+    gallery_items = [item for item, _ in images]
+    captions = [caption for _, caption in images]
+    # Save CSV report
+    # NOTE(review): fixed path is shared by all sessions; concurrent runs
+    # overwrite each other's report.
+    out_csv = "/tmp/eval_results.csv"
+    metrics_df.to_csv(out_csv, index=False)
+    return (gallery_items, captions), metrics_df, leaderboard
+# -----------------------------
+# Gradio UI
+# -----------------------------
+with gr.Blocks() as demo:
+    gr.Markdown("# 🧪 Agentic Evaluation Framework")
+    gr.Markdown(
+        "Upload a CSV/JSON/JSONL with columns: "
+        "`task_id,prompt,response,agent,reference`. "
+        "If no file is uploaded, a small synthetic demo will run."
+    )
+    with gr.Row():
+        file_input = gr.File(label="Upload CSV/JSON/JSONL", file_types=[".csv", ".json", ".jsonl"])
+        run_btn = gr.Button("Run Evaluation")
+        download_report = gr.File(label="Download CSV Report")
+    gallery = gr.Gallery(label="Visualization Outputs", columns=2, height="auto")
+    table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
+    leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Final Score per Agent & Task)")
+    def on_run(file_in):
+        """Run the evaluation and fan the results out to the UI components.
+
+        Returns gallery (item, caption) pairs, the metrics table, the
+        leaderboard, and the CSV report path for the download component.
+        """
+        images, metrics_df, lb = run_evaluation(file_in)
+        if isinstance(metrics_df, str):
+            # No-data path: run_evaluation puts a message where the metrics
+            # table would be. Show it to the user rather than crashing.
+            raise gr.Error(metrics_df)
+        gallery_items, captions = images
+        gallery_display = [(p, captions[i] if i < len(captions) else "") for i, p in enumerate(gallery_items)]
+        # run_evaluation writes the report to this fixed location.
+        report_path = "/tmp/eval_results.csv"
+        report = report_path if os.path.exists(report_path) else None
+        return gallery_display, metrics_df, lb, report
+    run_btn.click(
+        fn=on_run,
+        inputs=[file_input],
+        outputs=[gallery, table, leaderboard, download_report],
+    )
+    gr.Markdown("## Tips\n- Ensure columns: `task_id,prompt,response,agent,reference` "
+                "(case-insensitive).\n- Visualization images in Gallery.\n- Download CSV after evaluation.")
+demo.launch()