Spaces:

Supastrikas-004
/

evaluation-framework

Runtime error

App Files Files Community

manayporwal07 committed on Sep 13, 2025

Commit

88604d0

verified ·

1 Parent(s): d4e4912

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -49

app.py CHANGED Viewed

@@ -229,38 +229,63 @@
 # demo.launch()
 """
 Gradio application entrypoint for Hugging Face Spaces.
 """
 import os
 import tempfile
 import pandas as pd
 import gradio as gr
-from evaluator import evaluate_dataframe
 from synthetic_data import generate_synthetic_dataset
-# -----------------------------
-# File Handling
-# -----------------------------
 def save_uploaded(file_obj):
     if not file_obj:
         return None
     try:
-        return file_obj.name
     except Exception:
         data = file_obj.read()
-        suffix = ".csv" if file_obj.name.endswith(".csv") else ".json"
-        fd, tmp = tempfile.mkstemp(suffix=suffix)
-        with os.fdopen(fd, "wb") as f:
             f.write(data)
-        return tmp
 def load_file_to_df(path):
     if path is None:
         return None
     try:
-        if path.endswith(".csv"):
             return pd.read_csv(path)
         try:
             return pd.read_json(path, lines=True)
@@ -269,11 +294,8 @@ def load_file_to_df(path):
     except Exception as e:
         raise e
-# -----------------------------
-# Evaluation Pipeline
-# -----------------------------
 def run_evaluation(file_obj):
     if file_obj is None:
         df = generate_synthetic_dataset(num_agents=3, num_samples=12)
     else:
@@ -281,65 +303,59 @@ def run_evaluation(file_obj):
         df = load_file_to_df(path)
     if df is None:
-        return None, "No data loaded", None
     # Normalize column names
     cols = {c.lower(): c for c in df.columns}
     rename_map = {}
-    for k in ["task_id", "prompt", "response", "agent", "reference"]:
         if k not in cols:
-            for alt in [k, k.capitalize(), k.upper()]:
-                if alt.lower() in cols:
-                    rename_map[cols[alt.lower()]] = k
-                    break
     if rename_map:
         df = df.rename(columns=rename_map)
     metrics_df, images, leaderboard = evaluate_dataframe(df)
     gallery_items = [p for (p, caption) in images]
     captions = [caption for (p, caption) in images]
-    # Save CSV report
     out_csv = "/tmp/eval_results.csv"
     metrics_df.to_csv(out_csv, index=False)
-    return (gallery_items, captions), metrics_df, leaderboard
-# -----------------------------
-# Gradio UI
-# -----------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("# 🧪 Agentic Evaluation Framework")
-    gr.Markdown(
-        "Upload a CSV/JSON/JSONL with columns: "
-        "`task_id,prompt,response,agent,reference`. "
-        "If no file is uploaded, a small synthetic demo will run."
-    )
     with gr.Row():
-        file_input = gr.File(label="Upload CSV/JSON/JSONL", file_types=[".csv", ".json", ".jsonl"])
         run_btn = gr.Button("Run Evaluation")
-        download_report = gr.File(label="Download CSV Report")
     gallery = gr.Gallery(label="Visualization Outputs", columns=2, height="auto")
     table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
-    leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Final Score per Agent)")
     def on_run(file_in):
-        (gallery_items, captions), metrics_df, lb = run_evaluation(file_in)
-        gallery_display = [(p, captions[i] if i < len(captions) else "") for i, p in enumerate(gallery_items)] if gallery_items else []
-        csv_path = "/tmp/eval_results.csv"  # ensure CSV report is downloadable
-        return gallery_display, metrics_df, lb, csv_path
-    run_btn.click(
-        fn=on_run,
-        inputs=[file_input],
-        outputs=[gallery, table, leaderboard, download_report]
-        )
-    gr.Markdown("## Tips\n- Ensure columns: `task_id,prompt,response,agent,reference` "
-                "(case-insensitive).\n- Visualization images in Gallery.\n- Download CSV after evaluation.")
 demo.launch()

 # demo.launch()
+# app.py (patch)
 """
 Gradio application entrypoint for Hugging Face Spaces.
 """
 import os
 import tempfile
 import pandas as pd
 import gradio as gr
+from evaluator import evaluate_dataframe   # <<-- fixed import (was `evaluation`)
 from synthetic_data import generate_synthetic_dataset
+# Helper to save uploaded file to local temp path (gradio File gives a NamedTemporaryFile-like object)
 def save_uploaded(file_obj):
     if not file_obj:
         return None
+    # With some Gradio versions, file_obj may be a dict holding the path under 'name', 'tmp_path', or 'file'
+    if isinstance(file_obj, dict):
+        for key in ("name", "tmp_path", "file"):
+            path = file_obj.get(key)
+            if path and os.path.exists(path):
+                return path
+    # If it's already a path (string)
+    if isinstance(file_obj, str) and os.path.exists(file_obj):
+        return file_obj
+    # If it has a .name attribute and file exists
     try:
+        if hasattr(file_obj, "name") and os.path.exists(file_obj.name):
+            return file_obj.name
     except Exception:
+        pass
+    # Fallback: write bytes to a temp file
+    try:
         data = file_obj.read()
+    except Exception:
+        return None
+    # choose suffix heuristically
+    name_attr = getattr(file_obj, "name", "")
+    suffix = ".csv" if name_attr.lower().endswith(".csv") else ".json"
+    fd, tmp = tempfile.mkstemp(suffix=suffix)
+    with os.fdopen(fd, "wb") as f:
+        if isinstance(data, str):
+            f.write(data.encode())
+        else:
             f.write(data)
+    return tmp
 def load_file_to_df(path):
     if path is None:
         return None
     try:
+        if str(path).lower().endswith(".csv"):
             return pd.read_csv(path)
         try:
             return pd.read_json(path, lines=True)
     except Exception as e:
         raise e
 def run_evaluation(file_obj):
+    # If no file provided, use synthetic demo
     if file_obj is None:
         df = generate_synthetic_dataset(num_agents=3, num_samples=12)
     else:
         df = load_file_to_df(path)
     if df is None:
+        return None, "No data loaded", None, None
     # Normalize column names
     cols = {c.lower(): c for c in df.columns}
     rename_map = {}
+    for k in ["prompt", "response", "task", "agent", "reference"]:
         if k not in cols:
+            if k == "reference":
+                for alt in ["answer", "ground_truth", "ref"]:
+                    if alt in cols:
+                        rename_map[cols[alt]] = k
+                        break
+            else:
+                for alt in [k, k.capitalize(), k.upper()]:
+                    if alt.lower() in cols:
+                        rename_map[cols[alt.lower()]] = k
     if rename_map:
         df = df.rename(columns=rename_map)
     metrics_df, images, leaderboard = evaluate_dataframe(df)
+    # Prepare gallery (list of image file paths). Gradio Gallery accepts list of (path, caption).
     gallery_items = [p for (p, caption) in images]
     captions = [caption for (p, caption) in images]
+    # Save a CSV report for download
     out_csv = "/tmp/eval_results.csv"
     metrics_df.to_csv(out_csv, index=False)
+    return (gallery_items, captions), metrics_df, leaderboard, out_csv
+# Build Gradio UI
 with gr.Blocks() as demo:
+    gr.Markdown("# Agentic Evaluation Framework")
+    gr.Markdown("Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference`. If no file is uploaded, a synthetic demo will run.")
     with gr.Row():
+        file_input = gr.File(label="Upload CSV/JSON/JSONL (optional)", file_types=[".csv", ".json", ".jsonl"])
         run_btn = gr.Button("Run Evaluation")
+        download_report = gr.File(label="Download CSV Report")  # output
     gallery = gr.Gallery(label="Visualization Outputs", columns=2, height="auto")
     table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
+    leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Score per Agent & Task)")
     def on_run(file_in):
+        (gallery_items, captions), metrics_df, lb, out_csv = run_evaluation(file_in)
+        gallery_display = [(p, captions[i] if i < len(captions) else "") for i, p in enumerate(gallery_items)]
+        return gallery_display, metrics_df, lb, out_csv
+    # include download_report as the last output
+    run_btn.click(fn=on_run, inputs=[file_input], outputs=[gallery, table, leaderboard, download_report])
+    gr.Markdown("## Tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive). - `reference` optional.\n- Download CSV report after evaluation.")
 demo.launch()