Spaces:

Supastrikas-004
/

evaluation-framework

Runtime error

App Files Files Community

Update app.py

#17

by manayporwal07 - opened Sep 13, 2025

base: refs/heads/main

←

from: refs/pr/17

Discussion Files changed

+72

-100

Files changed (1) hide show

app.py +72 -100

app.py CHANGED Viewed

@@ -230,132 +230,104 @@
 # demo.launch()
 # app.py (patch)
-"""
-Gradio application entrypoint for Hugging Face Spaces.
-"""
 import os
 import tempfile
-import pandas as pd
-import gradio as gr
-from evaluator import evaluate_dataframe   # <<-- fixed import (was `evaluation`)
-from synthetic_data import generate_synthetic_dataset
-# Helper to save uploaded file to local temp path (gradio File gives a NamedTemporaryFile-like object)
 def save_uploaded(file_obj):
     if not file_obj:
         return None
-    # When using some Gradio versions, file_obj may be a dict with 'name' or 'tmp_path'
     if isinstance(file_obj, dict):
-        for key in ("name", "tmp_path", "file"):
-            path = file_obj.get(key)
-            if path and os.path.exists(path):
-                return path
-    # If it's already a path (string)
     if isinstance(file_obj, str) and os.path.exists(file_obj):
         return file_obj
-    # If it has a .name attribute and file exists
-    try:
-        if hasattr(file_obj, "name") and os.path.exists(file_obj.name):
-            return file_obj.name
-    except Exception:
-        pass
-    # Fallback: write bytes to a temp file
-    try:
-        data = file_obj.read()
-    except Exception:
-        return None
-    # choose suffix heuristically
-    name_attr = getattr(file_obj, "name", "")
-    suffix = ".csv" if name_attr.lower().endswith(".csv") else ".json"
-    fd, tmp = tempfile.mkstemp(suffix=suffix)
     with os.fdopen(fd, "wb") as f:
-        if isinstance(data, str):
-            f.write(data.encode())
-        else:
-            f.write(data)
     return tmp
 def load_file_to_df(path):
     if path is None:
         return None
     try:
-        if str(path).lower().endswith(".csv"):
-            return pd.read_csv(path)
-        try:
-            return pd.read_json(path, lines=True)
-        except ValueError:
-            return pd.read_json(path)
-    except Exception as e:
-        raise e
-def run_evaluation(file_obj):
-    # If no file provided, use synthetic demo
-    if file_obj is None:
-        df = generate_synthetic_dataset(num_agents=3, num_samples=12)
-    else:
-        path = save_uploaded(file_obj)
-        df = load_file_to_df(path)
-    if df is None:
-        return None, "No data loaded", None, None
-    # Normalize column names
-    cols = {c.lower(): c for c in df.columns}
-    rename_map = {}
-    for k in ["prompt", "response", "task", "agent", "reference"]:
-        if k not in cols:
-            if k == "reference":
-                for alt in ["answer", "ground_truth", "ref"]:
-                    if alt in cols:
-                        rename_map[cols[alt]] = k
-                        break
-            else:
-                for alt in [k, k.capitalize(), k.upper()]:
-                    if alt.lower() in cols:
-                        rename_map[cols[alt.lower()]] = k
-    if rename_map:
-        df = df.rename(columns=rename_map)
-    metrics_df, images, leaderboard = evaluate_dataframe(df)
-    # Prepare gallery (list of image file paths). Gradio Gallery accepts list of (path, caption).
-    gallery_items = [p for (p, caption) in images]
-    captions = [caption for (p, caption) in images]
-    # Save a CSV report for download
-    out_csv = "/tmp/eval_results.csv"
-    metrics_df.to_csv(out_csv, index=False)
-    return (gallery_items, captions), metrics_df, leaderboard, out_csv
-# Build Gradio UI
-with gr.Blocks() as demo:
-    gr.Markdown("# Agentic Evaluation Framework")
-    gr.Markdown("Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference`. If no file is uploaded, a synthetic demo will run.")
-    with gr.Row():
-        file_input = gr.File(label="Upload CSV/JSON/JSONL (optional)", file_types=[".csv", ".json", ".jsonl"])
-        run_btn = gr.Button("Run Evaluation")
-        download_report = gr.File(label="Download CSV Report")  # output
-    gallery = gr.Gallery(label="Visualization Outputs", columns=2, height="auto")
-    table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
-    leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Score per Agent & Task)")
-    def on_run(file_in):
-        (gallery_items, captions), metrics_df, lb, out_csv = run_evaluation(file_in)
-        gallery_display = [(p, captions[i] if i < len(captions) else "") for i, p in enumerate(gallery_items)]
-        return gallery_display, metrics_df, lb, out_csv
-    # include download_report as the last output
-    run_btn.click(fn=on_run, inputs=[file_input], outputs=[gallery, table, leaderboard, download_report])
-    gr.Markdown("## Tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive). - `reference` optional.\n- Download CSV report after evaluation.")
-demo.launch()

 # demo.launch()
 # app.py (patch)
+import gradio as gr
+import pandas as pd
 import os
 import tempfile
+from evaluator import evaluate_dataframe, generate_visualizations
+# -----------------------
+# Helpers
+# -----------------------
 def save_uploaded(file_obj):
+    """Return a filesystem path for the uploaded file object, or None.
+
+    Accepted inputs, tried in this order:
+      * a dict whose "name", "path" or "file" entry is an existing path;
+      * a string or os.PathLike naming an existing file;
+      * an object whose ``name`` attribute is an existing path;
+      * any object with ``read()``: its content is copied to a new temp file.
+
+    Returns None when the input is empty or none of the above yields a file.
+    """
     if not file_obj:
         return None
     if isinstance(file_obj, dict):
+        for key in ("name", "path", "file"):
+            p = file_obj.get(key)
+            if p and os.path.exists(p):
+                return p
+        # A dict has no read(); without a usable path there is nothing to load.
+        return None
+    if isinstance(file_obj, (str, os.PathLike)):
+        path = os.fspath(file_obj)
+        return path if os.path.exists(path) else None
+    name = getattr(file_obj, "name", None)
+    if isinstance(name, str) and os.path.exists(name):
+        return name
+    reader = getattr(file_obj, "read", None)
+    if reader is None:
+        return None
+    # Read before creating the temp file so a failing read() leaves nothing behind.
+    data = reader()
+    if isinstance(data, str):
+        data = data.encode("utf-8")
+    # Keep the upload's own extension so the loader picks the right parser.
+    suffix = os.path.splitext(name)[1] if isinstance(name, str) else ""
+    fd, tmp = tempfile.mkstemp(suffix=suffix or ".csv")
     with os.fdopen(fd, "wb") as f:
+        f.write(data)
     return tmp
 def load_file_to_df(path):
+    """Load a CSV, JSON or JSON Lines file into a pandas DataFrame.
+
+    Paths ending in ``.csv`` are parsed as CSV with delimiter sniffing
+    (``sep=None``); an empty CSV yields an empty DataFrame. If CSV parsing
+    fails, or for any other suffix, the file is parsed as JSON: JSON Lines
+    first for ``.jsonl``/``.ndjson`` paths, a regular JSON document first
+    otherwise, each falling back to the other form.
+
+    Returns None when ``path`` is None.
+
+    Raises the CSV parse error (with the JSON error chained) when a ``.csv``
+    path is accepted by no parser, or the JSON ``ValueError`` otherwise.
+    """
+    import csv  # local import: only needed to name the sniffer's error type
+
     if path is None:
         return None
+    p = str(path)
+    lowered = p.lower()
+    csv_error = None
+    if lowered.endswith(".csv"):
+        try:
+            return pd.read_csv(p, sep=None, engine="python")
+        except pd.errors.EmptyDataError:
+            return pd.DataFrame()
+        except (ValueError, csv.Error) as exc:
+            # Remember the real failure; the file may still be mislabelled JSON.
+            csv_error = exc
+    # lines=True on a single-line JSON array yields one row of dict cells,
+    # so only try JSON Lines first when the suffix says so.
+    lines_first = lowered.endswith((".jsonl", ".ndjson"))
     try:
+        try:
+            return pd.read_json(p, lines=lines_first)
+        except ValueError:
+            return pd.read_json(p, lines=not lines_first)
+    except ValueError as json_error:
+        if csv_error is not None:
+            # Report the CSV problem: that is the format the caller asked for.
+            raise csv_error from json_error
+        raise
+# -----------------------
+# Evaluation wrapper
+# -----------------------
+def run_evaluation(file):
+    """Evaluate an uploaded dataset and return the values shown in the UI.
+
+    Resolves ``file`` to a path with ``save_uploaded``, loads it with
+    ``load_file_to_df``, fills in any missing expected columns with empty
+    strings, then runs ``evaluate_dataframe`` and ``generate_visualizations``.
+
+    Returns a 4-tuple ``(figs, metrics_df, leaderboard, csv_path)``, or four
+    ``None`` values when no data could be loaded.
+    """
+    path = save_uploaded(file)
+    df = load_file_to_df(path)
+    if df is None or df.empty:
+        # Same arity as the success path and the four outputs bound to this
+        # function in the click handler.
+        return None, None, None, None
+    # Normalize column names (labels may be non-strings, e.g. ints from JSON)
+    df.columns = [str(c).strip() for c in df.columns]
+    # Expected cols: task_id, task_type, prompt, agent, response, metadata;
+    # reference is optional and added empty when not provided.
+    for col in ["task_id", "task_type", "prompt", "agent", "response", "metadata", "reference"]:
+        if col not in df.columns:
+            df[col] = ""
+    # The images returned here are not used; figures come from generate_visualizations.
+    metrics_df, _images, leaderboard = evaluate_dataframe(df)
+    figs = generate_visualizations(metrics_df, leaderboard)
+    # Save evaluation results to a unique file so concurrent runs do not
+    # overwrite each other's report.
+    fd, csv_path = tempfile.mkstemp(prefix="eval_results_", suffix=".csv")
+    os.close(fd)
+    metrics_df.to_csv(csv_path, index=False)
+    return figs, metrics_df, leaderboard, csv_path
+# -----------------------
+# Gradio UI
+# -----------------------
+# Build the page: an upload + run row, then the four result components that
+# run_evaluation fills in (gallery, metrics table, leaderboard, CSV download).
+with gr.Blocks(title="Agentic Evaluation Framework") as demo:
+    gr.Markdown("## Agentic Evaluation Framework")
+    gr.Markdown("Upload a CSV file with format: "
+                "`task_id, task_type, prompt, agent, response, metadata`")
+    with gr.Row():
+        # No explicit `type`: "file" is not accepted by newer Gradio releases,
+        # and save_uploaded handles both path strings and file-like objects.
+        file_upload = gr.File(label="Upload CSV")
+        eval_btn = gr.Button("Run Evaluation", variant="primary")
+    gallery = gr.Gallery(label="Visualizations", columns=2, height="auto")
+    metrics_df_out = gr.Dataframe(label="Evaluation Results")
+    leaderboard_out = gr.Dataframe(label="Leaderboard (Avg Scores)")
+    download_out = gr.File(label="Download CSV Report")
+    # run_evaluation returns one value per component listed in `outputs`.
+    eval_btn.click(
+        fn=run_evaluation,
+        inputs=file_upload,
+        outputs=[gallery, metrics_df_out, leaderboard_out, download_out]
+    )
+if __name__ == "__main__":
+    demo.launch()