Files changed (1) hide show
  1. app.py +138 -34
app.py CHANGED
@@ -1,3 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # app.py
2
  """
3
  Gradio application entrypoint for Hugging Face Spaces.
@@ -7,19 +133,16 @@ import os
7
  import tempfile
8
  import pandas as pd
9
  import gradio as gr
10
- from evaluator import evaluate_dataframe
11
  from synthetic_data import generate_synthetic_dataset
12
 
13
- # Helper to save uploaded file to local temp path (gradio File gives a NamedTemporaryFile-like object)
14
  def save_uploaded(file_obj):
15
  if not file_obj:
16
  return None
17
- # file_obj can be a dictionary or a file-like object depending on Gradio version
18
  try:
19
- path = file_obj.name
20
- return path
21
  except Exception:
22
- # fallback: write bytes to temp file
23
  data = file_obj.read()
24
  suffix = ".csv" if file_obj.name.endswith(".csv") else ".json"
25
  fd, tmp = tempfile.mkstemp(suffix=suffix)
@@ -30,38 +153,31 @@ def save_uploaded(file_obj):
30
  def load_file_to_df(path):
31
  if path is None:
32
  return None
33
- # Try CSV
34
  try:
35
  if path.endswith(".csv"):
36
  return pd.read_csv(path)
37
- # JSONL
38
  try:
39
  return pd.read_json(path, lines=True)
40
  except ValueError:
41
  return pd.read_json(path)
42
  except Exception as e:
43
- # As last resort, raise
44
  raise e
45
 
46
  def run_evaluation(file_obj):
47
- # If no file provided, use synthetic demo
48
  if file_obj is None:
49
  df = generate_synthetic_dataset(num_agents=3, num_samples=12)
50
  else:
51
  path = save_uploaded(file_obj)
52
  df = load_file_to_df(path)
53
 
54
- # Ensure required columns exist; otherwise, attempt to map common alternatives
55
  if df is None:
56
  return None, "No data loaded", None
57
 
58
- # Try to normalize column names
59
  cols = {c.lower(): c for c in df.columns}
60
- # rename common variants
61
  rename_map = {}
62
  for k in ["prompt", "response", "task", "agent", "reference"]:
63
  if k not in cols:
64
- # try variants
65
  if k == "reference":
66
  for alt in ["answer", "ground_truth", "ref"]:
67
  if alt in cols:
@@ -76,11 +192,9 @@ def run_evaluation(file_obj):
76
 
77
  metrics_df, images, leaderboard = evaluate_dataframe(df)
78
 
79
- # Prepare gallery (list of image file paths). Gradio Gallery accepts list of image paths or PIL images.
80
  gallery_items = [p for (p, caption) in images]
81
  captions = [caption for (p, caption) in images]
82
 
83
- # Save a CSV report for download
84
  out_csv = "/tmp/eval_results.csv"
85
  metrics_df.to_csv(out_csv, index=False)
86
 
@@ -90,37 +204,27 @@ def run_evaluation(file_obj):
90
  with gr.Blocks() as demo:
91
  gr.Markdown("# Agentic Evaluation Framework")
92
  gr.Markdown(
93
- "Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference` (reference optional). "
94
- "If no file is uploaded, a small synthetic demo will run."
95
  )
96
 
97
  with gr.Row():
98
- file_input = gr.File(label="Upload CSV / JSON / JSONL (optional)", file_types=[".csv", ".json", ".jsonl"])
99
  run_btn = gr.Button("Run Evaluation")
100
  download_report = gr.File(label="Download CSV Report")
101
 
102
- # βœ… Fixed Gallery (removed .style, added columns=2)
103
- gallery = gr.Gallery(
104
- label="Visualization Outputs",
105
- columns=2,
106
- height="auto"
107
- )
108
  table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
109
- leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Final Score per Agent & Task)")
110
 
111
  def on_run(file_in):
112
  (gallery_items, captions), metrics_df, lb = run_evaluation(file_in)
113
- # Save gallery captions mapping into a simple list of tuples for Gradio gallery (path, caption)
114
- gallery_display = []
115
- for i, p in enumerate(gallery_items):
116
- caption = captions[i] if i < len(captions) else ""
117
- gallery_display.append((p, caption))
118
  return gallery_display, metrics_df, lb
119
 
120
  run_btn.click(fn=on_run, inputs=[file_input], outputs=[gallery, table, leaderboard])
121
 
122
- gr.Markdown("## Usage tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive). "
123
- "- `reference` can be empty but accuracy/hallucination will be weaker.\n"
124
- "- Visualization images are available in the Gallery and a CSV report is downloadable.")
125
 
126
  demo.launch()
 
1
+ # # app.py
2
+ # """
3
+ # Gradio application entrypoint for Hugging Face Spaces.
4
+ # """
5
+
6
+ # import os
7
+ # import tempfile
8
+ # import pandas as pd
9
+ # import gradio as gr
10
+ # from evaluator import evaluate_dataframe
11
+ # from synthetic_data import generate_synthetic_dataset
12
+
13
+ # # Helper to save uploaded file to local temp path (gradio File gives a NamedTemporaryFile-like object)
14
+ # def save_uploaded(file_obj):
15
+ # if not file_obj:
16
+ # return None
17
+ # # file_obj can be a dictionary or a file-like object depending on Gradio version
18
+ # try:
19
+ # path = file_obj.name
20
+ # return path
21
+ # except Exception:
22
+ # # fallback: write bytes to temp file
23
+ # data = file_obj.read()
24
+ # suffix = ".csv" if file_obj.name.endswith(".csv") else ".json"
25
+ # fd, tmp = tempfile.mkstemp(suffix=suffix)
26
+ # with os.fdopen(fd, "wb") as f:
27
+ # f.write(data)
28
+ # return tmp
29
+
30
+ # def load_file_to_df(path):
31
+ # if path is None:
32
+ # return None
33
+ # # Try CSV
34
+ # try:
35
+ # if path.endswith(".csv"):
36
+ # return pd.read_csv(path)
37
+ # # JSONL
38
+ # try:
39
+ # return pd.read_json(path, lines=True)
40
+ # except ValueError:
41
+ # return pd.read_json(path)
42
+ # except Exception as e:
43
+ # # As last resort, raise
44
+ # raise e
45
+
46
+ # def run_evaluation(file_obj):
47
+ # # If no file provided, use synthetic demo
48
+ # if file_obj is None:
49
+ # df = generate_synthetic_dataset(num_agents=3, num_samples=12)
50
+ # else:
51
+ # path = save_uploaded(file_obj)
52
+ # df = load_file_to_df(path)
53
+
54
+ # # Ensure required columns exist; otherwise, attempt to map common alternatives
55
+ # if df is None:
56
+ # return None, "No data loaded", None
57
+
58
+ # # Try to normalize column names
59
+ # cols = {c.lower(): c for c in df.columns}
60
+ # # rename common variants
61
+ # rename_map = {}
62
+ # for k in ["prompt", "response", "task", "agent", "reference"]:
63
+ # if k not in cols:
64
+ # # try variants
65
+ # if k == "reference":
66
+ # for alt in ["answer", "ground_truth", "ref"]:
67
+ # if alt in cols:
68
+ # rename_map[cols[alt]] = k
69
+ # break
70
+ # else:
71
+ # for alt in [k, k.capitalize(), k.upper()]:
72
+ # if alt.lower() in cols:
73
+ # rename_map[cols[alt.lower()]] = k
74
+ # if rename_map:
75
+ # df = df.rename(columns=rename_map)
76
+
77
+ # metrics_df, images, leaderboard = evaluate_dataframe(df)
78
+
79
+ # # Prepare gallery (list of image file paths). Gradio Gallery accepts list of image paths or PIL images.
80
+ # gallery_items = [p for (p, caption) in images]
81
+ # captions = [caption for (p, caption) in images]
82
+
83
+ # # Save a CSV report for download
84
+ # out_csv = "/tmp/eval_results.csv"
85
+ # metrics_df.to_csv(out_csv, index=False)
86
+
87
+ # return (gallery_items, captions), metrics_df, leaderboard
88
+
89
+ # # Build Gradio UI
90
+ # with gr.Blocks() as demo:
91
+ # gr.Markdown("# Agentic Evaluation Framework")
92
+ # gr.Markdown(
93
+ # "Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference` (reference optional). "
94
+ # "If no file is uploaded, a small synthetic demo will run."
95
+ # )
96
+
97
+ # with gr.Row():
98
+ # file_input = gr.File(label="Upload CSV / JSON / JSONL (optional)", file_types=[".csv", ".json", ".jsonl"])
99
+ # run_btn = gr.Button("Run Evaluation")
100
+ # download_report = gr.File(label="Download CSV Report")
101
+
102
+ # # βœ… Fixed Gallery (removed .style, added columns=2)
103
+ # gallery = gr.Gallery(
104
+ # label="Visualization Outputs",
105
+ # columns=2,
106
+ # height="auto"
107
+ # )
108
+ # table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
109
+ # leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Final Score per Agent & Task)")
110
+
111
+ # def on_run(file_in):
112
+ # (gallery_items, captions), metrics_df, lb = run_evaluation(file_in)
113
+ # # Save gallery captions mapping into a simple list of tuples for Gradio gallery (path, caption)
114
+ # gallery_display = []
115
+ # for i, p in enumerate(gallery_items):
116
+ # caption = captions[i] if i < len(captions) else ""
117
+ # gallery_display.append((p, caption))
118
+ # return gallery_display, metrics_df, lb
119
+
120
+ # run_btn.click(fn=on_run, inputs=[file_input], outputs=[gallery, table, leaderboard])
121
+
122
+ # gr.Markdown("## Usage tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive). "
123
+ # "- `reference` can be empty but accuracy/hallucination will be weaker.\n"
124
+ # "- Visualization images are available in the Gallery and a CSV report is downloadable.")
125
+
126
+ # demo.launch()
127
  # app.py
128
  """
129
  Gradio application entrypoint for Hugging Face Spaces.
 
133
  import tempfile
134
  import pandas as pd
135
  import gradio as gr
136
+ from evaluation import evaluate_dataframe # βœ… updated import
137
  from synthetic_data import generate_synthetic_dataset
138
 
139
# Helper to save uploaded file
def save_uploaded(file_obj):
    """Return a local filesystem path for an uploaded file.

    Depending on the Gradio version, the upload may arrive as a plain path
    string, an object exposing a ``.name`` temp-file path, or a raw
    file-like object whose bytes we must persist ourselves.

    Returns None when nothing was uploaded.
    """
    if not file_obj:
        return None
    # Newer Gradio versions hand us the temp-file path directly.
    if isinstance(file_obj, str):
        return file_obj
    try:
        return file_obj.name
    except AttributeError:
        # No .name attribute: copy the raw bytes to a temp file.
        # (The original re-read file_obj.name here, which had just failed.)
        data = file_obj.read()
        name = getattr(file_obj, "name", "")
        suffix = ".csv" if name.endswith(".csv") else ".json"
        fd, tmp = tempfile.mkstemp(suffix=suffix)
        with os.fdopen(fd, "wb") as f:
            f.write(data)
        return tmp
153
def load_file_to_df(path):
    """Load a CSV, JSON, or JSONL file into a pandas DataFrame.

    Files whose extension is ``.csv`` (any case) are read as CSV; anything
    else is first tried as JSON-Lines, then as plain JSON.

    Returns None when *path* is None; otherwise raises whatever pandas
    raises on an unreadable file (callers see the original exception).
    """
    if path is None:
        return None
    # Case-insensitive extension check (original only matched lowercase).
    if path.lower().endswith(".csv"):
        return pd.read_csv(path)
    try:
        # JSONL first: a JSONL file is not valid plain JSON, but the
        # reverse often parses, so try the stricter format first.
        return pd.read_json(path, lines=True)
    except ValueError:
        return pd.read_json(path)
165
 
166
def run_evaluation(file_obj):
    """Run the full evaluation pipeline on an uploaded file (or a demo set).

    Parameters:
        file_obj: Gradio upload object, or None to run on synthetic data.

    Returns:
        ((gallery_paths, captions), metrics_df, leaderboard_df) on success,
        or (None, "No data loaded", None) when no DataFrame could be built.

    Side effect: writes a CSV report to /tmp/eval_results.csv.
    """
    if file_obj is None:
        # No upload: demo on a small synthetic dataset.
        df = generate_synthetic_dataset(num_agents=3, num_samples=12)
    else:
        df = load_file_to_df(save_uploaded(file_obj))

    if df is None:
        return None, "No data loaded", None

    # Normalize column names: case-insensitive match for the required
    # columns, plus common synonyms for the optional `reference` column.
    cols = {c.lower(): c for c in df.columns}
    rename_map = {}
    for k in ["prompt", "response", "task", "agent", "reference"]:
        if k in cols:
            # Column present, possibly with different casing (e.g. "Prompt").
            # The original variant loop checked `alt.lower() in cols`, which
            # is always just `k in cols` and never renamed anything.
            if cols[k] != k:
                rename_map[cols[k]] = k
        elif k == "reference":
            for alt in ["answer", "ground_truth", "ref"]:
                if alt in cols:
                    rename_map[cols[alt]] = k
                    break
    if rename_map:
        df = df.rename(columns=rename_map)

    metrics_df, images, leaderboard = evaluate_dataframe(df)

    # Split the (path, caption) pairs for the Gallery component.
    gallery_items = [p for (p, _caption) in images]
    captions = [caption for (_p, caption) in images]

    # Persist a CSV report so the UI can offer it for download.
    out_csv = "/tmp/eval_results.csv"
    metrics_df.to_csv(out_csv, index=False)

    return (gallery_items, captions), metrics_df, leaderboard
204
# Build the Gradio UI.
with gr.Blocks() as demo:
    gr.Markdown("# Agentic Evaluation Framework")
    gr.Markdown(
        "Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference`. "
        "If no file is uploaded, a synthetic demo will run."
    )

    with gr.Row():
        file_input = gr.File(label="Upload CSV/JSON/JSONL", file_types=[".csv", ".json", ".jsonl"])
        run_btn = gr.Button("Run Evaluation")
        download_report = gr.File(label="Download CSV Report")

    gallery = gr.Gallery(label="Visualization Outputs", columns=2, height="auto")
    table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
    leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Score per Agent & Task)")

    def on_run(file_in):
        """Adapt run_evaluation()'s outputs to the Gradio components."""
        (gallery_items, captions), metrics_df, lb = run_evaluation(file_in)
        gallery_display = [(p, captions[i] if i < len(captions) else "") for i, p in enumerate(gallery_items)]
        # run_evaluation writes its CSV report to this fixed path; surface it
        # in the download component (it was previously declared but never fed).
        return gallery_display, metrics_df, lb, "/tmp/eval_results.csv"

    run_btn.click(fn=on_run, inputs=[file_input], outputs=[gallery, table, leaderboard, download_report])

    gr.Markdown("## Tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive). "
                "- `reference` optional.\n- Download CSV report after evaluation.")

demo.launch()