# Supastrikas-004's picture
# Update app.py (#17)
# 5cfcc03 verified
# # app.py
# """
# Gradio application entrypoint for Hugging Face Spaces.
# """
# import os
# import tempfile
# import pandas as pd
# import gradio as gr
# from evaluator import evaluate_dataframe
# from synthetic_data import generate_synthetic_dataset
# # Helper to save uploaded file to local temp path (gradio File gives a NamedTemporaryFile-like object)
# def save_uploaded(file_obj):
# if not file_obj:
# return None
# # file_obj can be a dictionary or a file-like object depending on Gradio version
# try:
# path = file_obj.name
# return path
# except Exception:
# # fallback: write bytes to temp file
# data = file_obj.read()
# suffix = ".csv" if file_obj.name.endswith(".csv") else ".json"
# fd, tmp = tempfile.mkstemp(suffix=suffix)
# with os.fdopen(fd, "wb") as f:
# f.write(data)
# return tmp
# def load_file_to_df(path):
# if path is None:
# return None
# # Try CSV
# try:
# if path.endswith(".csv"):
# return pd.read_csv(path)
# # JSONL
# try:
# return pd.read_json(path, lines=True)
# except ValueError:
# return pd.read_json(path)
# except Exception as e:
# # As last resort, raise
# raise e
# def run_evaluation(file_obj):
# # If no file provided, use synthetic demo
# if file_obj is None:
# df = generate_synthetic_dataset(num_agents=3, num_samples=12)
# else:
# path = save_uploaded(file_obj)
# df = load_file_to_df(path)
# # Ensure required columns exist; otherwise, attempt to map common alternatives
# if df is None:
# return None, "No data loaded", None
# # Try to normalize column names
# cols = {c.lower(): c for c in df.columns}
# # rename common variants
# rename_map = {}
# for k in ["prompt", "response", "task", "agent", "reference"]:
# if k not in cols:
# # try variants
# if k == "reference":
# for alt in ["answer", "ground_truth", "ref"]:
# if alt in cols:
# rename_map[cols[alt]] = k
# break
# else:
# for alt in [k, k.capitalize(), k.upper()]:
# if alt.lower() in cols:
# rename_map[cols[alt.lower()]] = k
# if rename_map:
# df = df.rename(columns=rename_map)
# metrics_df, images, leaderboard = evaluate_dataframe(df)
# # Prepare gallery (list of image file paths). Gradio Gallery accepts list of image paths or PIL images.
# gallery_items = [p for (p, caption) in images]
# captions = [caption for (p, caption) in images]
# # Save a CSV report for download
# out_csv = "/tmp/eval_results.csv"
# metrics_df.to_csv(out_csv, index=False)
# return (gallery_items, captions), metrics_df, leaderboard
# # Build Gradio UI
# with gr.Blocks() as demo:
# gr.Markdown("# Agentic Evaluation Framework")
# gr.Markdown(
# "Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference` (reference optional). "
# "If no file is uploaded, a small synthetic demo will run."
# )
# with gr.Row():
# file_input = gr.File(label="Upload CSV / JSON / JSONL (optional)", file_types=[".csv", ".json", ".jsonl"])
# run_btn = gr.Button("Run Evaluation")
# download_report = gr.File(label="Download CSV Report")
# # βœ… Fixed Gallery (removed .style, added columns=2)
# gallery = gr.Gallery(
# label="Visualization Outputs",
# columns=2,
# height="auto"
# )
# table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
# leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Final Score per Agent & Task)")
# def on_run(file_in):
# (gallery_items, captions), metrics_df, lb = run_evaluation(file_in)
# # Save gallery captions mapping into a simple list of tuples for Gradio gallery (path, caption)
# gallery_display = []
# for i, p in enumerate(gallery_items):
# caption = captions[i] if i < len(captions) else ""
# gallery_display.append((p, caption))
# return gallery_display, metrics_df, lb
# run_btn.click(fn=on_run, inputs=[file_input], outputs=[gallery, table, leaderboard])
# gr.Markdown("## Usage tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive). "
# "- `reference` can be empty but accuracy/hallucination will be weaker.\n"
# "- Visualization images are available in the Gallery and a CSV report is downloadable.")
# demo.launch()
# app.py
# """
# Gradio application entrypoint for Hugging Face Spaces.
# """
# import os
# import tempfile
# import pandas as pd
# import gradio as gr
# from evaluation import evaluate_dataframe # βœ… updated import
# from synthetic_data import generate_synthetic_dataset
# # Helper to save uploaded file
# def save_uploaded(file_obj):
# if not file_obj:
# return None
# try:
# return file_obj.name
# except Exception:
# data = file_obj.read()
# suffix = ".csv" if file_obj.name.endswith(".csv") else ".json"
# fd, tmp = tempfile.mkstemp(suffix=suffix)
# with os.fdopen(fd, "wb") as f:
# f.write(data)
# return tmp
# def load_file_to_df(path):
# if path is None:
# return None
# try:
# if path.endswith(".csv"):
# return pd.read_csv(path)
# try:
# return pd.read_json(path, lines=True)
# except ValueError:
# return pd.read_json(path)
# except Exception as e:
# raise e
# def run_evaluation(file_obj):
# if file_obj is None:
# df = generate_synthetic_dataset(num_agents=3, num_samples=12)
# else:
# path = save_uploaded(file_obj)
# df = load_file_to_df(path)
# if df is None:
# return None, "No data loaded", None
# # Normalize column names
# cols = {c.lower(): c for c in df.columns}
# rename_map = {}
# for k in ["prompt", "response", "task", "agent", "reference"]:
# if k not in cols:
# if k == "reference":
# for alt in ["answer", "ground_truth", "ref"]:
# if alt in cols:
# rename_map[cols[alt]] = k
# break
# else:
# for alt in [k, k.capitalize(), k.upper()]:
# if alt.lower() in cols:
# rename_map[cols[alt.lower()]] = k
# if rename_map:
# df = df.rename(columns=rename_map)
# metrics_df, images, leaderboard = evaluate_dataframe(df)
# gallery_items = [p for (p, caption) in images]
# captions = [caption for (p, caption) in images]
# out_csv = "/tmp/eval_results.csv"
# metrics_df.to_csv(out_csv, index=False)
# return (gallery_items, captions), metrics_df, leaderboard
# # Build Gradio UI
# with gr.Blocks() as demo:
# gr.Markdown("# Agentic Evaluation Framework")
# gr.Markdown(
# "Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference`. "
# "If no file is uploaded, a synthetic demo will run."
# )
# with gr.Row():
# file_input = gr.File(label="Upload CSV/JSON/JSONL", file_types=[".csv", ".json", ".jsonl"])
# run_btn = gr.Button("Run Evaluation")
# download_report = gr.File(label="Download CSV Report")
# gallery = gr.Gallery(label="Visualization Outputs", columns=2, height="auto")
# table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
# leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Score per Agent & Task)")
# def on_run(file_in):
# (gallery_items, captions), metrics_df, lb = run_evaluation(file_in)
# gallery_display = [(p, captions[i] if i < len(captions) else "") for i, p in enumerate(gallery_items)]
# return gallery_display, metrics_df, lb
# run_btn.click(fn=on_run, inputs=[file_input], outputs=[gallery, table, leaderboard])
# gr.Markdown("## Tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive). "
# "- `reference` optional.\n- Download CSV report after evaluation.")
# demo.launch()
# ---- app.py: active implementation (everything above is legacy, kept commented out) ----
import gradio as gr
import pandas as pd
import os
import tempfile
from evaluator import evaluate_dataframe, generate_visualizations
# -----------------------
# Helpers
# -----------------------
def save_uploaded(file_obj):
    """Resolve an uploaded Gradio file object to a filesystem path.

    Accepts the several shapes Gradio may hand over depending on version:
    a dict carrying a path under "name"/"path"/"file", a plain path
    string, an object exposing a ``.name`` path attribute, or a raw
    file-like object whose bytes are dumped to a temp file as a last
    resort. Returns ``None`` for a falsy input.
    """
    if not file_obj:
        return None
    # Dict payload: probe the common path-bearing keys in order.
    if isinstance(file_obj, dict):
        for candidate_key in ("name", "path", "file"):
            candidate = file_obj.get(candidate_key)
            if candidate and os.path.exists(candidate):
                return candidate
    # Already a path string on disk.
    if isinstance(file_obj, str) and os.path.exists(file_obj):
        return file_obj
    # NamedTemporaryFile-like object whose .name points at a real file.
    if hasattr(file_obj, "name") and os.path.exists(file_obj.name):
        return file_obj.name
    # Last resort: persist the raw bytes to a fresh temp file.
    handle, tmp_path = tempfile.mkstemp(suffix=".csv")
    with os.fdopen(handle, "wb") as out:
        out.write(file_obj.read())
    return tmp_path
def load_file_to_df(path):
    """Load a CSV / JSON / JSONL file into a pandas DataFrame.

    For ``.csv`` paths a CSV parse is attempted first (with delimiter
    sniffing); a failed CSV parse — or any non-CSV path — falls through
    to JSON Lines, then plain JSON. Returns ``None`` when *path* is None.
    """
    if path is None:
        return None
    text_path = str(path)
    if text_path.lower().endswith(".csv"):
        try:
            # sep=None + the python engine sniffs the delimiter.
            return pd.read_csv(text_path, sep=None, engine="python")
        except Exception:
            pass  # fall through to the JSON readers below
    try:
        return pd.read_json(text_path, lines=True)
    except Exception:
        return pd.read_json(text_path)
# -----------------------
# Evaluation wrapper
# -----------------------
def run_evaluation(file):
    """Evaluate an uploaded dataset and produce the four Gradio outputs.

    Parameters:
        file: the object Gradio's File component passes to the handler
              (path string, dict, or file-like; resolved by save_uploaded).

    Returns:
        A 4-tuple matching the outputs wired to ``eval_btn.click``:
        (visualization figures, per-example metrics DataFrame,
        leaderboard DataFrame, path of the downloadable CSV report).
        Every slot is None when no usable data was loaded.
    """
    path = save_uploaded(file)
    df = load_file_to_df(path)
    if df is None or df.empty:
        # BUG FIX: this branch previously returned FIVE Nones while the
        # click handler is wired to exactly four outputs, so empty input
        # crashed with a "too many output values" error. Return four.
        return None, None, None, None
    # Strip stray whitespace from header names before column checks.
    df.columns = [c.strip() for c in df.columns]
    # Ensure every expected column exists so the evaluator never KeyErrors.
    for col in ["task_id", "task_type", "prompt", "agent", "response", "metadata"]:
        if col not in df.columns:
            df[col] = ""
    # Optional ground-truth column defaults to empty strings.
    if "reference" not in df.columns:
        df["reference"] = ""
    metrics_df, images, leaderboard = evaluate_dataframe(df)
    figs = generate_visualizations(metrics_df, leaderboard)
    # Persist a CSV report for the download widget.
    csv_path = "/tmp/eval_results.csv"
    metrics_df.to_csv(csv_path, index=False)
    return figs, metrics_df, leaderboard, csv_path
# -----------------------
# Gradio UI
# -----------------------
with gr.Blocks(title="Agentic Evaluation Framework") as demo:
    gr.Markdown("## Agentic Evaluation Framework")
    gr.Markdown("Upload a CSV file with format: "
                "`task_id, task_type, prompt, agent, response, metadata`")
    with gr.Row():
        # BUG FIX: type="file" is not accepted by current Gradio — 4.x
        # (which this file targets, given Gallery(columns=...)) allows
        # only "filepath" or "binary" and raises on "file". Omitting the
        # argument uses the default ("filepath"), and save_uploaded
        # already handles plain path strings.
        file_upload = gr.File(label="Upload CSV")
        eval_btn = gr.Button("Run Evaluation", variant="primary")
    gallery = gr.Gallery(label="Visualizations", columns=2, height="auto")
    metrics_df_out = gr.Dataframe(label="Evaluation Results")
    leaderboard_out = gr.Dataframe(label="Leaderboard (Avg Scores)")
    download_out = gr.File(label="Download CSV Report")
    # Four outputs, matching run_evaluation's 4-tuple return.
    eval_btn.click(
        fn=run_evaluation,
        inputs=file_upload,
        outputs=[gallery, metrics_df_out, leaderboard_out, download_out]
    )

if __name__ == "__main__":
    demo.launch()