Spaces:

Supastrikas-004
/

evaluation-framework

Runtime error

App Files Files Community

evaluation-framework / app.py

Supastrikas-004

Update app.py

d73f6c0 verified 7 months ago

raw

history blame

4.54 kB

	# app.py
	"""
	Gradio application entrypoint for Hugging Face Spaces.
	"""

	import os
	import tempfile
	import pandas as pd
	import gradio as gr
	from evaluator import evaluate_dataframe
	from synthetic_data import generate_synthetic_dataset

	# Helper to save uploaded file to local temp path (gradio File gives a NamedTemporaryFile-like object)
	def save_uploaded(file_obj):
	    """Resolve an uploaded file to a path on the local filesystem.

	    The value delivered by the upload widget differs between Gradio
	    versions, so several shapes are accepted:

	    * a path (``str`` or ``os.PathLike``) -- returned as a string;
	    * a ``dict`` carrying the path under ``"path"`` or ``"name"``;
	    * an object whose ``name`` attribute is a path (for example a
	      ``NamedTemporaryFile``-like wrapper);
	    * any other readable file-like object -- its content is copied into a
	      new temporary file whose path is returned.

	    Args:
	        file_obj: The uploaded value, or a falsy value when nothing was
	            uploaded.

	    Returns:
	        The path as a string, or ``None`` when ``file_obj`` is falsy or a
	        dict without a usable path entry.

	    Raises:
	        TypeError: If ``file_obj`` has neither a usable path nor a
	            ``read()`` method.
	    """
	    if not file_obj:
	        return None

	    # Plain paths have no ``.name``/``.read``; handle them before any
	    # attribute access so they do not fall through to the file-like branch.
	    if isinstance(file_obj, (str, os.PathLike)):
	        return os.fspath(file_obj)

	    if isinstance(file_obj, dict):
	        location = file_obj.get("path") or file_obj.get("name")
	        return os.fspath(location) if location else None

	    name = getattr(file_obj, "name", None)
	    has_name = isinstance(name, (str, os.PathLike))
	    readable = callable(getattr(file_obj, "read", None))

	    # Trust ``name`` when it points at a real file, or when there is no
	    # content to copy anyway (the original behaviour for named objects).
	    if has_name and (os.path.exists(name) or not readable):
	        return os.fspath(name)

	    if not readable:
	        raise TypeError(
	            f"Unsupported upload object of type {type(file_obj).__name__!r}"
	        )

	    # Fallback: persist the in-memory content to a temporary file.
	    data = file_obj.read()
	    if isinstance(data, str):
	        data = data.encode("utf-8")
	    is_csv = has_name and os.fspath(name).lower().endswith(".csv")
	    suffix = ".csv" if is_csv else ".json"
	    fd, tmp = tempfile.mkstemp(suffix=suffix)
	    with os.fdopen(fd, "wb") as f:
	        f.write(data)
	    return tmp

	def load_file_to_df(path):
	    """Load a CSV, JSON or JSON Lines file into a pandas DataFrame.

	    The format is chosen from the file extension, compared
	    case-insensitively:

	    * ``.csv`` is read with ``pd.read_csv``;
	    * ``.jsonl`` / ``.ndjson`` is read as line-delimited JSON first, then
	      as a regular JSON document if that fails;
	    * anything else is read as a regular JSON document first, then as
	      line-delimited JSON if that fails.

	    Args:
	        path: Path of the file to load (``str`` or ``os.PathLike``), or
	            ``None``.

	    Returns:
	        The loaded DataFrame, or ``None`` when ``path`` is ``None``.

	    Raises:
	        ValueError: If the file cannot be parsed in either JSON mode.
	            Errors from ``pd.read_csv`` propagate unchanged.
	    """
	    if path is None:
	        return None

	    path = os.fspath(path)
	    lowered = path.lower()

	    if lowered.endswith(".csv"):
	        return pd.read_csv(path)

	    # Start with the mode the extension suggests. Trying ``lines=True``
	    # first on a single-line JSON array would "succeed" with a wrong shape.
	    lines_first = lowered.endswith((".jsonl", ".ndjson"))
	    try:
	        return pd.read_json(path, lines=lines_first)
	    except ValueError:
	        return pd.read_json(path, lines=not lines_first)

	def run_evaluation(file_obj):
	    """Evaluate an uploaded dataset, or a synthetic demo when none is given.

	    Column names are normalised before evaluation: each of ``prompt``,
	    ``response``, ``task``, ``agent`` and ``reference`` is matched
	    case-insensitively (ignoring surrounding whitespace) and renamed to its
	    lowercase form. For ``reference`` the alternatives ``answer``,
	    ``ground_truth`` and ``ref`` are also accepted.

	    The per-example metrics are additionally written to
	    ``/tmp/eval_results.csv``.

	    Args:
	        file_obj: The uploaded file value, or ``None`` to run on a
	            generated synthetic dataset.

	    Returns:
	        A 3-tuple ``((gallery_items, captions), metrics_df, leaderboard)``.
	        ``gallery_items`` and ``captions`` are parallel lists built from
	        the ``(path, caption)`` pairs returned by ``evaluate_dataframe``.
	        When no data could be loaded the tuple has the same shape: empty
	        lists, a one-cell DataFrame with the message ``"No data loaded"``
	        and an empty DataFrame.
	    """
	    # If no file provided, use synthetic demo
	    if file_obj is None:
	        df = generate_synthetic_dataset(num_agents=3, num_samples=12)
	    else:
	        df = load_file_to_df(save_uploaded(file_obj))

	    if df is None:
	        # Same shape as the success path so callers can always unpack it.
	        return ([], []), pd.DataFrame({"message": ["No data loaded"]}), pd.DataFrame()

	    # Lower-cased label -> actual label. ``str()`` guards non-string labels
	    # (e.g. integer headers); the first occurrence wins on collisions.
	    by_lower = {}
	    for col in df.columns:
	        by_lower.setdefault(str(col).strip().lower(), col)

	    alternatives = {"reference": ("answer", "ground_truth", "ref")}
	    rename_map = {}
	    for key in ("prompt", "response", "task", "agent", "reference"):
	        if key in df.columns:
	            continue  # already present under its canonical name
	        for candidate in (key, *alternatives.get(key, ())):
	            source = by_lower.get(candidate)
	            if source is not None and source not in rename_map:
	                rename_map[source] = key
	                break
	    if rename_map:
	        df = df.rename(columns=rename_map)

	    metrics_df, images, leaderboard = evaluate_dataframe(df)

	    # Split the (path, caption) pairs into two parallel lists.
	    gallery_items = [item for item, _ in images]
	    captions = [caption for _, caption in images]

	    # Save a CSV report for download
	    out_csv = "/tmp/eval_results.csv"
	    metrics_df.to_csv(out_csv, index=False)

	    return (gallery_items, captions), metrics_df, leaderboard

	# Build Gradio UI
	with gr.Blocks() as demo:
	    gr.Markdown("# Agentic Evaluation Framework")
	    gr.Markdown(
	        "Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference` (reference optional). "
	        "If no file is uploaded, a small synthetic demo will run."
	    )

	    with gr.Row():
	        file_input = gr.File(label="Upload CSV / JSON / JSONL (optional)", file_types=[".csv", ".json", ".jsonl"])
	        run_btn = gr.Button("Run Evaluation")
	        download_report = gr.File(label="Download CSV Report")

	    # Gallery configured through constructor arguments (no ``.style`` call).
	    gallery = gr.Gallery(
	        label="Visualization Outputs",
	        columns=2,
	        height="auto"
	    )
	    table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
	    leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Final Score per Agent & Task)")

	    def on_run(file_in):
	        """Run the evaluation and shape its results for the UI components.

	        Args:
	            file_in: Value of the upload component (``None`` when empty).

	        Returns:
	            A 4-tuple: gallery entries as ``(path, caption)`` pairs, the
	            per-example metrics, the leaderboard, and the path of a CSV
	            report for the download component (``None`` if no report
	            could be written).

	        Raises:
	            gr.Error: If ``run_evaluation`` reports that no data was
	                loaded (first element of its result is ``None``).
	        """
	        visuals, metrics_df, lb = run_evaluation(file_in)
	        if visuals is None:
	            # In this case the second slot holds the failure message.
	            raise gr.Error(str(metrics_df))

	        gallery_items, captions = visuals
	        # Pair each image with its caption; a missing caption becomes "".
	        gallery_display = [
	            (item, captions[i] if i < len(captions) else "")
	            for i, item in enumerate(gallery_items)
	        ]

	        # Write a per-request report so concurrent users do not overwrite
	        # each other's download.
	        report_path = None
	        if isinstance(metrics_df, pd.DataFrame):
	            fd, report_path = tempfile.mkstemp(prefix="eval_results_", suffix=".csv")
	            os.close(fd)
	            metrics_df.to_csv(report_path, index=False)

	        return gallery_display, metrics_df, lb, report_path

	    run_btn.click(
	        fn=on_run,
	        inputs=[file_input],
	        outputs=[gallery, table, leaderboard, download_report],
	    )

	    # Each bullet needs its own line, otherwise Markdown merges them.
	    gr.Markdown("## Usage tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive).\n"
	                "- `reference` can be empty but accuracy/hallucination will be weaker.\n"
	                "- Visualization images are available in the Gallery and a CSV report is downloadable.")

	demo.launch()