Spaces:

Supastrikas-004
/

evaluation-framework

Runtime error

App Files Files Community

evaluation-framework / app.py

Supastrikas-004

Update app.py (#10)

439407c verified 5 months ago

raw

history blame

11.9 kB

	# # app.py
	# """
	# Gradio application entrypoint for Hugging Face Spaces.
	# """

	# import os
	# import tempfile
	# import pandas as pd
	# import gradio as gr
	# from evaluator import evaluate_dataframe
	# from synthetic_data import generate_synthetic_dataset

	# # Helper to save uploaded file to local temp path (gradio File gives a NamedTemporaryFile-like object)
	# def save_uploaded(file_obj):
	# if not file_obj:
	# return None
	# # file_obj can be a dictionary or a file-like object depending on Gradio version
	# try:
	# path = file_obj.name
	# return path
	# except Exception:
	# # fallback: write bytes to temp file
	# data = file_obj.read()
	# suffix = ".csv" if file_obj.name.endswith(".csv") else ".json"
	# fd, tmp = tempfile.mkstemp(suffix=suffix)
	# with os.fdopen(fd, "wb") as f:
	# f.write(data)
	# return tmp

	# def load_file_to_df(path):
	# if path is None:
	# return None
	# # Try CSV
	# try:
	# if path.endswith(".csv"):
	# return pd.read_csv(path)
	# # JSONL
	# try:
	# return pd.read_json(path, lines=True)
	# except ValueError:
	# return pd.read_json(path)
	# except Exception as e:
	# # As last resort, raise
	# raise e

	# def run_evaluation(file_obj):
	# # If no file provided, use synthetic demo
	# if file_obj is None:
	# df = generate_synthetic_dataset(num_agents=3, num_samples=12)
	# else:
	# path = save_uploaded(file_obj)
	# df = load_file_to_df(path)

	# # Ensure required columns exist; otherwise, attempt to map common alternatives
	# if df is None:
	# return None, "No data loaded", None

	# # Try to normalize column names
	# cols = {c.lower(): c for c in df.columns}
	# # rename common variants
	# rename_map = {}
	# for k in ["prompt", "response", "task", "agent", "reference"]:
	# if k not in cols:
	# # try variants
	# if k == "reference":
	# for alt in ["answer", "ground_truth", "ref"]:
	# if alt in cols:
	# rename_map[cols[alt]] = k
	# break
	# else:
	# for alt in [k, k.capitalize(), k.upper()]:
	# if alt.lower() in cols:
	# rename_map[cols[alt.lower()]] = k
	# if rename_map:
	# df = df.rename(columns=rename_map)

	# metrics_df, images, leaderboard = evaluate_dataframe(df)

	# # Prepare gallery (list of image file paths). Gradio Gallery accepts list of image paths or PIL images.
	# gallery_items = [p for (p, caption) in images]
	# captions = [caption for (p, caption) in images]

	# # Save a CSV report for download
	# out_csv = "/tmp/eval_results.csv"
	# metrics_df.to_csv(out_csv, index=False)

	# return (gallery_items, captions), metrics_df, leaderboard

	# # Build Gradio UI
	# with gr.Blocks() as demo:
	# gr.Markdown("# Agentic Evaluation Framework")
	# gr.Markdown(
	# "Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference` (reference optional). "
	# "If no file is uploaded, a small synthetic demo will run."
	# )

	# with gr.Row():
	# file_input = gr.File(label="Upload CSV / JSON / JSONL (optional)", file_types=[".csv", ".json", ".jsonl"])
	# run_btn = gr.Button("Run Evaluation")
	# download_report = gr.File(label="Download CSV Report")

	# # ✅ Fixed Gallery (removed .style, added columns=2)
	# gallery = gr.Gallery(
	# label="Visualization Outputs",
	# columns=2,
	# height="auto"
	# )
	# table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
	# leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Final Score per Agent & Task)")

	# def on_run(file_in):
	# (gallery_items, captions), metrics_df, lb = run_evaluation(file_in)
	# # Save gallery captions mapping into a simple list of tuples for Gradio gallery (path, caption)
	# gallery_display = []
	# for i, p in enumerate(gallery_items):
	# caption = captions[i] if i < len(captions) else ""
	# gallery_display.append((p, caption))
	# return gallery_display, metrics_df, lb

	# run_btn.click(fn=on_run, inputs=[file_input], outputs=[gallery, table, leaderboard])

	# gr.Markdown("## Usage tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive). "
	# "- `reference` can be empty but accuracy/hallucination will be weaker.\n"
	# "- Visualization images are available in the Gallery and a CSV report is downloadable.")

	# demo.launch()
	# app.py
	# """
	# Gradio application entrypoint for Hugging Face Spaces.
	# """

	# import os
	# import tempfile
	# import pandas as pd
	# import gradio as gr
	# from evaluation import evaluate_dataframe # ✅ updated import
	# from synthetic_data import generate_synthetic_dataset

	# # Helper to save uploaded file
	# def save_uploaded(file_obj):
	# if not file_obj:
	# return None
	# try:
	# return file_obj.name
	# except Exception:
	# data = file_obj.read()
	# suffix = ".csv" if file_obj.name.endswith(".csv") else ".json"
	# fd, tmp = tempfile.mkstemp(suffix=suffix)
	# with os.fdopen(fd, "wb") as f:
	# f.write(data)
	# return tmp

	# def load_file_to_df(path):
	# if path is None:
	# return None
	# try:
	# if path.endswith(".csv"):
	# return pd.read_csv(path)
	# try:
	# return pd.read_json(path, lines=True)
	# except ValueError:
	# return pd.read_json(path)
	# except Exception as e:
	# raise e

	# def run_evaluation(file_obj):
	# if file_obj is None:
	# df = generate_synthetic_dataset(num_agents=3, num_samples=12)
	# else:
	# path = save_uploaded(file_obj)
	# df = load_file_to_df(path)

	# if df is None:
	# return None, "No data loaded", None

	# # Normalize column names
	# cols = {c.lower(): c for c in df.columns}
	# rename_map = {}
	# for k in ["prompt", "response", "task", "agent", "reference"]:
	# if k not in cols:
	# if k == "reference":
	# for alt in ["answer", "ground_truth", "ref"]:
	# if alt in cols:
	# rename_map[cols[alt]] = k
	# break
	# else:
	# for alt in [k, k.capitalize(), k.upper()]:
	# if alt.lower() in cols:
	# rename_map[cols[alt.lower()]] = k
	# if rename_map:
	# df = df.rename(columns=rename_map)

	# metrics_df, images, leaderboard = evaluate_dataframe(df)

	# gallery_items = [p for (p, caption) in images]
	# captions = [caption for (p, caption) in images]

	# out_csv = "/tmp/eval_results.csv"
	# metrics_df.to_csv(out_csv, index=False)

	# return (gallery_items, captions), metrics_df, leaderboard

	# # Build Gradio UI
	# with gr.Blocks() as demo:
	# gr.Markdown("# Agentic Evaluation Framework")
	# gr.Markdown(
	# "Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference`. "
	# "If no file is uploaded, a synthetic demo will run."
	# )

	# with gr.Row():
	# file_input = gr.File(label="Upload CSV/JSON/JSONL", file_types=[".csv", ".json", ".jsonl"])
	# run_btn = gr.Button("Run Evaluation")
	# download_report = gr.File(label="Download CSV Report")

	# gallery = gr.Gallery(label="Visualization Outputs", columns=2, height="auto")
	# table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
	# leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Score per Agent & Task)")

	# def on_run(file_in):
	# (gallery_items, captions), metrics_df, lb = run_evaluation(file_in)
	# gallery_display = [(p, captions[i] if i < len(captions) else "") for i, p in enumerate(gallery_items)]
	# return gallery_display, metrics_df, lb

	# run_btn.click(fn=on_run, inputs=[file_input], outputs=[gallery, table, leaderboard])

	# gr.Markdown("## Tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive). "
	# "- `reference` optional.\n- Download CSV report after evaluation.")

	# demo.launch()

	"""
	Gradio application entrypoint for Hugging Face Spaces.
	"""

	import os
	import tempfile
	import pandas as pd
	import gradio as gr
	from evaluator import evaluate_dataframe
	from synthetic_data import generate_synthetic_dataset

	# -----------------------------
	# File Handling
	# -----------------------------
	def save_uploaded(file_obj):
	    """Resolve an uploaded file to a path on the local filesystem.

	    Args:
	        file_obj: The value handed over by the upload component. Accepted
	            shapes are a path (``str`` or ``os.PathLike``), a mapping with
	            a ``"path"`` or ``"name"`` entry, an object exposing a
	            ``.name`` attribute, or a nameless readable stream.

	    Returns:
	        The filesystem path as a string, or ``None`` when ``file_obj`` is
	        falsy. For a nameless stream the content is copied into a new
	        temporary file and that file's path is returned.

	    Raises:
	        TypeError: If no path can be derived and the object cannot be read.
	    """
	    if not file_obj:
	        return None

	    # A plain path has no ``.name`` attribute; hand it back unchanged.
	    if isinstance(file_obj, (str, os.PathLike)):
	        return os.fspath(file_obj)

	    # NOTE(review): the dict keys are assumed from Gradio's file payloads
	    # ("path" in newer releases, "name" in older ones) -- confirm against
	    # the Gradio version pinned for this Space.
	    if isinstance(file_obj, dict):
	        name = file_obj.get("path") or file_obj.get("name")
	    else:
	        name = getattr(file_obj, "name", None)
	    if name and isinstance(name, (str, os.PathLike)):
	        return os.fspath(name)

	    # Fallback: no usable name, so persist the stream content ourselves.
	    reader = getattr(file_obj, "read", None)
	    if not callable(reader):
	        raise TypeError(
	            f"Unsupported upload object: {type(file_obj).__name__}"
	        )
	    data = reader()
	    if isinstance(data, str):
	        data = data.encode("utf-8")

	    # Without a filename the format is guessed from the first
	    # non-whitespace byte: JSON documents open with "{" or "[".
	    suffix = ".json" if data.lstrip()[:1] in (b"{", b"[") else ".csv"
	    fd, tmp = tempfile.mkstemp(suffix=suffix)
	    with os.fdopen(fd, "wb") as f:
	        f.write(data)
	    return tmp

	def load_file_to_df(path):
	    """Load a CSV, JSON, or JSON Lines file into a pandas DataFrame.

	    The format is chosen from the file extension, compared
	    case-insensitively:

	    * ``.csv`` is read with ``pd.read_csv``.
	    * ``.jsonl`` / ``.ndjson`` are read as JSON Lines.
	    * anything else is parsed as a regular JSON document first and as
	      JSON Lines only if that raises ``ValueError``.

	    Args:
	        path: Filesystem path of the file, or ``None``.

	    Returns:
	        The parsed DataFrame, or ``None`` when ``path`` is ``None``.

	    Raises:
	        ValueError: If the content cannot be parsed in the selected format.
	            I/O errors raised by pandas propagate unchanged.
	    """
	    if path is None:
	        return None

	    lowered = str(path).lower()
	    if lowered.endswith(".csv"):
	        return pd.read_csv(path)
	    if lowered.endswith((".jsonl", ".ndjson")):
	        return pd.read_json(path, lines=True)

	    # Plain JSON goes first: with ``lines=True`` a compact one-line JSON
	    # array parses "successfully" into a single row of dict cells, so the
	    # line-delimited reader must only be the fallback here.
	    try:
	        return pd.read_json(path)
	    except ValueError:
	        return pd.read_json(path, lines=True)


	# -----------------------------
	# Evaluation Pipeline
	# -----------------------------
	def run_evaluation(file_obj):
	    """Evaluate an uploaded dataset, or a synthetic one when none is given.

	    Args:
	        file_obj: Upload value passed on to ``save_uploaded``; ``None``
	            selects the synthetic demo dataset.

	    Returns:
	        A 3-tuple ``((gallery_items, captions), metrics_df, leaderboard)``.
	        When no DataFrame could be loaded the tuple is
	        ``(None, "No data loaded", None)`` instead.

	    Side effects:
	        Writes ``metrics_df`` as CSV to ``/tmp/eval_results.csv``.
	    """
	    if file_obj is None:
	        df = generate_synthetic_dataset(num_agents=3, num_samples=12)
	    else:
	        df = load_file_to_df(save_uploaded(file_obj))

	    if df is None:
	        return None, "No data loaded", None

	    # Rename headers that differ from the expected names only by letter
	    # case or surrounding whitespace. An exact match is left alone, and
	    # the first matching column wins when several candidates exist.
	    expected = ["task_id", "prompt", "response", "agent", "reference"]
	    present = set(df.columns)
	    rename_map = {}
	    for canonical in expected:
	        if canonical in present:
	            continue
	        for col in df.columns:
	            # str() guards against non-string labels (e.g. integers).
	            if str(col).strip().lower() == canonical:
	                rename_map[col] = canonical
	                break
	    if rename_map:
	        df = df.rename(columns=rename_map)

	    metrics_df, images, leaderboard = evaluate_dataframe(df)

	    # ``images`` is unpacked as (item, caption) pairs; the items are
	    # presumably image file paths -- verify against evaluate_dataframe.
	    gallery_items = [item for item, _ in images]
	    captions = [caption for _, caption in images]

	    # Save CSV report
	    out_csv = "/tmp/eval_results.csv"
	    metrics_df.to_csv(out_csv, index=False)

	    return (gallery_items, captions), metrics_df, leaderboard


	# -----------------------------
	# Gradio UI
	# -----------------------------
	# Layout: upload / run / download controls in one row, followed by the
	# visualization gallery and the two result tables.
	with gr.Blocks() as demo:
	    gr.Markdown("# 🧪 Agentic Evaluation Framework")
	    gr.Markdown(
	        "Upload a CSV/JSON/JSONL with columns: "
	        "`task_id,prompt,response,agent,reference`. "
	        "If no file is uploaded, a small synthetic demo will run."
	    )

	    with gr.Row():
	        file_input = gr.File(label="Upload CSV/JSON/JSONL", file_types=[".csv", ".json", ".jsonl"])
	        run_btn = gr.Button("Run Evaluation")
	        download_report = gr.File(label="Download CSV Report")

	    gallery = gr.Gallery(label="Visualization Outputs", columns=2, height="auto")
	    table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
	    leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Final Score per Agent)")

	    def on_run(file_in):
	        """Run the evaluation and return values for the four outputs.

	        Returns the gallery entries as ``(item, caption)`` tuples, the
	        per-example metrics, the leaderboard, and the path of a CSV report
	        for the download component.

	        Raises:
	            gr.Error: If ``run_evaluation`` reports that nothing was loaded.
	        """
	        result, metrics_df, lb = run_evaluation(file_in)
	        if result is None:
	            # In this case the second element carries the message text
	            # instead of a DataFrame; surface it rather than crashing on
	            # the unpacking below.
	            raise gr.Error(str(metrics_df))

	        gallery_items, captions = result
	        gallery_display = [
	            (p, captions[i] if i < len(captions) else "")
	            for i, p in enumerate(gallery_items)
	        ]

	        # Write the report to its own temporary file so that concurrent
	        # runs do not overwrite each other's download.
	        fd, report_path = tempfile.mkstemp(prefix="eval_results_", suffix=".csv")
	        os.close(fd)
	        metrics_df.to_csv(report_path, index=False)

	        return gallery_display, metrics_df, lb, report_path

	    run_btn.click(
	        fn=on_run,
	        inputs=[file_input],
	        outputs=[gallery, table, leaderboard, download_report],
	    )

	    gr.Markdown("## Tips\n- Ensure columns: `task_id,prompt,response,agent,reference` "
	    "(case-insensitive).\n- Visualization images in Gallery.\n- Download CSV after evaluation.")

	demo.launch()