Spaces:
Runtime error
Runtime error
| # app.py | |
| """ | |
| Gradio application entrypoint for Hugging Face Spaces. | |
| """ | |
| import os | |
| import tempfile | |
| import pandas as pd | |
| import gradio as gr | |
| from evaluator import evaluate_dataframe | |
| from synthetic_data import generate_synthetic_dataset | |
def save_uploaded(file_obj):
    """Resolve an uploaded-file value to a path on the local filesystem.

    The value handed over by a Gradio File input varies with the Gradio
    version and component settings, so several shapes are accepted:

    * a path (``str`` or ``os.PathLike``) -- returned as a string;
    * a dict carrying the path under ``"path"`` or ``"name"``
      (NOTE(review): key names assumed from Gradio's file payloads -- confirm
      against the installed version);
    * an object whose ``name`` attribute is a path string (tempfile-like);
    * a readable stream -- its content is copied to a new temporary file.

    Args:
        file_obj: The upload value, or any falsy value for "no upload".

    Returns:
        The path as a string, or ``None`` when ``file_obj`` is falsy.

    Raises:
        TypeError: If ``file_obj`` is none of the supported shapes.
    """
    if not file_obj:
        return None

    if isinstance(file_obj, (str, os.PathLike)):
        return os.fspath(file_obj)

    if isinstance(file_obj, dict):
        location = file_obj.get("path") or file_obj.get("name")
        if location:
            return os.fspath(location)
        raise TypeError("uploaded file dict has no 'path' or 'name' entry")

    name = getattr(file_obj, "name", None)
    if isinstance(name, str) and name:
        return name

    reader = getattr(file_obj, "read", None)
    if callable(reader):
        # No usable on-disk path: persist the stream so pandas can open it.
        data = reader()
        if isinstance(data, str):
            data = data.encode("utf-8")
        # Without a ".csv" name the content is treated as JSON, matching the
        # loader's "anything that is not .csv is JSON" rule.
        suffix = ".csv" if str(name or "").lower().endswith(".csv") else ".json"
        fd, tmp_path = tempfile.mkstemp(suffix=suffix)
        with os.fdopen(fd, "wb") as handle:
            handle.write(data)
        return tmp_path

    raise TypeError(f"unsupported upload type: {type(file_obj).__name__}")
def load_file_to_df(path):
    """Load a CSV, JSON, or JSON Lines file into a pandas DataFrame.

    The format is chosen from the file extension (case-insensitive):

    * ``.csv`` is read with ``pandas.read_csv``;
    * ``.jsonl`` / ``.ndjson`` is read as JSON Lines first, falling back to
      a single JSON document;
    * anything else is read as a single JSON document first, falling back
      to JSON Lines.

    Args:
        path: Path to the data file (``str`` or ``os.PathLike``), or ``None``.

    Returns:
        The loaded DataFrame, or ``None`` when ``path`` is ``None``.

    Raises:
        ValueError: If the content cannot be parsed in either JSON mode.
        Exception: Any other error raised by pandas while reading the file
            propagates unchanged.
    """
    if path is None:
        return None

    path = os.fspath(path)
    lowered = path.lower()

    if lowered.endswith(".csv"):
        return pd.read_csv(path)

    # Try the mode the extension suggests, then the other one. A document
    # parse of multi-line JSONL fails with ValueError, which triggers the
    # fallback.
    lines_first = lowered.endswith((".jsonl", ".ndjson"))
    try:
        return pd.read_json(path, lines=lines_first)
    except ValueError:
        return pd.read_json(path, lines=not lines_first)
def _normalize_columns(df):
    """Rename columns so the expected lowercase names exist where possible.

    For each of ``prompt``, ``response``, ``task``, ``agent`` and
    ``reference`` that is not already present verbatim, a column whose
    stripped, lowercased label matches is renamed to the canonical name.
    ``reference`` additionally accepts ``answer``, ``ground_truth`` and
    ``ref`` as aliases, tried in that order.
    """
    expected = ("prompt", "response", "task", "agent", "reference")
    reference_aliases = ("answer", "ground_truth", "ref")

    # Map normalized label -> original label; the first column seen wins
    # when several labels differ only by case or surrounding whitespace.
    # str() guards against non-string labels (e.g. integers).
    by_normalized = {}
    for column in df.columns:
        by_normalized.setdefault(str(column).strip().lower(), column)

    rename_map = {}
    for canonical in expected:
        if canonical in df.columns:
            continue
        candidates = (canonical,)
        if canonical == "reference":
            candidates += reference_aliases
        for candidate in candidates:
            if candidate in by_normalized:
                rename_map[by_normalized[candidate]] = canonical
                break

    return df.rename(columns=rename_map) if rename_map else df


def run_evaluation(file_obj):
    """Evaluate an uploaded dataset, or a synthetic demo when none is given.

    Args:
        file_obj: Upload value from the Gradio File input, or ``None`` to run
            on ``generate_synthetic_dataset(num_agents=3, num_samples=12)``.

    Returns:
        A 3-tuple ``((gallery_items, captions), metrics_df, leaderboard)``.
        ``gallery_items`` and ``captions`` are parallel lists taken from the
        ``(path, caption)`` pairs returned by ``evaluate_dataframe``.
        When no data could be loaded, the result is ``([], [])``, a one-cell
        DataFrame holding the message ``"No data loaded"``, and ``None``, so
        callers can unpack the same shape in every case.

    Side effects:
        Writes the metrics table to ``/tmp/eval_results.csv``. The path is
        fixed, so each run overwrites the previous report.
    """
    if file_obj is None:
        df = generate_synthetic_dataset(num_agents=3, num_samples=12)
    else:
        df = load_file_to_df(save_uploaded(file_obj))

    if df is None:
        return ([], []), pd.DataFrame({"message": ["No data loaded"]}), None

    df = _normalize_columns(df)
    metrics_df, images, leaderboard = evaluate_dataframe(df)

    # Split the (path, caption) pairs into two parallel lists.
    gallery_items = [path for path, _ in images]
    captions = [caption for _, caption in images]

    # Save a CSV report for download.
    out_csv = "/tmp/eval_results.csv"
    metrics_df.to_csv(out_csv, index=False)

    return (gallery_items, captions), metrics_df, leaderboard
| # Build Gradio UI | |
# NOTE(review): the original indentation was lost, so which components sit
# inside the Row is a best guess -- confirm the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# Agentic Evaluation Framework")
    gr.Markdown(
        "Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference` (reference optional). "
        "If no file is uploaded, a small synthetic demo will run."
    )

    with gr.Row():
        file_input = gr.File(
            label="Upload CSV / JSON / JSONL (optional)",
            file_types=[".csv", ".json", ".jsonl"],
        )
        run_btn = gr.Button("Run Evaluation")
        download_report = gr.File(label="Download CSV Report")

    # Gallery configured through constructor arguments (no .style() call).
    gallery = gr.Gallery(
        label="Visualization Outputs",
        columns=2,
        height="auto",
    )
    table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
    leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Final Score per Agent & Task)")

    def on_run(file_in):
        """Run the evaluation and build the values for the four output widgets.

        Returns a tuple ``(gallery_display, metrics_df, leaderboard,
        report_path)``: a list of ``(image, caption)`` pairs for the gallery,
        the two tables, and the path of a freshly written CSV report (or
        ``None`` when there is no metrics table to export).

        Raises:
            gr.Error: When ``run_evaluation`` reports a problem as a message
                string in the metrics slot.
        """
        visuals, metrics_df, lb = run_evaluation(file_in)

        if isinstance(metrics_df, str):
            # A string in the metrics slot is a status message, not a table;
            # surface it to the user instead of handing it to the Dataframe.
            raise gr.Error(metrics_df)

        gallery_items, captions = visuals if visuals else ([], [])

        # Pair each image with its caption; pad with "" if captions run short.
        gallery_display = [
            (item, captions[index] if index < len(captions) else "")
            for index, item in enumerate(gallery_items)
        ]

        # Write the report to a unique temp file so concurrent sessions do not
        # overwrite each other's download. A new file is created per run.
        report_path = None
        if isinstance(metrics_df, pd.DataFrame):
            fd, report_path = tempfile.mkstemp(prefix="eval_results_", suffix=".csv")
            os.close(fd)
            metrics_df.to_csv(report_path, index=False)

        return gallery_display, metrics_df, lb, report_path

    run_btn.click(
        fn=on_run,
        inputs=[file_input],
        outputs=[gallery, table, leaderboard, download_report],
    )

    gr.Markdown(
        "## Usage tips\n"
        "- Columns: `prompt,response,task,agent,reference` (case-insensitive).\n"
        "- `reference` can be empty but accuracy/hallucination will be weaker.\n"
        "- Visualization images are available in the Gallery and a CSV report is downloadable."
    )

demo.launch()