Spaces:
Runtime error
Runtime error
| # # app.py | |
| # """ | |
| # Gradio application entrypoint for Hugging Face Spaces. | |
| # """ | |
| # import os | |
| # import tempfile | |
| # import pandas as pd | |
| # import gradio as gr | |
| # from evaluator import evaluate_dataframe | |
| # from synthetic_data import generate_synthetic_dataset | |
| # # Helper to save uploaded file to local temp path (gradio File gives a NamedTemporaryFile-like object) | |
| # def save_uploaded(file_obj): | |
| # if not file_obj: | |
| # return None | |
| # # file_obj can be a dictionary or a file-like object depending on Gradio version | |
| # try: | |
| # path = file_obj.name | |
| # return path | |
| # except Exception: | |
| # # fallback: write bytes to temp file | |
| # data = file_obj.read() | |
| # suffix = ".csv" if file_obj.name.endswith(".csv") else ".json" | |
| # fd, tmp = tempfile.mkstemp(suffix=suffix) | |
| # with os.fdopen(fd, "wb") as f: | |
| # f.write(data) | |
| # return tmp | |
| # def load_file_to_df(path): | |
| # if path is None: | |
| # return None | |
| # # Try CSV | |
| # try: | |
| # if path.endswith(".csv"): | |
| # return pd.read_csv(path) | |
| # # JSONL | |
| # try: | |
| # return pd.read_json(path, lines=True) | |
| # except ValueError: | |
| # return pd.read_json(path) | |
| # except Exception as e: | |
| # # As last resort, raise | |
| # raise e | |
| # def run_evaluation(file_obj): | |
| # # If no file provided, use synthetic demo | |
| # if file_obj is None: | |
| # df = generate_synthetic_dataset(num_agents=3, num_samples=12) | |
| # else: | |
| # path = save_uploaded(file_obj) | |
| # df = load_file_to_df(path) | |
| # # Ensure required columns exist; otherwise, attempt to map common alternatives | |
| # if df is None: | |
| # return None, "No data loaded", None | |
| # # Try to normalize column names | |
| # cols = {c.lower(): c for c in df.columns} | |
| # # rename common variants | |
| # rename_map = {} | |
| # for k in ["prompt", "response", "task", "agent", "reference"]: | |
| # if k not in cols: | |
| # # try variants | |
| # if k == "reference": | |
| # for alt in ["answer", "ground_truth", "ref"]: | |
| # if alt in cols: | |
| # rename_map[cols[alt]] = k | |
| # break | |
| # else: | |
| # for alt in [k, k.capitalize(), k.upper()]: | |
| # if alt.lower() in cols: | |
| # rename_map[cols[alt.lower()]] = k | |
| # if rename_map: | |
| # df = df.rename(columns=rename_map) | |
| # metrics_df, images, leaderboard = evaluate_dataframe(df) | |
| # # Prepare gallery (list of image file paths). Gradio Gallery accepts list of image paths or PIL images. | |
| # gallery_items = [p for (p, caption) in images] | |
| # captions = [caption for (p, caption) in images] | |
| # # Save a CSV report for download | |
| # out_csv = "/tmp/eval_results.csv" | |
| # metrics_df.to_csv(out_csv, index=False) | |
| # return (gallery_items, captions), metrics_df, leaderboard | |
| # # Build Gradio UI | |
| # with gr.Blocks() as demo: | |
| # gr.Markdown("# Agentic Evaluation Framework") | |
| # gr.Markdown( | |
| # "Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference` (reference optional). " | |
| # "If no file is uploaded, a small synthetic demo will run." | |
| # ) | |
| # with gr.Row(): | |
| # file_input = gr.File(label="Upload CSV / JSON / JSONL (optional)", file_types=[".csv", ".json", ".jsonl"]) | |
| # run_btn = gr.Button("Run Evaluation") | |
| # download_report = gr.File(label="Download CSV Report") | |
| # # ✅ Fixed Gallery (removed .style, added columns=2) | |
| # gallery = gr.Gallery( | |
| # label="Visualization Outputs", | |
| # columns=2, | |
| # height="auto" | |
| # ) | |
| # table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)") | |
| # leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Final Score per Agent & Task)") | |
| # def on_run(file_in): | |
| # (gallery_items, captions), metrics_df, lb = run_evaluation(file_in) | |
| # # Save gallery captions mapping into a simple list of tuples for Gradio gallery (path, caption) | |
| # gallery_display = [] | |
| # for i, p in enumerate(gallery_items): | |
| # caption = captions[i] if i < len(captions) else "" | |
| # gallery_display.append((p, caption)) | |
| # return gallery_display, metrics_df, lb | |
| # run_btn.click(fn=on_run, inputs=[file_input], outputs=[gallery, table, leaderboard]) | |
| # gr.Markdown("## Usage tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive). " | |
| # "- `reference` can be empty but accuracy/hallucination will be weaker.\n" | |
| # "- Visualization images are available in the Gallery and a CSV report is downloadable.") | |
| # demo.launch() | |
| # app.py | |
| # """ | |
| # Gradio application entrypoint for Hugging Face Spaces. | |
| # """ | |
| # import os | |
| # import tempfile | |
| # import pandas as pd | |
| # import gradio as gr | |
| # from evaluation import evaluate_dataframe # ✅ updated import | |
| # from synthetic_data import generate_synthetic_dataset | |
| # # Helper to save uploaded file | |
| # def save_uploaded(file_obj): | |
| # if not file_obj: | |
| # return None | |
| # try: | |
| # return file_obj.name | |
| # except Exception: | |
| # data = file_obj.read() | |
| # suffix = ".csv" if file_obj.name.endswith(".csv") else ".json" | |
| # fd, tmp = tempfile.mkstemp(suffix=suffix) | |
| # with os.fdopen(fd, "wb") as f: | |
| # f.write(data) | |
| # return tmp | |
| # def load_file_to_df(path): | |
| # if path is None: | |
| # return None | |
| # try: | |
| # if path.endswith(".csv"): | |
| # return pd.read_csv(path) | |
| # try: | |
| # return pd.read_json(path, lines=True) | |
| # except ValueError: | |
| # return pd.read_json(path) | |
| # except Exception as e: | |
| # raise e | |
| # def run_evaluation(file_obj): | |
| # if file_obj is None: | |
| # df = generate_synthetic_dataset(num_agents=3, num_samples=12) | |
| # else: | |
| # path = save_uploaded(file_obj) | |
| # df = load_file_to_df(path) | |
| # if df is None: | |
| # return None, "No data loaded", None | |
| # # Normalize column names | |
| # cols = {c.lower(): c for c in df.columns} | |
| # rename_map = {} | |
| # for k in ["prompt", "response", "task", "agent", "reference"]: | |
| # if k not in cols: | |
| # if k == "reference": | |
| # for alt in ["answer", "ground_truth", "ref"]: | |
| # if alt in cols: | |
| # rename_map[cols[alt]] = k | |
| # break | |
| # else: | |
| # for alt in [k, k.capitalize(), k.upper()]: | |
| # if alt.lower() in cols: | |
| # rename_map[cols[alt.lower()]] = k | |
| # if rename_map: | |
| # df = df.rename(columns=rename_map) | |
| # metrics_df, images, leaderboard = evaluate_dataframe(df) | |
| # gallery_items = [p for (p, caption) in images] | |
| # captions = [caption for (p, caption) in images] | |
| # out_csv = "/tmp/eval_results.csv" | |
| # metrics_df.to_csv(out_csv, index=False) | |
| # return (gallery_items, captions), metrics_df, leaderboard | |
| # # Build Gradio UI | |
| # with gr.Blocks() as demo: | |
| # gr.Markdown("# Agentic Evaluation Framework") | |
| # gr.Markdown( | |
| # "Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference`. " | |
| # "If no file is uploaded, a synthetic demo will run." | |
| # ) | |
| # with gr.Row(): | |
| # file_input = gr.File(label="Upload CSV/JSON/JSONL", file_types=[".csv", ".json", ".jsonl"]) | |
| # run_btn = gr.Button("Run Evaluation") | |
| # download_report = gr.File(label="Download CSV Report") | |
| # gallery = gr.Gallery(label="Visualization Outputs", columns=2, height="auto") | |
| # table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)") | |
| # leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Score per Agent & Task)") | |
| # def on_run(file_in): | |
| # (gallery_items, captions), metrics_df, lb = run_evaluation(file_in) | |
| # gallery_display = [(p, captions[i] if i < len(captions) else "") for i, p in enumerate(gallery_items)] | |
| # return gallery_display, metrics_df, lb | |
| # run_btn.click(fn=on_run, inputs=[file_input], outputs=[gallery, table, leaderboard]) | |
| # gr.Markdown("## Tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive). " | |
| # "- `reference` optional.\n- Download CSV report after evaluation.") | |
| # demo.launch() | |
| """ | |
| Gradio application entrypoint for Hugging Face Spaces. | |
| """ | |
| import os | |
| import tempfile | |
| import pandas as pd | |
| import gradio as gr | |
| from evaluator import evaluate_dataframe | |
| from synthetic_data import generate_synthetic_dataset | |
| # ----------------------------- | |
| # File Handling | |
| # ----------------------------- | |
def save_uploaded(file_obj):
    """Resolve an uploaded file to a path on the local filesystem.

    The upload callback may receive different objects depending on the
    Gradio version and the ``gr.File`` configuration, so several shapes are
    accepted:

    * a path (``str`` or ``os.PathLike``) -- returned as a string;
    * a dict payload -- the first non-empty ``"path"`` / ``"name"`` entry is
      returned (key names are an assumption; TODO confirm against the
      Gradio version in use);
    * an object with a non-empty ``name`` attribute -- that name is returned;
    * any other readable stream -- its content is copied to a new temp file.

    Args:
        file_obj: The value delivered by the upload component, or a falsy
            value when nothing was uploaded.

    Returns:
        A filesystem path as ``str``, or ``None`` when there is no upload.
    """
    if not file_obj:
        return None

    if isinstance(file_obj, (str, os.PathLike)):
        return os.fspath(file_obj)

    if isinstance(file_obj, dict):
        for key in ("path", "name"):
            candidate = file_obj.get(key)
            if candidate:
                return os.fspath(candidate)
        return None

    name = getattr(file_obj, "name", None)
    if isinstance(name, (str, os.PathLike)) and name:
        return os.fspath(name)

    # Nameless stream: persist the content so the loader can open it by path.
    data = file_obj.read()
    if isinstance(data, str):
        data = data.encode("utf-8")

    # There is no file name to inspect, so pick the suffix from the content:
    # JSON documents and JSONL records start with "{" or "[".
    first_byte = data.lstrip()[:1]
    suffix = ".json" if first_byte in (b"{", b"[") else ".csv"

    fd, tmp = tempfile.mkstemp(suffix=suffix)
    with os.fdopen(fd, "wb") as f:
        f.write(data)
    return tmp
def load_file_to_df(path):
    """Load a CSV, JSON or JSONL file into a pandas DataFrame.

    The format is chosen from the file extension, compared
    case-insensitively:

    * ``.csv`` is read with ``pd.read_csv``;
    * ``.jsonl`` / ``.ndjson`` is read as line-delimited JSON first, falling
      back to a regular JSON document;
    * anything else is read as a regular JSON document first, falling back
      to line-delimited JSON.

    Args:
        path: Path of the file to read, or ``None``.

    Returns:
        The loaded DataFrame, or ``None`` when ``path`` is ``None``.

    Raises:
        ValueError: If the file cannot be parsed as either JSON flavour.
        Exception: Any other error raised by pandas propagates unchanged.
    """
    if path is None:
        return None

    lowered = str(path).lower()
    if lowered.endswith(".csv"):
        return pd.read_csv(path)

    # Trying the line-delimited parser first on a one-line JSON array does not
    # fail; it yields a single row of nested objects. Let the extension decide
    # which parser goes first and keep the other one as the fallback.
    lines_first = lowered.endswith((".jsonl", ".ndjson"))
    try:
        return pd.read_json(path, lines=lines_first)
    except ValueError:
        return pd.read_json(path, lines=not lines_first)
| # ----------------------------- | |
| # Evaluation Pipeline | |
| # ----------------------------- | |
def _normalize_columns(df):
    """Rename case/whitespace variants of the expected columns to canonical names.

    A column is renamed only when the canonical name is not already present,
    so an exact match is never shadowed by a differently-cased duplicate.
    """
    expected = ("task_id", "prompt", "response", "agent", "reference")

    # Map the normalised spelling of each label to the label actually present.
    # str() keeps non-string labels (e.g. integers) from raising here.
    by_normalised = {}
    for col in df.columns:
        by_normalised.setdefault(str(col).strip().lower(), col)

    rename_map = {}
    for key in expected:
        if key in df.columns:
            continue
        actual = by_normalised.get(key)
        if actual is not None:
            rename_map[actual] = key

    return df.rename(columns=rename_map) if rename_map else df


def run_evaluation(file_obj):
    """Evaluate an uploaded dataset, or a synthetic demo when none is given.

    Args:
        file_obj: The upload delivered by the UI, or ``None`` to run on the
            dataset produced by ``generate_synthetic_dataset``.

    Returns:
        A 3-tuple ``((gallery_items, captions), metrics_df, leaderboard)``.
        ``gallery_items`` and ``captions`` are parallel lists taken from the
        ``(path, caption)`` pairs returned by ``evaluate_dataframe``.
        When no data could be loaded, the gallery lists are empty, the
        metrics slot holds a one-cell frame with the message
        ``"No data loaded"`` and the leaderboard is ``None``; the shape stays
        the same so callers can always unpack it.
    """
    if file_obj is None:
        df = generate_synthetic_dataset(num_agents=3, num_samples=12)
    else:
        df = load_file_to_df(save_uploaded(file_obj))

    if df is None:
        return ([], []), pd.DataFrame({"message": ["No data loaded"]}), None

    df = _normalize_columns(df)

    # evaluate_dataframe is defined elsewhere; its result is unpacked here as
    # (metrics frame, iterable of (path, caption) pairs, leaderboard).
    metrics_df, images, leaderboard = evaluate_dataframe(df)
    gallery_items = [path for path, _ in images]
    captions = [caption for _, caption in images]

    # Save CSV report in the platform temp directory (``/tmp`` on Linux).
    out_csv = os.path.join(tempfile.gettempdir(), "eval_results.csv")
    metrics_df.to_csv(out_csv, index=False)

    return (gallery_items, captions), metrics_df, leaderboard
| # ----------------------------- | |
| # Gradio UI | |
| # ----------------------------- | |
# Layout and event wiring for the evaluation demo.
with gr.Blocks() as demo:
    gr.Markdown("# 🧪 Agentic Evaluation Framework")
    gr.Markdown(
        "Upload a CSV/JSON/JSONL with columns: "
        "`task_id,prompt,response,agent,reference`. "
        "If no file is uploaded, a small synthetic demo will run."
    )
    with gr.Row():
        file_input = gr.File(label="Upload CSV/JSON/JSONL", file_types=[".csv", ".json", ".jsonl"])
        run_btn = gr.Button("Run Evaluation")
        download_report = gr.File(label="Download CSV Report")
    gallery = gr.Gallery(label="Visualization Outputs", columns=2, height="auto")
    table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
    leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Final Score per Agent)")

    def on_run(file_in):
        """Run the evaluation and return values for all four output components.

        Returns the gallery entries as ``(path, caption)`` pairs, the metrics
        table, the leaderboard, and the path of a freshly written CSV report
        (``None`` when there is no metrics frame to export).
        """
        payload, metrics_df, lb = run_evaluation(file_in)
        if payload is None:
            # "Nothing to evaluate" is reported with the message in the
            # second slot; surface it instead of failing on the unpack below.
            raise gr.Error(str(metrics_df))

        gallery_items, captions = payload
        gallery_display = [
            (p, captions[i] if i < len(captions) else "")
            for i, p in enumerate(gallery_items)
        ]

        # Write a per-request report so concurrent sessions do not overwrite
        # each other's download. These files are left in the temp directory.
        report_path = None
        if isinstance(metrics_df, pd.DataFrame):
            fd, report_path = tempfile.mkstemp(prefix="eval_results_", suffix=".csv")
            with os.fdopen(fd, "w", newline="", encoding="utf-8") as report:
                metrics_df.to_csv(report, index=False)

        return gallery_display, metrics_df, lb, report_path

    run_btn.click(
        fn=on_run,
        inputs=[file_input],
        outputs=[gallery, table, leaderboard, download_report],
    )
    gr.Markdown("## Tips\n- Ensure columns: `task_id,prompt,response,agent,reference` "
                "(case-insensitive).\n- Visualization images in Gallery.\n- Download CSV after evaluation.")

demo.launch()