Hugging Face Spaces status: Runtime error (the app failed at startup; the patch below addresses the likely causes).
| # # app.py | |
| # """ | |
| # Gradio application entrypoint for Hugging Face Spaces. | |
| # """ | |
| # import os | |
| # import tempfile | |
| # import pandas as pd | |
| # import gradio as gr | |
| # from evaluator import evaluate_dataframe | |
| # from synthetic_data import generate_synthetic_dataset | |
| # # Helper to save uploaded file to local temp path (gradio File gives a NamedTemporaryFile-like object) | |
| # def save_uploaded(file_obj): | |
| # if not file_obj: | |
| # return None | |
| # # file_obj can be a dictionary or a file-like object depending on Gradio version | |
| # try: | |
| # path = file_obj.name | |
| # return path | |
| # except Exception: | |
| # # fallback: write bytes to temp file | |
| # data = file_obj.read() | |
| # suffix = ".csv" if file_obj.name.endswith(".csv") else ".json" | |
| # fd, tmp = tempfile.mkstemp(suffix=suffix) | |
| # with os.fdopen(fd, "wb") as f: | |
| # f.write(data) | |
| # return tmp | |
| # def load_file_to_df(path): | |
| # if path is None: | |
| # return None | |
| # # Try CSV | |
| # try: | |
| # if path.endswith(".csv"): | |
| # return pd.read_csv(path) | |
| # # JSONL | |
| # try: | |
| # return pd.read_json(path, lines=True) | |
| # except ValueError: | |
| # return pd.read_json(path) | |
| # except Exception as e: | |
| # # As last resort, raise | |
| # raise e | |
| # def run_evaluation(file_obj): | |
| # # If no file provided, use synthetic demo | |
| # if file_obj is None: | |
| # df = generate_synthetic_dataset(num_agents=3, num_samples=12) | |
| # else: | |
| # path = save_uploaded(file_obj) | |
| # df = load_file_to_df(path) | |
| # # Ensure required columns exist; otherwise, attempt to map common alternatives | |
| # if df is None: | |
| # return None, "No data loaded", None | |
| # # Try to normalize column names | |
| # cols = {c.lower(): c for c in df.columns} | |
| # # rename common variants | |
| # rename_map = {} | |
| # for k in ["prompt", "response", "task", "agent", "reference"]: | |
| # if k not in cols: | |
| # # try variants | |
| # if k == "reference": | |
| # for alt in ["answer", "ground_truth", "ref"]: | |
| # if alt in cols: | |
| # rename_map[cols[alt]] = k | |
| # break | |
| # else: | |
| # for alt in [k, k.capitalize(), k.upper()]: | |
| # if alt.lower() in cols: | |
| # rename_map[cols[alt.lower()]] = k | |
| # if rename_map: | |
| # df = df.rename(columns=rename_map) | |
| # metrics_df, images, leaderboard = evaluate_dataframe(df) | |
| # # Prepare gallery (list of image file paths). Gradio Gallery accepts list of image paths or PIL images. | |
| # gallery_items = [p for (p, caption) in images] | |
| # captions = [caption for (p, caption) in images] | |
| # # Save a CSV report for download | |
| # out_csv = "/tmp/eval_results.csv" | |
| # metrics_df.to_csv(out_csv, index=False) | |
| # return (gallery_items, captions), metrics_df, leaderboard | |
| # # Build Gradio UI | |
| # with gr.Blocks() as demo: | |
| # gr.Markdown("# Agentic Evaluation Framework") | |
| # gr.Markdown( | |
| # "Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference` (reference optional). " | |
| # "If no file is uploaded, a small synthetic demo will run." | |
| # ) | |
| # with gr.Row(): | |
| # file_input = gr.File(label="Upload CSV / JSON / JSONL (optional)", file_types=[".csv", ".json", ".jsonl"]) | |
| # run_btn = gr.Button("Run Evaluation") | |
| # download_report = gr.File(label="Download CSV Report") | |
| # # β Fixed Gallery (removed .style, added columns=2) | |
| # gallery = gr.Gallery( | |
| # label="Visualization Outputs", | |
| # columns=2, | |
| # height="auto" | |
| # ) | |
| # table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)") | |
| # leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Final Score per Agent & Task)") | |
| # def on_run(file_in): | |
| # (gallery_items, captions), metrics_df, lb = run_evaluation(file_in) | |
| # # Save gallery captions mapping into a simple list of tuples for Gradio gallery (path, caption) | |
| # gallery_display = [] | |
| # for i, p in enumerate(gallery_items): | |
| # caption = captions[i] if i < len(captions) else "" | |
| # gallery_display.append((p, caption)) | |
| # return gallery_display, metrics_df, lb | |
| # run_btn.click(fn=on_run, inputs=[file_input], outputs=[gallery, table, leaderboard]) | |
| # gr.Markdown("## Usage tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive). " | |
| # "- `reference` can be empty but accuracy/hallucination will be weaker.\n" | |
| # "- Visualization images are available in the Gallery and a CSV report is downloadable.") | |
| # demo.launch() | |
| # app.py | |
| # """ | |
| # Gradio application entrypoint for Hugging Face Spaces. | |
| # """ | |
| # import os | |
| # import tempfile | |
| # import pandas as pd | |
| # import gradio as gr | |
| # from evaluation import evaluate_dataframe # β updated import | |
| # from synthetic_data import generate_synthetic_dataset | |
| # # Helper to save uploaded file | |
| # def save_uploaded(file_obj): | |
| # if not file_obj: | |
| # return None | |
| # try: | |
| # return file_obj.name | |
| # except Exception: | |
| # data = file_obj.read() | |
| # suffix = ".csv" if file_obj.name.endswith(".csv") else ".json" | |
| # fd, tmp = tempfile.mkstemp(suffix=suffix) | |
| # with os.fdopen(fd, "wb") as f: | |
| # f.write(data) | |
| # return tmp | |
| # def load_file_to_df(path): | |
| # if path is None: | |
| # return None | |
| # try: | |
| # if path.endswith(".csv"): | |
| # return pd.read_csv(path) | |
| # try: | |
| # return pd.read_json(path, lines=True) | |
| # except ValueError: | |
| # return pd.read_json(path) | |
| # except Exception as e: | |
| # raise e | |
| # def run_evaluation(file_obj): | |
| # if file_obj is None: | |
| # df = generate_synthetic_dataset(num_agents=3, num_samples=12) | |
| # else: | |
| # path = save_uploaded(file_obj) | |
| # df = load_file_to_df(path) | |
| # if df is None: | |
| # return None, "No data loaded", None | |
| # # Normalize column names | |
| # cols = {c.lower(): c for c in df.columns} | |
| # rename_map = {} | |
| # for k in ["prompt", "response", "task", "agent", "reference"]: | |
| # if k not in cols: | |
| # if k == "reference": | |
| # for alt in ["answer", "ground_truth", "ref"]: | |
| # if alt in cols: | |
| # rename_map[cols[alt]] = k | |
| # break | |
| # else: | |
| # for alt in [k, k.capitalize(), k.upper()]: | |
| # if alt.lower() in cols: | |
| # rename_map[cols[alt.lower()]] = k | |
| # if rename_map: | |
| # df = df.rename(columns=rename_map) | |
| # metrics_df, images, leaderboard = evaluate_dataframe(df) | |
| # gallery_items = [p for (p, caption) in images] | |
| # captions = [caption for (p, caption) in images] | |
| # out_csv = "/tmp/eval_results.csv" | |
| # metrics_df.to_csv(out_csv, index=False) | |
| # return (gallery_items, captions), metrics_df, leaderboard | |
| # # Build Gradio UI | |
| # with gr.Blocks() as demo: | |
| # gr.Markdown("# Agentic Evaluation Framework") | |
| # gr.Markdown( | |
| # "Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference`. " | |
| # "If no file is uploaded, a synthetic demo will run." | |
| # ) | |
| # with gr.Row(): | |
| # file_input = gr.File(label="Upload CSV/JSON/JSONL", file_types=[".csv", ".json", ".jsonl"]) | |
| # run_btn = gr.Button("Run Evaluation") | |
| # download_report = gr.File(label="Download CSV Report") | |
| # gallery = gr.Gallery(label="Visualization Outputs", columns=2, height="auto") | |
| # table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)") | |
| # leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Score per Agent & Task)") | |
| # def on_run(file_in): | |
| # (gallery_items, captions), metrics_df, lb = run_evaluation(file_in) | |
| # gallery_display = [(p, captions[i] if i < len(captions) else "") for i, p in enumerate(gallery_items)] | |
| # return gallery_display, metrics_df, lb | |
| # run_btn.click(fn=on_run, inputs=[file_input], outputs=[gallery, table, leaderboard]) | |
| # gr.Markdown("## Tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive). " | |
| # "- `reference` optional.\n- Download CSV report after evaluation.") | |
| # demo.launch() | |
| # app.py (patch) | |
| import gradio as gr | |
| import pandas as pd | |
| import os | |
| import tempfile | |
| from evaluator import evaluate_dataframe, generate_visualizations | |
| # ----------------------- | |
| # Helpers | |
| # ----------------------- | |
def save_uploaded(file_obj):
    """Resolve an uploaded-file object (dict, str, or file-like) to a path on disk.

    Returns None for a falsy input. If no existing path can be found on the
    object, its raw bytes are dumped into a fresh temp file and that path is
    returned.
    """
    if not file_obj:
        return None

    # Some Gradio versions hand the callback a dict payload: probe the
    # usual path-bearing keys and take the first one that exists on disk.
    if isinstance(file_obj, dict):
        for key in ("name", "path", "file"):
            candidate = file_obj.get(key)
            if candidate and os.path.exists(candidate):
                return candidate

    # A bare string that already points at a real file.
    if isinstance(file_obj, str) and os.path.exists(file_obj):
        return file_obj

    # NamedTemporaryFile-like objects expose the backing path as .name.
    path_attr = getattr(file_obj, "name", None)
    if path_attr and os.path.exists(path_attr):
        return path_attr

    # Last resort: read the raw bytes and persist them to a new temp file.
    handle, tmp_path = tempfile.mkstemp(suffix=".csv")
    with os.fdopen(handle, "wb") as out:
        out.write(file_obj.read())
    return tmp_path
def load_file_to_df(path):
    """Load *path* into a DataFrame: CSV by extension, else JSON Lines, else JSON.

    Returns None when *path* is None. The final pd.read_json call is allowed
    to raise, so an unreadable file still surfaces an error to the caller.
    """
    if path is None:
        return None
    location = str(path)

    # CSV first when the extension says so; sep=None lets the python engine
    # sniff the delimiter. A failed parse falls through to the JSON readers.
    if location.lower().endswith(".csv"):
        try:
            return pd.read_csv(location, sep=None, engine="python")
        except Exception:
            pass

    # Prefer newline-delimited JSON; fall back to a regular JSON document.
    try:
        return pd.read_json(location, lines=True)
    except Exception:
        return pd.read_json(location)
| # ----------------------- | |
| # Evaluation wrapper | |
| # ----------------------- | |
def run_evaluation(file):
    """Evaluate an uploaded dataset end to end.

    Parameters
    ----------
    file : Gradio file payload (dict, path string, or file-like) or None.

    Returns
    -------
    (figs, metrics_df, leaderboard, csv_path) — exactly FOUR values, one per
    Gradio output component; all None when no usable data could be loaded.
    """
    path = save_uploaded(file)
    df = load_file_to_df(path)
    if df is None or df.empty:
        # BUG FIX: this branch used to return FIVE Nones while the success
        # path returns four and the click handler wires four outputs —
        # crashing the callback on empty/missing input.
        return None, None, None, None

    # Normalize headers: strip stray whitespace from column names.
    df.columns = [c.strip() for c in df.columns]

    # Guarantee the schema evaluate_dataframe expects; any missing column
    # (including the optional "reference") is filled with empty strings.
    for col in ["task_id", "task_type", "prompt", "agent", "response", "metadata", "reference"]:
        if col not in df.columns:
            df[col] = ""

    metrics_df, images, leaderboard = evaluate_dataframe(df)
    figs = generate_visualizations(metrics_df, leaderboard)

    # Persist a downloadable CSV report. tempfile.gettempdir() instead of a
    # hard-coded "/tmp" so this also works off-POSIX.
    csv_path = os.path.join(tempfile.gettempdir(), "eval_results.csv")
    metrics_df.to_csv(csv_path, index=False)
    return figs, metrics_df, leaderboard, csv_path
| # ----------------------- | |
| # Gradio UI | |
| # ----------------------- | |
# Gradio UI: upload -> evaluate -> gallery of figures + two tables + CSV download.
with gr.Blocks(title="Agentic Evaluation Framework") as demo:
    gr.Markdown("## Agentic Evaluation Framework")
    gr.Markdown("Upload a CSV file with format: "
                "`task_id, task_type, prompt, agent, response, metadata`")

    with gr.Row():
        # BUG FIX: type="file" is not a valid value in Gradio 4.x (only
        # "filepath" or "binary"), so gr.File raised at startup — the likely
        # cause of the Space's "Runtime error". "filepath" passes a plain
        # path string to the callback, which save_uploaded() already handles.
        file_upload = gr.File(label="Upload CSV", type="filepath")
        eval_btn = gr.Button("Run Evaluation", variant="primary")

    gallery = gr.Gallery(label="Visualizations", columns=2, height="auto")
    metrics_df_out = gr.Dataframe(label="Evaluation Results")
    leaderboard_out = gr.Dataframe(label="Leaderboard (Avg Scores)")
    download_out = gr.File(label="Download CSV Report")

    # Four outputs, matching run_evaluation's four return values.
    eval_btn.click(
        fn=run_evaluation,
        inputs=file_upload,
        outputs=[gallery, metrics_df_out, leaderboard_out, download_out],
    )

if __name__ == "__main__":
    demo.launch()