# # app.py # """ # Gradio application entrypoint for Hugging Face Spaces. # """ # import os # import tempfile # import pandas as pd # import gradio as gr # from evaluator import evaluate_dataframe # from synthetic_data import generate_synthetic_dataset # # Helper to save uploaded file to local temp path (gradio File gives a NamedTemporaryFile-like object) # def save_uploaded(file_obj): # if not file_obj: # return None # # file_obj can be a dictionary or a file-like object depending on Gradio version # try: # path = file_obj.name # return path # except Exception: # # fallback: write bytes to temp file # data = file_obj.read() # suffix = ".csv" if file_obj.name.endswith(".csv") else ".json" # fd, tmp = tempfile.mkstemp(suffix=suffix) # with os.fdopen(fd, "wb") as f: # f.write(data) # return tmp # def load_file_to_df(path): # if path is None: # return None # # Try CSV # try: # if path.endswith(".csv"): # return pd.read_csv(path) # # JSONL # try: # return pd.read_json(path, lines=True) # except ValueError: # return pd.read_json(path) # except Exception as e: # # As last resort, raise # raise e # def run_evaluation(file_obj): # # If no file provided, use synthetic demo # if file_obj is None: # df = generate_synthetic_dataset(num_agents=3, num_samples=12) # else: # path = save_uploaded(file_obj) # df = load_file_to_df(path) # # Ensure required columns exist; otherwise, attempt to map common alternatives # if df is None: # return None, "No data loaded", None # # Try to normalize column names # cols = {c.lower(): c for c in df.columns} # # rename common variants # rename_map = {} # for k in ["prompt", "response", "task", "agent", "reference"]: # if k not in cols: # # try variants # if k == "reference": # for alt in ["answer", "ground_truth", "ref"]: # if alt in cols: # rename_map[cols[alt]] = k # break # else: # for alt in [k, k.capitalize(), k.upper()]: # if alt.lower() in cols: # rename_map[cols[alt.lower()]] = k # if rename_map: # df = df.rename(columns=rename_map) # 
metrics_df, images, leaderboard = evaluate_dataframe(df) # # Prepare gallery (list of image file paths). Gradio Gallery accepts list of image paths or PIL images. # gallery_items = [p for (p, caption) in images] # captions = [caption for (p, caption) in images] # # Save a CSV report for download # out_csv = "/tmp/eval_results.csv" # metrics_df.to_csv(out_csv, index=False) # return (gallery_items, captions), metrics_df, leaderboard # # Build Gradio UI # with gr.Blocks() as demo: # gr.Markdown("# Agentic Evaluation Framework") # gr.Markdown( # "Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference` (reference optional). " # "If no file is uploaded, a small synthetic demo will run." # ) # with gr.Row(): # file_input = gr.File(label="Upload CSV / JSON / JSONL (optional)", file_types=[".csv", ".json", ".jsonl"]) # run_btn = gr.Button("Run Evaluation") # download_report = gr.File(label="Download CSV Report") # # ✅ Fixed Gallery (removed .style, added columns=2) # gallery = gr.Gallery( # label="Visualization Outputs", # columns=2, # height="auto" # ) # table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)") # leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Final Score per Agent & Task)") # def on_run(file_in): # (gallery_items, captions), metrics_df, lb = run_evaluation(file_in) # # Save gallery captions mapping into a simple list of tuples for Gradio gallery (path, caption) # gallery_display = [] # for i, p in enumerate(gallery_items): # caption = captions[i] if i < len(captions) else "" # gallery_display.append((p, caption)) # return gallery_display, metrics_df, lb # run_btn.click(fn=on_run, inputs=[file_input], outputs=[gallery, table, leaderboard]) # gr.Markdown("## Usage tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive). 
" # "- `reference` can be empty but accuracy/hallucination will be weaker.\n" # "- Visualization images are available in the Gallery and a CSV report is downloadable.") # demo.launch() # app.py # """ # Gradio application entrypoint for Hugging Face Spaces. # """ # import os # import tempfile # import pandas as pd # import gradio as gr # from evaluation import evaluate_dataframe # ✅ updated import # from synthetic_data import generate_synthetic_dataset # # Helper to save uploaded file # def save_uploaded(file_obj): # if not file_obj: # return None # try: # return file_obj.name # except Exception: # data = file_obj.read() # suffix = ".csv" if file_obj.name.endswith(".csv") else ".json" # fd, tmp = tempfile.mkstemp(suffix=suffix) # with os.fdopen(fd, "wb") as f: # f.write(data) # return tmp # def load_file_to_df(path): # if path is None: # return None # try: # if path.endswith(".csv"): # return pd.read_csv(path) # try: # return pd.read_json(path, lines=True) # except ValueError: # return pd.read_json(path) # except Exception as e: # raise e # def run_evaluation(file_obj): # if file_obj is None: # df = generate_synthetic_dataset(num_agents=3, num_samples=12) # else: # path = save_uploaded(file_obj) # df = load_file_to_df(path) # if df is None: # return None, "No data loaded", None # # Normalize column names # cols = {c.lower(): c for c in df.columns} # rename_map = {} # for k in ["prompt", "response", "task", "agent", "reference"]: # if k not in cols: # if k == "reference": # for alt in ["answer", "ground_truth", "ref"]: # if alt in cols: # rename_map[cols[alt]] = k # break # else: # for alt in [k, k.capitalize(), k.upper()]: # if alt.lower() in cols: # rename_map[cols[alt.lower()]] = k # if rename_map: # df = df.rename(columns=rename_map) # metrics_df, images, leaderboard = evaluate_dataframe(df) # gallery_items = [p for (p, caption) in images] # captions = [caption for (p, caption) in images] # out_csv = "/tmp/eval_results.csv" # metrics_df.to_csv(out_csv, 
index=False) # return (gallery_items, captions), metrics_df, leaderboard # # Build Gradio UI # with gr.Blocks() as demo: # gr.Markdown("# Agentic Evaluation Framework") # gr.Markdown( # "Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference`. " # "If no file is uploaded, a synthetic demo will run." # ) # with gr.Row(): # file_input = gr.File(label="Upload CSV/JSON/JSONL", file_types=[".csv", ".json", ".jsonl"]) # run_btn = gr.Button("Run Evaluation") # download_report = gr.File(label="Download CSV Report") # gallery = gr.Gallery(label="Visualization Outputs", columns=2, height="auto") # table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)") # leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Score per Agent & Task)") # def on_run(file_in): # (gallery_items, captions), metrics_df, lb = run_evaluation(file_in) # gallery_display = [(p, captions[i] if i < len(captions) else "") for i, p in enumerate(gallery_items)] # return gallery_display, metrics_df, lb # run_btn.click(fn=on_run, inputs=[file_input], outputs=[gallery, table, leaderboard]) # gr.Markdown("## Tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive). 
# "- `reference` optional.\n- Download CSV report after evaluation.")
# demo.launch()

# app.py (patch)
import gradio as gr
import pandas as pd
import os
import tempfile

from evaluator import evaluate_dataframe, generate_visualizations


# -----------------------
# Helpers
# -----------------------
def save_uploaded(file_obj):
    """Return a filesystem path for the uploaded file object.

    Gradio has handed uploads to callbacks as a FileData-style dict, a plain
    path string, or a NamedTemporaryFile-like object depending on version;
    accept all three and, as a last resort, dump raw bytes into a temp file.
    """
    if not file_obj:
        return None
    if isinstance(file_obj, dict):
        # Newer Gradio versions may pass dicts; probe the common path keys.
        for key in ("name", "path", "file"):
            p = file_obj.get(key)
            if p and os.path.exists(p):
                return p
    if isinstance(file_obj, str) and os.path.exists(file_obj):
        return file_obj
    if hasattr(file_obj, "name") and os.path.exists(file_obj.name):
        return file_obj.name
    # Fallback: dump bytes to a temp file. Suffix is a guess; CSV is the
    # documented upload format for this app.
    fd, tmp = tempfile.mkstemp(suffix=".csv")
    with os.fdopen(fd, "wb") as f:
        f.write(file_obj.read())
    return tmp


def load_file_to_df(path):
    """Load the CSV / JSON / JSONL at *path* into a DataFrame (None if no path).

    For .csv files the CSV reader is tried first (delimiter sniffed via
    sep=None); on any failure we deliberately fall through to the JSON
    readers as a best-effort parse. JSONL is preferred over plain JSON.
    """
    if path is None:
        return None
    p = str(path)
    try:
        if p.lower().endswith(".csv"):
            return pd.read_csv(p, sep=None, engine="python")
    except Exception:
        # Best-effort: a mislabelled .csv may still parse as JSON below.
        pass
    try:
        return pd.read_json(p, lines=True)
    except Exception:
        return pd.read_json(p)


# -----------------------
# Evaluation wrapper
# -----------------------
def run_evaluation(file):
    """Run the full evaluation pipeline on an uploaded file.

    Returns a 4-tuple matching the Gradio outputs wired in the UI:
    (visualization figures, per-example metrics DataFrame,
    leaderboard DataFrame, path to the downloadable CSV report).
    """
    path = save_uploaded(file)
    df = load_file_to_df(path)
    if df is None or df.empty:
        # BUG FIX: the early return must have the same arity as the success
        # return (4 values). The previous 5-tuple did not match the four
        # Gradio output components and broke callback dispatch.
        return None, None, None, None

    # Normalize column names.
    df.columns = [c.strip() for c in df.columns]

    # Expected cols: task_id, task_type, prompt, agent, response, metadata —
    # fill in any that are missing so downstream code can rely on them.
    for col in ["task_id", "task_type", "prompt", "agent", "response", "metadata"]:
        if col not in df.columns:
            df[col] = ""

    # Add reference column if not provided (optional ground truth).
    if "reference" not in df.columns:
        df["reference"] = ""

    metrics_df, images, leaderboard = evaluate_dataframe(df)
    figs = generate_visualizations(metrics_df, leaderboard)

    # Save evaluation results for the download widget.
    csv_path = "/tmp/eval_results.csv"
    metrics_df.to_csv(csv_path, index=False)

    return figs, metrics_df, leaderboard, csv_path


# -----------------------
# Gradio UI
# -----------------------
with gr.Blocks(title="Agentic Evaluation Framework") as demo:
    gr.Markdown("## Agentic Evaluation Framework")
    gr.Markdown("Upload a CSV file with format: "
                "`task_id, task_type, prompt, agent, response, metadata`")

    with gr.Row():
        # BUG FIX: this file uses the Gradio 4.x API (gr.Gallery(columns=...)),
        # where gr.File's `type` must be "filepath" or "binary"; the 3.x value
        # "file" raises a ValueError at build time. save_uploaded() already
        # accepts plain path strings, so "filepath" is the correct choice.
        file_upload = gr.File(label="Upload CSV", type="filepath")
        eval_btn = gr.Button("Run Evaluation", variant="primary")

    # Output widgets, in the same order as run_evaluation's return tuple.
    gallery = gr.Gallery(label="Visualizations", columns=2, height="auto")
    metrics_df_out = gr.Dataframe(label="Evaluation Results")
    leaderboard_out = gr.Dataframe(label="Leaderboard (Avg Scores)")
    download_out = gr.File(label="Download CSV Report")

    eval_btn.click(
        fn=run_evaluation,
        inputs=file_upload,
        outputs=[gallery, metrics_df_out, leaderboard_out, download_out]
    )

if __name__ == "__main__":
    demo.launch()