# Spaces: Supastrikas-004/evaluation-framework
# Status: Runtime error
# File size: 11,500 Bytes

# # app.py
# """
# Gradio application entrypoint for Hugging Face Spaces.
# """

# import os
# import tempfile
# import pandas as pd
# import gradio as gr
# from evaluator import evaluate_dataframe
# from synthetic_data import generate_synthetic_dataset

# # Helper to save uploaded file to local temp path (gradio File gives a NamedTemporaryFile-like object)
# def save_uploaded(file_obj):
#     if not file_obj:
#         return None
#     # file_obj can be a dictionary or a file-like object depending on Gradio version
#     try:
#         path = file_obj.name
#         return path
#     except Exception:
#         # fallback: write bytes to temp file
#         data = file_obj.read()
#         suffix = ".csv" if file_obj.name.endswith(".csv") else ".json"
#         fd, tmp = tempfile.mkstemp(suffix=suffix)
#         with os.fdopen(fd, "wb") as f:
#             f.write(data)
#         return tmp

# def load_file_to_df(path):
#     if path is None:
#         return None
#     # Try CSV
#     try:
#         if path.endswith(".csv"):
#             return pd.read_csv(path)
#         # JSONL
#         try:
#             return pd.read_json(path, lines=True)
#         except ValueError:
#             return pd.read_json(path)
#     except Exception as e:
#         # As last resort, raise
#         raise e

# def run_evaluation(file_obj):
#     # If no file provided, use synthetic demo
#     if file_obj is None:
#         df = generate_synthetic_dataset(num_agents=3, num_samples=12)
#     else:
#         path = save_uploaded(file_obj)
#         df = load_file_to_df(path)

#     # Ensure required columns exist; otherwise, attempt to map common alternatives
#     if df is None:
#         return None, "No data loaded", None

#     # Try to normalize column names
#     cols = {c.lower(): c for c in df.columns}
#     # rename common variants
#     rename_map = {}
#     for k in ["prompt", "response", "task", "agent", "reference"]:
#         if k not in cols:
#             # try variants
#             if k == "reference":
#                 for alt in ["answer", "ground_truth", "ref"]:
#                     if alt in cols:
#                         rename_map[cols[alt]] = k
#                         break
#             else:
#                 for alt in [k, k.capitalize(), k.upper()]:
#                     if alt.lower() in cols:
#                         rename_map[cols[alt.lower()]] = k
#     if rename_map:
#         df = df.rename(columns=rename_map)

#     metrics_df, images, leaderboard = evaluate_dataframe(df)

#     # Prepare gallery (list of image file paths). Gradio Gallery accepts list of image paths or PIL images.
#     gallery_items = [p for (p, caption) in images]
#     captions = [caption for (p, caption) in images]

#     # Save a CSV report for download
#     out_csv = "/tmp/eval_results.csv"
#     metrics_df.to_csv(out_csv, index=False)

#     return (gallery_items, captions), metrics_df, leaderboard

# # Build Gradio UI
# with gr.Blocks() as demo:
#     gr.Markdown("# Agentic Evaluation Framework")
#     gr.Markdown(
#         "Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference` (reference optional). "
#         "If no file is uploaded, a small synthetic demo will run."
#     )

#     with gr.Row():
#         file_input = gr.File(label="Upload CSV / JSON / JSONL (optional)", file_types=[".csv", ".json", ".jsonl"])
#         run_btn = gr.Button("Run Evaluation")
#         download_report = gr.File(label="Download CSV Report")

#     # ✅ Fixed Gallery (removed .style, added columns=2)
#     gallery = gr.Gallery(
#         label="Visualization Outputs",
#         columns=2,
#         height="auto"
#     )
#     table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
#     leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Final Score per Agent & Task)")

#     def on_run(file_in):
#         (gallery_items, captions), metrics_df, lb = run_evaluation(file_in)
#         # Save gallery captions mapping into a simple list of tuples for Gradio gallery (path, caption)
#         gallery_display = []
#         for i, p in enumerate(gallery_items):
#             caption = captions[i] if i < len(captions) else ""
#             gallery_display.append((p, caption))
#         return gallery_display, metrics_df, lb

#     run_btn.click(fn=on_run, inputs=[file_input], outputs=[gallery, table, leaderboard])

#     gr.Markdown("## Usage tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive). "
#                 "- `reference` can be empty but accuracy/hallucination will be weaker.\n"
#                 "- Visualization images are available in the Gallery and a CSV report is downloadable.")

# demo.launch()
# app.py
# """
# Gradio application entrypoint for Hugging Face Spaces.
# """

# import os
# import tempfile
# import pandas as pd
# import gradio as gr
# from evaluation import evaluate_dataframe   # ✅ updated import
# from synthetic_data import generate_synthetic_dataset

# # Helper to save uploaded file
# def save_uploaded(file_obj):
#     if not file_obj:
#         return None
#     try:
#         return file_obj.name
#     except Exception:
#         data = file_obj.read()
#         suffix = ".csv" if file_obj.name.endswith(".csv") else ".json"
#         fd, tmp = tempfile.mkstemp(suffix=suffix)
#         with os.fdopen(fd, "wb") as f:
#             f.write(data)
#         return tmp

# def load_file_to_df(path):
#     if path is None:
#         return None
#     try:
#         if path.endswith(".csv"):
#             return pd.read_csv(path)
#         try:
#             return pd.read_json(path, lines=True)
#         except ValueError:
#             return pd.read_json(path)
#     except Exception as e:
#         raise e

# def run_evaluation(file_obj):
#     if file_obj is None:
#         df = generate_synthetic_dataset(num_agents=3, num_samples=12)
#     else:
#         path = save_uploaded(file_obj)
#         df = load_file_to_df(path)

#     if df is None:
#         return None, "No data loaded", None

#     # Normalize column names
#     cols = {c.lower(): c for c in df.columns}
#     rename_map = {}
#     for k in ["prompt", "response", "task", "agent", "reference"]:
#         if k not in cols:
#             if k == "reference":
#                 for alt in ["answer", "ground_truth", "ref"]:
#                     if alt in cols:
#                         rename_map[cols[alt]] = k
#                         break
#             else:
#                 for alt in [k, k.capitalize(), k.upper()]:
#                     if alt.lower() in cols:
#                         rename_map[cols[alt.lower()]] = k
#     if rename_map:
#         df = df.rename(columns=rename_map)

#     metrics_df, images, leaderboard = evaluate_dataframe(df)

#     gallery_items = [p for (p, caption) in images]
#     captions = [caption for (p, caption) in images]

#     out_csv = "/tmp/eval_results.csv"
#     metrics_df.to_csv(out_csv, index=False)

#     return (gallery_items, captions), metrics_df, leaderboard

# # Build Gradio UI
# with gr.Blocks() as demo:
#     gr.Markdown("# Agentic Evaluation Framework")
#     gr.Markdown(
#         "Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference`. "
#         "If no file is uploaded, a synthetic demo will run."
#     )

#     with gr.Row():
#         file_input = gr.File(label="Upload CSV/JSON/JSONL", file_types=[".csv", ".json", ".jsonl"])
#         run_btn = gr.Button("Run Evaluation")
#         download_report = gr.File(label="Download CSV Report")

#     gallery = gr.Gallery(label="Visualization Outputs", columns=2, height="auto")
#     table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
#     leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Score per Agent & Task)")

#     def on_run(file_in):
#         (gallery_items, captions), metrics_df, lb = run_evaluation(file_in)
#         gallery_display = [(p, captions[i] if i < len(captions) else "") for i, p in enumerate(gallery_items)]
#         return gallery_display, metrics_df, lb

#     run_btn.click(fn=on_run, inputs=[file_input], outputs=[gallery, table, leaderboard])

#     gr.Markdown("## Tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive). "
#                 "- `reference` optional.\n- Download CSV report after evaluation.")

# demo.launch()

# app.py (patch)
import gradio as gr
import pandas as pd
import os
import tempfile
from evaluator import evaluate_dataframe, generate_visualizations

# -----------------------
# Helpers
# -----------------------

def save_uploaded(file_obj):
    """Return a filesystem path for the uploaded file object.

    Accepts the shapes an upload value can take here: a path string (or
    ``os.PathLike``), a dict carrying the path under ``"name"``, ``"path"``
    or ``"file"``, or a file-like object with ``.name`` and/or ``.read()``.

    Args:
        file_obj: The upload value, or a falsy value when nothing was uploaded.

    Returns:
        A path to an existing file, or ``None`` when no file was provided or
        no usable path/content could be obtained from ``file_obj``.
    """
    if not file_obj:
        return None

    path_types = (str, os.PathLike)

    if isinstance(file_obj, dict):
        for key in ("name", "path", "file"):
            candidate = file_obj.get(key)
            if isinstance(candidate, path_types) and os.path.exists(candidate):
                return candidate
        # A dict has no read(); there is nothing to spill to disk.
        return None

    if isinstance(file_obj, path_types):
        return file_obj if os.path.exists(file_obj) else None

    name = getattr(file_obj, "name", None)
    if isinstance(name, path_types) and os.path.exists(name):
        return name

    reader = getattr(file_obj, "read", None)
    if not callable(reader):
        return None

    # Fallback: dump the stream's content to a temp file.
    data = reader()
    if isinstance(data, str):
        # Text-mode streams yield str, but the temp file is opened in binary.
        data = data.encode("utf-8")

    # Keep the original extension so the loader picks the right parser;
    # default to ".csv" when the stream carries no usable name.
    suffix = os.path.splitext(name)[1] if isinstance(name, str) else ""
    fd, tmp = tempfile.mkstemp(suffix=suffix or ".csv")
    with os.fdopen(fd, "wb") as f:
        f.write(data)
    return tmp

def load_file_to_df(path):
    """Load the file at ``path`` into a pandas DataFrame.

    Files whose name ends in ``.csv`` (case-insensitive) are first read as
    CSV with delimiter sniffing. If that fails, or for any other extension,
    the file is read as JSON Lines and finally as a regular JSON document.

    Args:
        path: Location of the file, or ``None``.

    Returns:
        The parsed DataFrame, or ``None`` when ``path`` is ``None``.

    Raises:
        Exception: Whatever the final ``pd.read_json`` attempt raises when
            no parser accepts the file.
    """
    if path is None:
        return None

    location = str(path)
    looks_like_csv = location.lower().endswith(".csv")

    if looks_like_csv:
        try:
            return pd.read_csv(location, sep=None, engine="python")
        except Exception:
            # Best effort: a CSV that cannot be parsed is retried as JSON.
            pass

    try:
        frame = pd.read_json(location, lines=True)
    except Exception:
        frame = pd.read_json(location)
    return frame

# -----------------------
# Evaluation wrapper
# -----------------------

def run_evaluation(file):
    """Evaluate an uploaded dataset and produce the values for the UI outputs.

    The upload is resolved to a path with ``save_uploaded``, loaded with
    ``load_file_to_df``, padded with any missing expected columns, scored by
    ``evaluate_dataframe`` and visualised by ``generate_visualizations``.
    The per-example metrics are also written to ``/tmp/eval_results.csv``.

    Args:
        file: The upload value from the file component (any shape accepted
            by ``save_uploaded``), or ``None``.

    Returns:
        A 4-tuple ``(figs, metrics_df, leaderboard, csv_path)``. When no data
        could be loaded or the table is empty, all four entries are ``None``.
    """
    # Exactly one value per output component wired to this callback
    # (gallery, metrics table, leaderboard, download); the empty case must
    # have the same arity as the successful return below.
    no_result = (None, None, None, None)

    path = save_uploaded(file)
    df = load_file_to_df(path)
    if df is None or df.empty:
        return no_result

    # Normalize column names. Labels are not guaranteed to be strings
    # (e.g. integer labels from JSON input), so coerce before stripping.
    df.columns = [str(c).strip() for c in df.columns]

    # Expected cols: task_id, task_type, prompt, agent, response, metadata,
    # plus an optional reference. Missing ones are added as empty strings.
    expected = (
        "task_id", "task_type", "prompt", "agent", "response", "metadata",
        "reference",
    )
    for col in expected:
        if col not in df.columns:
            df[col] = ""

    # The second element returned by evaluate_dataframe is not used here;
    # the gallery is fed from generate_visualizations instead.
    metrics_df, _images, leaderboard = evaluate_dataframe(df)
    figs = generate_visualizations(metrics_df, leaderboard)

    # Save evaluation results for the download component.
    csv_path = "/tmp/eval_results.csv"
    metrics_df.to_csv(csv_path, index=False)

    return figs, metrics_df, leaderboard, csv_path

# -----------------------
# Gradio UI
# -----------------------

# Build the UI: an upload box and a run button on one row, followed by the
# four components that `run_evaluation` fills in.
with gr.Blocks(title="Agentic Evaluation Framework") as demo:
    gr.Markdown("## Agentic Evaluation Framework")
    gr.Markdown("Upload a CSV file with format: "
                "`task_id, task_type, prompt, agent, response, metadata`")

    with gr.Row():
        # No explicit `type`: the value "file" is rejected by Gradio 4.x
        # (only "filepath" and "binary" are accepted there). The component's
        # default works on both major versions, and `save_uploaded` accepts
        # a path string, a dict, or a file-like object.
        file_upload = gr.File(label="Upload CSV")
        eval_btn = gr.Button("Run Evaluation", variant="primary")

    gallery = gr.Gallery(label="Visualizations", columns=2, height="auto")
    metrics_df_out = gr.Dataframe(label="Evaluation Results")
    leaderboard_out = gr.Dataframe(label="Leaderboard (Avg Scores)")
    download_out = gr.File(label="Download CSV Report")

    # `run_evaluation` must return one value per entry in `outputs`, in
    # this order.
    eval_btn.click(
        fn=run_evaluation,
        inputs=file_upload,
        outputs=[gallery, metrics_df_out, leaderboard_out, download_out]
    )

# Start the Gradio server only when this file is executed as a script;
# importing the module just builds `demo` without launching it.
if __name__ == "__main__":
    demo.launch()