Spaces:
Runtime error
Runtime error
Update app.py
#5
by
manayporwal07
- opened
app.py
CHANGED
|
@@ -125,106 +125,106 @@
|
|
| 125 |
|
| 126 |
# demo.launch()
|
| 127 |
# app.py
|
| 128 |
-
"""
|
| 129 |
-
Gradio application entrypoint for Hugging Face Spaces.
|
| 130 |
-
"""
|
| 131 |
-
|
| 132 |
-
import os
|
| 133 |
-
import tempfile
|
| 134 |
-
import pandas as pd
|
| 135 |
-
import gradio as gr
|
| 136 |
-
from evaluation import evaluate_dataframe # ✅ updated import
|
| 137 |
-
from synthetic_data import generate_synthetic_dataset
|
| 138 |
-
|
| 139 |
-
# Helper to save uploaded file
|
| 140 |
-
def save_uploaded(file_obj):
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
def load_file_to_df(path):
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
def run_evaluation(file_obj):
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
# Build Gradio UI
|
| 204 |
-
with gr.Blocks() as demo:
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
demo.launch()
|
|
|
|
| 125 |
|
| 126 |
# demo.launch()
|
| 127 |
# app.py
|
| 128 |
+
# """
|
| 129 |
+
# Gradio application entrypoint for Hugging Face Spaces.
|
| 130 |
+
# """
|
| 131 |
+
|
| 132 |
+
# import os
|
| 133 |
+
# import tempfile
|
| 134 |
+
# import pandas as pd
|
| 135 |
+
# import gradio as gr
|
| 136 |
+
# from evaluation import evaluate_dataframe # ✅ updated import
|
| 137 |
+
# from synthetic_data import generate_synthetic_dataset
|
| 138 |
+
|
| 139 |
+
# # Helper to save uploaded file
|
| 140 |
+
# def save_uploaded(file_obj):
|
| 141 |
+
# if not file_obj:
|
| 142 |
+
# return None
|
| 143 |
+
# try:
|
| 144 |
+
# return file_obj.name
|
| 145 |
+
# except Exception:
|
| 146 |
+
# data = file_obj.read()
|
| 147 |
+
# suffix = ".csv" if file_obj.name.endswith(".csv") else ".json"
|
| 148 |
+
# fd, tmp = tempfile.mkstemp(suffix=suffix)
|
| 149 |
+
# with os.fdopen(fd, "wb") as f:
|
| 150 |
+
# f.write(data)
|
| 151 |
+
# return tmp
|
| 152 |
+
|
| 153 |
+
# def load_file_to_df(path):
|
| 154 |
+
# if path is None:
|
| 155 |
+
# return None
|
| 156 |
+
# try:
|
| 157 |
+
# if path.endswith(".csv"):
|
| 158 |
+
# return pd.read_csv(path)
|
| 159 |
+
# try:
|
| 160 |
+
# return pd.read_json(path, lines=True)
|
| 161 |
+
# except ValueError:
|
| 162 |
+
# return pd.read_json(path)
|
| 163 |
+
# except Exception as e:
|
| 164 |
+
# raise e
|
| 165 |
+
|
| 166 |
+
# def run_evaluation(file_obj):
|
| 167 |
+
# if file_obj is None:
|
| 168 |
+
# df = generate_synthetic_dataset(num_agents=3, num_samples=12)
|
| 169 |
+
# else:
|
| 170 |
+
# path = save_uploaded(file_obj)
|
| 171 |
+
# df = load_file_to_df(path)
|
| 172 |
+
|
| 173 |
+
# if df is None:
|
| 174 |
+
# return None, "No data loaded", None
|
| 175 |
+
|
| 176 |
+
# # Normalize column names
|
| 177 |
+
# cols = {c.lower(): c for c in df.columns}
|
| 178 |
+
# rename_map = {}
|
| 179 |
+
# for k in ["prompt", "response", "task", "agent", "reference"]:
|
| 180 |
+
# if k not in cols:
|
| 181 |
+
# if k == "reference":
|
| 182 |
+
# for alt in ["answer", "ground_truth", "ref"]:
|
| 183 |
+
# if alt in cols:
|
| 184 |
+
# rename_map[cols[alt]] = k
|
| 185 |
+
# break
|
| 186 |
+
# else:
|
| 187 |
+
# for alt in [k, k.capitalize(), k.upper()]:
|
| 188 |
+
# if alt.lower() in cols:
|
| 189 |
+
# rename_map[cols[alt.lower()]] = k
|
| 190 |
+
# if rename_map:
|
| 191 |
+
# df = df.rename(columns=rename_map)
|
| 192 |
+
|
| 193 |
+
# metrics_df, images, leaderboard = evaluate_dataframe(df)
|
| 194 |
+
|
| 195 |
+
# gallery_items = [p for (p, caption) in images]
|
| 196 |
+
# captions = [caption for (p, caption) in images]
|
| 197 |
+
|
| 198 |
+
# out_csv = "/tmp/eval_results.csv"
|
| 199 |
+
# metrics_df.to_csv(out_csv, index=False)
|
| 200 |
+
|
| 201 |
+
# return (gallery_items, captions), metrics_df, leaderboard
|
| 202 |
+
|
| 203 |
+
# # Build Gradio UI
|
| 204 |
+
# with gr.Blocks() as demo:
|
| 205 |
+
# gr.Markdown("# Agentic Evaluation Framework")
|
| 206 |
+
# gr.Markdown(
|
| 207 |
+
# "Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference`. "
|
| 208 |
+
# "If no file is uploaded, a synthetic demo will run."
|
| 209 |
+
# )
|
| 210 |
+
|
| 211 |
+
# with gr.Row():
|
| 212 |
+
# file_input = gr.File(label="Upload CSV/JSON/JSONL", file_types=[".csv", ".json", ".jsonl"])
|
| 213 |
+
# run_btn = gr.Button("Run Evaluation")
|
| 214 |
+
# download_report = gr.File(label="Download CSV Report")
|
| 215 |
+
|
| 216 |
+
# gallery = gr.Gallery(label="Visualization Outputs", columns=2, height="auto")
|
| 217 |
+
# table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
|
| 218 |
+
# leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Score per Agent & Task)")
|
| 219 |
+
|
| 220 |
+
# def on_run(file_in):
|
| 221 |
+
# (gallery_items, captions), metrics_df, lb = run_evaluation(file_in)
|
| 222 |
+
# gallery_display = [(p, captions[i] if i < len(captions) else "") for i, p in enumerate(gallery_items)]
|
| 223 |
+
# return gallery_display, metrics_df, lb
|
| 224 |
+
|
| 225 |
+
# run_btn.click(fn=on_run, inputs=[file_input], outputs=[gallery, table, leaderboard])
|
| 226 |
+
|
| 227 |
+
# gr.Markdown("## Tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive). "
|
| 228 |
+
# "- `reference` optional.\n- Download CSV report after evaluation.")
|
| 229 |
+
|
| 230 |
+
# demo.launch()
|