Files changed (1) hide show
  1. app.py +110 -0
app.py CHANGED
@@ -228,3 +228,113 @@
228
  # "- `reference` optional.\n- Download CSV report after evaluation.")
229
 
230
  # demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  # "- `reference` optional.\n- Download CSV report after evaluation.")
229
 
230
  # demo.launch()
231
+
232
+ """
233
+ Gradio application entrypoint for Hugging Face Spaces.
234
+ """
235
+
236
+ import os
237
+ import tempfile
238
+ import pandas as pd
239
+ import gradio as gr
240
+ from evaluator import evaluate_dataframe
241
+ from synthetic_data import generate_synthetic_dataset
242
+
243
+ # -----------------------------
244
+ # File Handling
245
+ # -----------------------------
246
def save_uploaded(file_obj):
    """Resolve an uploaded file to a filesystem path.

    Args:
        file_obj: The upload value. May be ``None``/empty, a path
            (``str`` or ``os.PathLike``), an object exposing a ``name``
            attribute, or a readable file-like object without one.

    Returns:
        A path for the upload, or ``None`` when nothing was uploaded.
        When the object has no ``name``, its contents are written to a new
        temporary file and that file's path is returned.
    """
    if not file_obj:
        return None

    # A plain path has no ``.name`` attribute; it is already usable as-is.
    if isinstance(file_obj, (str, os.PathLike)):
        return os.fspath(file_obj)

    name = getattr(file_obj, "name", None)
    if name is not None:
        return name

    # No name to rely on: spool the contents to a temporary file. The
    # previous fallback read ``file_obj.name`` again here, which is exactly
    # the attribute that was missing, so it could never succeed.
    data = file_obj.read()
    if isinstance(data, str):
        data = data.encode("utf-8")

    # Without a filename the format has to be guessed from the content:
    # JSON / JSON Lines documents start with "{" or "[".
    suffix = ".json" if data.lstrip()[:1] in (b"{", b"[") else ".csv"
    fd, tmp = tempfile.mkstemp(suffix=suffix)
    with os.fdopen(fd, "wb") as f:
        f.write(data)
    return tmp
258
+
259
def load_file_to_df(path):
    """Load a CSV, JSON, or JSON Lines file into a DataFrame.

    Args:
        path: Path of the file to read, or ``None``.

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` when ``path`` is
        ``None``.

    Raises:
        ValueError: If a non-CSV file parses neither as a JSON document nor
            as JSON Lines. Errors raised by pandas propagate unchanged.
    """
    if path is None:
        return None

    # Match the extension case-insensitively so "DATA.CSV" is read as CSV.
    lowered = str(path).lower()
    if lowered.endswith(".csv"):
        return pd.read_csv(path)

    # ".jsonl" is tried as line-delimited first. Any other file is tried as
    # a single JSON document first, so a one-line JSON array is not
    # mistaken for a single JSON Lines record; the other mode is the
    # fallback in both cases.
    lines_first = lowered.endswith(".jsonl")
    try:
        return pd.read_json(path, lines=lines_first)
    except ValueError:
        return pd.read_json(path, lines=not lines_first)
271
+
272
+
273
+ # -----------------------------
274
+ # Evaluation Pipeline
275
+ # -----------------------------
276
def run_evaluation(file_obj):
    """Evaluate an uploaded dataset, or a synthetic demo when none is given.

    Args:
        file_obj: Upload value handed to ``save_uploaded``. ``None`` runs
            the evaluation on a generated synthetic dataset instead.

    Returns:
        ``((image_paths, captions), metrics_df, leaderboard)`` on success,
        or ``(None, "No data loaded", None)`` when no DataFrame was loaded.
    """
    if file_obj is None:
        df = generate_synthetic_dataset(num_agents=3, num_samples=12)
    else:
        df = load_file_to_df(save_uploaded(file_obj))

    if df is None:
        return None, "No data loaded", None

    # Rename case variants ("Prompt", "TASK_ID", ...) to the canonical
    # lower-case names. The previous loop only ran when the lower-cased
    # name was absent and then looked that same name up again, so it never
    # renamed anything.
    expected = ("task_id", "prompt", "response", "agent", "reference")
    existing = set(df.columns)
    rename_map = {}
    claimed = set()
    for col in df.columns:
        key = str(col).lower()
        if key not in expected or col == key:
            continue
        # Never rename onto a column that already exists or was already
        # claimed by another variant; that would create duplicate columns.
        if key in existing or key in claimed:
            continue
        rename_map[col] = key
        claimed.add(key)
    if rename_map:
        df = df.rename(columns=rename_map)

    metrics_df, images, leaderboard = evaluate_dataframe(df)

    # ``images`` is unpacked as (path, caption) pairs.
    gallery_items = [p for (p, _caption) in images]
    captions = [caption for (_p, caption) in images]

    # Save CSV report. The fixed path is kept as-is; note it is shared, so
    # each run overwrites the previous report.
    out_csv = "/tmp/eval_results.csv"
    metrics_df.to_csv(out_csv, index=False)

    return (gallery_items, captions), metrics_df, leaderboard
308
+
309
+
310
+ # -----------------------------
311
+ # Gradio UI
312
+ # -----------------------------
313
# Build the Gradio interface: an upload control, a run button, and outputs
# for the plots, the per-example metrics, the leaderboard and the CSV report.
with gr.Blocks() as demo:
    gr.Markdown("# 🧪 Agentic Evaluation Framework")
    gr.Markdown(
        "Upload a CSV/JSON/JSONL with columns: "
        "`task_id,prompt,response,agent,reference`. "
        "If no file is uploaded, a small synthetic demo will run."
    )

    with gr.Row():
        file_input = gr.File(label="Upload CSV/JSON/JSONL", file_types=[".csv", ".json", ".jsonl"])
        run_btn = gr.Button("Run Evaluation")
        download_report = gr.File(label="Download CSV Report")

    gallery = gr.Gallery(label="Visualization Outputs", columns=2, height="auto")
    table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
    leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Final Score per Agent & Task)")

    def on_run(file_in):
        """Run the evaluation and map its results onto the output components.

        Returns the gallery items, the metrics table, the leaderboard and
        the path of a freshly written CSV report, in that order.
        """
        result, metrics_df, lb = run_evaluation(file_in)
        if result is None:
            # run_evaluation reports "nothing loaded" as
            # (None, message, None); show the message instead of failing
            # on the tuple unpacking below.
            raise gr.Error(str(metrics_df))

        gallery_items, captions = result
        gallery_display = list(zip(gallery_items, captions))

        # Write the report to its own temporary file so the download
        # component has something to serve and runs do not share a path.
        fd, report_path = tempfile.mkstemp(prefix="eval_results_", suffix=".csv")
        os.close(fd)
        metrics_df.to_csv(report_path, index=False)

        return gallery_display, metrics_df, lb, report_path

    run_btn.click(
        fn=on_run,
        inputs=[file_input],
        outputs=[gallery, table, leaderboard, download_report],
    )

    gr.Markdown("## Tips\n- Ensure columns: `task_id,prompt,response,agent,reference` "
                "(case-insensitive).\n- Visualization images in Gallery.\n- Download CSV after evaluation.")

demo.launch()