Files changed (1) hide show
  1. app.py +72 -100
app.py CHANGED
@@ -230,132 +230,104 @@
230
  # demo.launch()
231
 
232
  # app.py (patch)
233
- """
234
- Gradio application entrypoint for Hugging Face Spaces.
235
- """
236
  import os
237
  import tempfile
238
- import pandas as pd
239
- import gradio as gr
240
 
241
- from evaluator import evaluate_dataframe # <<-- fixed import (was `evaluation`)
242
- from synthetic_data import generate_synthetic_dataset
 
243
 
244
- # Helper to save uploaded file to local temp path (gradio File gives a NamedTemporaryFile-like object)
245
def save_uploaded(file_obj):
    """Resolve an uploaded file value to a path on the local filesystem.

    Handled input shapes, in order:

    * a dict holding an existing path under ``"name"``, ``"tmp_path"`` or
      ``"file"`` (some Gradio versions pass such a dict);
    * a string that is an existing path;
    * an object whose ``name`` attribute is an existing path;
    * any object with a ``read()`` method, whose content is copied to a new
      temporary file.

    Returns:
        The path, or ``None`` when *file_obj* is falsy or nothing readable
        could be obtained from it.
    """
    if not file_obj:
        return None

    if isinstance(file_obj, dict):
        for key in ("name", "tmp_path", "file"):
            path = file_obj.get(key)
            if path and os.path.exists(path):
                return path

    if isinstance(file_obj, str) and os.path.exists(file_obj):
        return file_obj

    # Only treat `name` as a path when it is path-like. File objects built
    # from a descriptor expose an int here, which os.path.exists() would
    # accept and which has no .lower() for the suffix check below.
    raw_name = getattr(file_obj, "name", None)
    if isinstance(raw_name, (str, bytes, os.PathLike)):
        name_attr = os.fsdecode(raw_name)
    else:
        name_attr = ""
    if name_attr and os.path.exists(name_attr):
        return raw_name

    # Fallback: copy the payload into a temp file. Best-effort by design:
    # anything that cannot be read yields None rather than an error.
    try:
        data = file_obj.read()
    except Exception:
        return None

    # Pick the suffix from the advertised name so the loader chooses the
    # matching parser.
    suffix = ".csv" if name_attr.lower().endswith(".csv") else ".json"
    fd, tmp = tempfile.mkstemp(suffix=suffix)
    with os.fdopen(fd, "wb") as f:
        f.write(data.encode() if isinstance(data, str) else data)
    return tmp
283
 
284
def load_file_to_df(path):
    """Load the file at *path* into a pandas DataFrame.

    A path ending in ``.csv`` (case-insensitive) is read with
    ``pd.read_csv``. Any other path is read as JSON Lines first and, if
    pandas rejects that with ``ValueError``, as a regular JSON document.

    Returns:
        The DataFrame, or ``None`` when *path* is ``None``.

    Raises:
        Whatever the pandas reader raises; errors are not wrapped.
    """
    if path is None:
        return None
    if str(path).lower().endswith(".csv"):
        return pd.read_csv(path)
    try:
        return pd.read_json(path, lines=True)
    except ValueError:
        # Not line-delimited: retry as a single JSON document.
        return pd.read_json(path)
296
-
297
def run_evaluation(file_obj):
    """Evaluate an uploaded dataset, or a synthetic demo, and export the metrics.

    When *file_obj* is ``None`` a synthetic dataset is generated; otherwise
    the upload is resolved to a path and loaded. Column labels are matched
    case-insensitively against ``prompt``, ``response``, ``task``, ``agent``
    and ``reference``. If ``reference`` is absent, the first of ``answer``,
    ``ground_truth`` or ``ref`` that is present is renamed to ``reference``.

    Returns:
        ``((image_paths, captions), metrics_df, leaderboard, csv_path)``.
        When no data could be loaded the result is
        ``(([], []), "No data loaded", None, None)``.
    """
    if file_obj is None:
        # No upload: fall back to the synthetic demo dataset.
        df = generate_synthetic_dataset(num_agents=3, num_samples=12)
    else:
        df = load_file_to_df(save_uploaded(file_obj))

    if df is None:
        # The first element stays a (paths, captions) pair so callers that
        # destructure it do not fail on the no-data path.
        return ([], []), "No data loaded", None, None

    # Map lower-cased label -> actual label; str() guards non-string labels.
    by_lower = {str(c).lower(): c for c in df.columns}
    rename_map = {}
    for wanted in ("prompt", "response", "task", "agent", "reference"):
        if wanted in by_lower:
            rename_map[by_lower[wanted]] = wanted
        elif wanted == "reference":
            for alias in ("answer", "ground_truth", "ref"):
                if alias in by_lower:
                    rename_map[by_lower[alias]] = wanted
                    break
    if rename_map:
        df = df.rename(columns=rename_map)

    metrics_df, images, leaderboard = evaluate_dataframe(df)

    # `images` is unpacked as (path, caption) pairs; split it into two
    # parallel lists for the gallery.
    gallery_items = [path for path, _caption in images]
    captions = [caption for _path, caption in images]

    # Save a CSV report for download.
    out_csv = "/tmp/eval_results.csv"
    metrics_df.to_csv(out_csv, index=False)

    return (gallery_items, captions), metrics_df, leaderboard, out_csv
 
 
 
336
 
337
- # Build Gradio UI
338
with gr.Blocks() as demo:
    gr.Markdown("# Agentic Evaluation Framework")
    gr.Markdown("Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference`. If no file is uploaded, a synthetic demo will run.")

    with gr.Row():
        file_input = gr.File(label="Upload CSV/JSON/JSONL (optional)", file_types=[".csv", ".json", ".jsonl"])
        run_btn = gr.Button("Run Evaluation")
        download_report = gr.File(label="Download CSV Report")  # output

    gallery = gr.Gallery(label="Visualization Outputs", columns=2, height="auto")
    table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
    leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Score per Agent & Task)")

    def on_run(file_in):
        """Run the evaluation and shape its result for the four output components."""
        gallery_payload, metrics_df, lb, out_csv = run_evaluation(file_in)
        # run_evaluation may report "no data" with an empty first element;
        # treat that as an empty gallery instead of failing to unpack it.
        gallery_items, captions = gallery_payload or ([], [])
        gallery_display = [
            (p, captions[i] if i < len(captions) else "")
            for i, p in enumerate(gallery_items)
        ]
        return gallery_display, metrics_df, lb, out_csv

    # download_report is the last output so the CSV path lands in the file widget.
    run_btn.click(fn=on_run, inputs=[file_input], outputs=[gallery, table, leaderboard, download_report])

    # Each bullet sits on its own line so the markdown renders as a list.
    gr.Markdown("## Tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive).\n- `reference` optional.\n- Download CSV report after evaluation.")

demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  # demo.launch()
231
 
232
  # app.py (patch)
233
+ import gradio as gr
234
+ import pandas as pd
 
235
  import os
236
  import tempfile
237
+ from evaluator import evaluate_dataframe, generate_visualizations
 
238
 
239
+ # -----------------------
240
+ # Helpers
241
+ # -----------------------
242
 
 
243
def save_uploaded(file_obj):
    """Return a filesystem path for the uploaded file object.

    Handled input shapes, in order:

    * a dict holding an existing path under ``"name"``, ``"path"`` or
      ``"file"``;
    * a string that is an existing path;
    * an object whose ``name`` attribute is an existing path;
    * any object with a callable ``read``, whose content is copied to a new
      ``.csv`` temporary file.

    Returns:
        The path, or ``None`` when *file_obj* is falsy or matches none of
        the shapes above.
    """
    if not file_obj:
        return None
    if isinstance(file_obj, dict):
        for key in ("name", "path", "file"):
            p = file_obj.get(key)
            if p and os.path.exists(p):
                return p
    if isinstance(file_obj, str) and os.path.exists(file_obj):
        return file_obj

    # Only path-like names are checked: an int `name` (a file descriptor)
    # would otherwise pass os.path.exists() and be returned as a "path".
    name = getattr(file_obj, "name", None)
    if isinstance(name, (str, bytes, os.PathLike)) and os.path.exists(name):
        return name

    # Fallback: dump the payload to a temp file. Read it first so an
    # unreadable input (dict, missing path string) leaves no stray file.
    reader = getattr(file_obj, "read", None)
    if not callable(reader):
        return None
    data = reader()
    if isinstance(data, str):
        # The temp file is opened in binary mode, so encode text payloads.
        data = data.encode()
    fd, tmp = tempfile.mkstemp(suffix=".csv")
    with os.fdopen(fd, "wb") as f:
        f.write(data)
    return tmp
261
 
262
def load_file_to_df(path):
    """Load the file at *path* into a pandas DataFrame.

    A path ending in ``.csv`` (case-insensitive) is read with the Python CSV
    engine and ``sep=None``, so pandas detects the delimiter. If that fails,
    or the path has another suffix, the file is read as JSON Lines and then
    as a regular JSON document.

    Returns:
        The DataFrame, or ``None`` when *path* is ``None``.

    Raises:
        ValueError: no parser accepted the file. When a CSV read was
            attempted first, its error is named in the message and chained
            as the cause, so it is not hidden behind the JSON error.
    """
    if path is None:
        return None
    p = str(path)

    csv_error = None
    if p.lower().endswith(".csv"):
        try:
            return pd.read_csv(p, sep=None, engine="python")
        except Exception as exc:
            # Deliberate best-effort: keep the failure and try JSON next.
            csv_error = exc

    try:
        try:
            return pd.read_json(p, lines=True)
        except ValueError:
            # Not line-delimited: retry as a single JSON document.
            return pd.read_json(p)
    except ValueError as json_error:
        if csv_error is None:
            raise
        raise ValueError(
            f"Could not parse {p!r} as CSV ({csv_error}) or as JSON ({json_error})"
        ) from csv_error
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
 
276
+ # -----------------------
277
+ # Evaluation wrapper
278
+ # -----------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
279
 
280
def run_evaluation(file):
    """Evaluate the uploaded dataset and write the metrics to a CSV report.

    The upload is resolved to a path and loaded into a DataFrame. Column
    labels are stripped of surrounding whitespace, and any of ``task_id``,
    ``task_type``, ``prompt``, ``agent``, ``response``, ``metadata`` and
    ``reference`` that is missing is added as an empty-string column.

    Returns:
        ``(figs, metrics_df, leaderboard, csv_path)``, or four ``None``
        values when nothing was loaded or the data is empty.
    """
    path = save_uploaded(file)
    df = load_file_to_df(path)

    if df is None or df.empty:
        # One placeholder per output component wired to this callback;
        # returning a different count makes the click handler fail.
        return None, None, None, None

    # Normalize column names; str() guards against non-string labels.
    df.columns = [str(c).strip() for c in df.columns]

    # Expected cols: task_id, task_type, prompt, agent, response, metadata,
    # plus an optional reference column.
    expected = (
        "task_id",
        "task_type",
        "prompt",
        "agent",
        "response",
        "metadata",
        "reference",
    )
    for col in expected:
        if col not in df.columns:
            df[col] = ""

    # The second element of the evaluator's result is not used here; the
    # gallery content comes from generate_visualizations instead.
    metrics_df, _images, leaderboard = evaluate_dataframe(df)
    figs = generate_visualizations(metrics_df, leaderboard)

    # Save evaluation results.
    csv_path = "/tmp/eval_results.csv"
    metrics_df.to_csv(csv_path, index=False)

    return figs, metrics_df, leaderboard, csv_path
 
 
 
307
 
308
+ # -----------------------
309
+ # Gradio UI
310
+ # -----------------------
311
 
312
with gr.Blocks(title="Agentic Evaluation Framework") as demo:
    gr.Markdown("## Agentic Evaluation Framework")
    gr.Markdown("Upload a CSV file with format: "
                "`task_id, task_type, prompt, agent, response, metadata`")

    with gr.Row():
        # `type` is left at the library default: the accepted values differ
        # between Gradio major versions ("file" is not accepted by newer
        # releases), and save_uploaded copes with both a path string and a
        # file-like object.
        file_upload = gr.File(label="Upload CSV")
        eval_btn = gr.Button("Run Evaluation", variant="primary")

    gallery = gr.Gallery(label="Visualizations", columns=2, height="auto")
    metrics_df_out = gr.Dataframe(label="Evaluation Results")
    leaderboard_out = gr.Dataframe(label="Leaderboard (Avg Scores)")
    download_out = gr.File(label="Download CSV Report")

    # run_evaluation must return one value per component listed in `outputs`.
    eval_btn.click(
        fn=run_evaluation,
        inputs=file_upload,
        outputs=[gallery, metrics_df_out, leaderboard_out, download_out],
    )

if __name__ == "__main__":
    demo.launch()