manayporwal07 committed on
Commit
88604d0
·
verified ·
1 Parent(s): d4e4912

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -49
app.py CHANGED
@@ -229,38 +229,63 @@
229
 
230
  # demo.launch()
231
 
 
232
  """
233
  Gradio application entrypoint for Hugging Face Spaces.
234
  """
235
-
236
  import os
237
  import tempfile
238
  import pandas as pd
239
  import gradio as gr
240
- from evaluator import evaluate_dataframe
 
241
  from synthetic_data import generate_synthetic_dataset
242
 
243
- # -----------------------------
244
- # File Handling
245
- # -----------------------------
246
  def save_uploaded(file_obj):
247
  if not file_obj:
248
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
249
  try:
250
- return file_obj.name
 
251
  except Exception:
 
 
 
 
252
  data = file_obj.read()
253
- suffix = ".csv" if file_obj.name.endswith(".csv") else ".json"
254
- fd, tmp = tempfile.mkstemp(suffix=suffix)
255
- with os.fdopen(fd, "wb") as f:
 
 
 
 
 
 
 
 
256
  f.write(data)
257
- return tmp
258
 
259
  def load_file_to_df(path):
260
  if path is None:
261
  return None
262
  try:
263
- if path.endswith(".csv"):
264
  return pd.read_csv(path)
265
  try:
266
  return pd.read_json(path, lines=True)
@@ -269,11 +294,8 @@ def load_file_to_df(path):
269
  except Exception as e:
270
  raise e
271
 
272
-
273
- # -----------------------------
274
- # Evaluation Pipeline
275
- # -----------------------------
276
  def run_evaluation(file_obj):
 
277
  if file_obj is None:
278
  df = generate_synthetic_dataset(num_agents=3, num_samples=12)
279
  else:
@@ -281,65 +303,59 @@ def run_evaluation(file_obj):
281
  df = load_file_to_df(path)
282
 
283
  if df is None:
284
- return None, "No data loaded", None
285
 
286
  # Normalize column names
287
  cols = {c.lower(): c for c in df.columns}
288
  rename_map = {}
289
- for k in ["task_id", "prompt", "response", "agent", "reference"]:
290
  if k not in cols:
291
- for alt in [k, k.capitalize(), k.upper()]:
292
- if alt.lower() in cols:
293
- rename_map[cols[alt.lower()]] = k
294
- break
 
 
 
 
 
295
  if rename_map:
296
  df = df.rename(columns=rename_map)
297
 
298
  metrics_df, images, leaderboard = evaluate_dataframe(df)
299
 
 
300
  gallery_items = [p for (p, caption) in images]
301
  captions = [caption for (p, caption) in images]
302
 
303
- # Save CSV report
304
  out_csv = "/tmp/eval_results.csv"
305
  metrics_df.to_csv(out_csv, index=False)
306
 
307
- return (gallery_items, captions), metrics_df, leaderboard
308
 
309
-
310
- # -----------------------------
311
- # Gradio UI
312
- # -----------------------------
313
  with gr.Blocks() as demo:
314
- gr.Markdown("# 🧪 Agentic Evaluation Framework")
315
- gr.Markdown(
316
- "Upload a CSV/JSON/JSONL with columns: "
317
- "`task_id,prompt,response,agent,reference`. "
318
- "If no file is uploaded, a small synthetic demo will run."
319
- )
320
 
321
  with gr.Row():
322
- file_input = gr.File(label="Upload CSV/JSON/JSONL", file_types=[".csv", ".json", ".jsonl"])
323
  run_btn = gr.Button("Run Evaluation")
324
- download_report = gr.File(label="Download CSV Report")
325
 
326
  gallery = gr.Gallery(label="Visualization Outputs", columns=2, height="auto")
327
  table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
328
- leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Final Score per Agent)")
329
 
330
  def on_run(file_in):
331
- (gallery_items, captions), metrics_df, lb = run_evaluation(file_in)
332
- gallery_display = [(p, captions[i] if i < len(captions) else "") for i, p in enumerate(gallery_items)] if gallery_items else []
333
- csv_path = "/tmp/eval_results.csv" # ensure CSV report is downloadable
334
- return gallery_display, metrics_df, lb, csv_path
335
-
336
- run_btn.click(
337
- fn=on_run,
338
- inputs=[file_input],
339
- outputs=[gallery, table, leaderboard, download_report]
340
- )
341
-
342
- gr.Markdown("## Tips\n- Ensure columns: `task_id,prompt,response,agent,reference` "
343
- "(case-insensitive).\n- Visualization images in Gallery.\n- Download CSV after evaluation.")
344
 
345
  demo.launch()
 
229
 
230
  # demo.launch()
231
 
232
+ # app.py (patch)
233
  """
234
  Gradio application entrypoint for Hugging Face Spaces.
235
  """
 
236
  import os
237
  import tempfile
238
  import pandas as pd
239
  import gradio as gr
240
+
241
+ from evaluator import evaluate_dataframe # <<-- fixed import (was `evaluation`)
242
  from synthetic_data import generate_synthetic_dataset
243
 
244
+ # Helper to save uploaded file to local temp path (gradio File gives a NamedTemporaryFile-like object)
 
 
245
  def save_uploaded(file_obj):
246
  if not file_obj:
247
  return None
248
+
249
+ # When using some Gradio versions, file_obj may be a dict with 'name' or 'tmp_path'
250
+ if isinstance(file_obj, dict):
251
+ for key in ("name", "tmp_path", "file"):
252
+ path = file_obj.get(key)
253
+ if path and os.path.exists(path):
254
+ return path
255
+
256
+ # If it's already a path (string)
257
+ if isinstance(file_obj, str) and os.path.exists(file_obj):
258
+ return file_obj
259
+
260
+ # If it has a .name attribute and file exists
261
  try:
262
+ if hasattr(file_obj, "name") and os.path.exists(file_obj.name):
263
+ return file_obj.name
264
  except Exception:
265
+ pass
266
+
267
+ # Fallback: write bytes to a temp file
268
+ try:
269
  data = file_obj.read()
270
+ except Exception:
271
+ return None
272
+
273
+ # choose suffix heuristically
274
+ name_attr = getattr(file_obj, "name", "")
275
+ suffix = ".csv" if name_attr.lower().endswith(".csv") else ".json"
276
+ fd, tmp = tempfile.mkstemp(suffix=suffix)
277
+ with os.fdopen(fd, "wb") as f:
278
+ if isinstance(data, str):
279
+ f.write(data.encode())
280
+ else:
281
  f.write(data)
282
+ return tmp
283
 
284
  def load_file_to_df(path):
285
  if path is None:
286
  return None
287
  try:
288
+ if str(path).lower().endswith(".csv"):
289
  return pd.read_csv(path)
290
  try:
291
  return pd.read_json(path, lines=True)
 
294
  except Exception as e:
295
  raise e
296
 
 
 
 
 
297
  def run_evaluation(file_obj):
298
+ # If no file provided, use synthetic demo
299
  if file_obj is None:
300
  df = generate_synthetic_dataset(num_agents=3, num_samples=12)
301
  else:
 
303
  df = load_file_to_df(path)
304
 
305
  if df is None:
306
+ return None, "No data loaded", None, None
307
 
308
  # Normalize column names
309
  cols = {c.lower(): c for c in df.columns}
310
  rename_map = {}
311
+ for k in ["prompt", "response", "task", "agent", "reference"]:
312
  if k not in cols:
313
+ if k == "reference":
314
+ for alt in ["answer", "ground_truth", "ref"]:
315
+ if alt in cols:
316
+ rename_map[cols[alt]] = k
317
+ break
318
+ else:
319
+ for alt in [k, k.capitalize(), k.upper()]:
320
+ if alt.lower() in cols:
321
+ rename_map[cols[alt.lower()]] = k
322
  if rename_map:
323
  df = df.rename(columns=rename_map)
324
 
325
  metrics_df, images, leaderboard = evaluate_dataframe(df)
326
 
327
+ # Prepare gallery (list of image file paths). Gradio Gallery accepts list of (path, caption).
328
  gallery_items = [p for (p, caption) in images]
329
  captions = [caption for (p, caption) in images]
330
 
331
+ # Save a CSV report for download
332
  out_csv = "/tmp/eval_results.csv"
333
  metrics_df.to_csv(out_csv, index=False)
334
 
335
+ return (gallery_items, captions), metrics_df, leaderboard, out_csv
336
 
337
+ # Build Gradio UI
 
 
 
338
  with gr.Blocks() as demo:
339
+ gr.Markdown("# Agentic Evaluation Framework")
340
+ gr.Markdown("Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference`. If no file is uploaded, a synthetic demo will run.")
 
 
 
 
341
 
342
  with gr.Row():
343
+ file_input = gr.File(label="Upload CSV/JSON/JSONL (optional)", file_types=[".csv", ".json", ".jsonl"])
344
  run_btn = gr.Button("Run Evaluation")
345
+ download_report = gr.File(label="Download CSV Report") # output
346
 
347
  gallery = gr.Gallery(label="Visualization Outputs", columns=2, height="auto")
348
  table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
349
+ leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Score per Agent & Task)")
350
 
351
  def on_run(file_in):
352
+ (gallery_items, captions), metrics_df, lb, out_csv = run_evaluation(file_in)
353
+ gallery_display = [(p, captions[i] if i < len(captions) else "") for i, p in enumerate(gallery_items)]
354
+ return gallery_display, metrics_df, lb, out_csv
355
+
356
+ # include download_report as the last output
357
+ run_btn.click(fn=on_run, inputs=[file_input], outputs=[gallery, table, leaderboard, download_report])
358
+
359
+ gr.Markdown("## Tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive). - `reference` optional.\n- Download CSV report after evaluation.")
 
 
 
 
 
360
 
361
  demo.launch()