Files changed (1) hide show
  1. app.py +138 -34
app.py CHANGED
@@ -1,3 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # app.py
2
  """
3
  Gradio application entrypoint for Hugging Face Spaces.
@@ -7,19 +133,16 @@ import os
7
  import tempfile
8
  import pandas as pd
9
  import gradio as gr
10
- from evaluator import evaluate_dataframe
11
  from synthetic_data import generate_synthetic_dataset
12
 
13
- # Helper to save uploaded file to local temp path (gradio File gives a NamedTemporaryFile-like object)
14
  def save_uploaded(file_obj):
15
  if not file_obj:
16
  return None
17
- # file_obj can be a dictionary or a file-like object depending on Gradio version
18
  try:
19
- path = file_obj.name
20
- return path
21
  except Exception:
22
- # fallback: write bytes to temp file
23
  data = file_obj.read()
24
  suffix = ".csv" if file_obj.name.endswith(".csv") else ".json"
25
  fd, tmp = tempfile.mkstemp(suffix=suffix)
@@ -30,38 +153,31 @@ def save_uploaded(file_obj):
30
  def load_file_to_df(path):
31
  if path is None:
32
  return None
33
- # Try CSV
34
  try:
35
  if path.endswith(".csv"):
36
  return pd.read_csv(path)
37
- # JSONL
38
  try:
39
  return pd.read_json(path, lines=True)
40
  except ValueError:
41
  return pd.read_json(path)
42
  except Exception as e:
43
- # As last resort, raise
44
  raise e
45
 
46
  def run_evaluation(file_obj):
47
- # If no file provided, use synthetic demo
48
  if file_obj is None:
49
  df = generate_synthetic_dataset(num_agents=3, num_samples=12)
50
  else:
51
  path = save_uploaded(file_obj)
52
  df = load_file_to_df(path)
53
 
54
- # Ensure required columns exist; otherwise, attempt to map common alternatives
55
  if df is None:
56
  return None, "No data loaded", None
57
 
58
- # Try to normalize column names
59
  cols = {c.lower(): c for c in df.columns}
60
- # rename common variants
61
  rename_map = {}
62
  for k in ["prompt", "response", "task", "agent", "reference"]:
63
  if k not in cols:
64
- # try variants
65
  if k == "reference":
66
  for alt in ["answer", "ground_truth", "ref"]:
67
  if alt in cols:
@@ -76,11 +192,9 @@ def run_evaluation(file_obj):
76
 
77
  metrics_df, images, leaderboard = evaluate_dataframe(df)
78
 
79
- # Prepare gallery (list of image file paths). Gradio Gallery accepts list of image paths or PIL images.
80
  gallery_items = [p for (p, caption) in images]
81
  captions = [caption for (p, caption) in images]
82
 
83
- # Save a CSV report for download
84
  out_csv = "/tmp/eval_results.csv"
85
  metrics_df.to_csv(out_csv, index=False)
86
 
@@ -90,37 +204,27 @@ def run_evaluation(file_obj):
90
  with gr.Blocks() as demo:
91
  gr.Markdown("# Agentic Evaluation Framework")
92
  gr.Markdown(
93
- "Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference` (reference optional). "
94
- "If no file is uploaded, a small synthetic demo will run."
95
  )
96
 
97
  with gr.Row():
98
- file_input = gr.File(label="Upload CSV / JSON / JSONL (optional)", file_types=[".csv", ".json", ".jsonl"])
99
  run_btn = gr.Button("Run Evaluation")
100
  download_report = gr.File(label="Download CSV Report")
101
 
102
- # βœ… Fixed Gallery (removed .style, added columns=2)
103
- gallery = gr.Gallery(
104
- label="Visualization Outputs",
105
- columns=2,
106
- height="auto"
107
- )
108
  table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
109
- leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Final Score per Agent & Task)")
110
 
111
  def on_run(file_in):
112
  (gallery_items, captions), metrics_df, lb = run_evaluation(file_in)
113
- # Save gallery captions mapping into a simple list of tuples for Gradio gallery (path, caption)
114
- gallery_display = []
115
- for i, p in enumerate(gallery_items):
116
- caption = captions[i] if i < len(captions) else ""
117
- gallery_display.append((p, caption))
118
  return gallery_display, metrics_df, lb
119
 
120
  run_btn.click(fn=on_run, inputs=[file_input], outputs=[gallery, table, leaderboard])
121
 
122
- gr.Markdown("## Usage tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive). "
123
- "- `reference` can be empty but accuracy/hallucination will be weaker.\n"
124
- "- Visualization images are available in the Gallery and a CSV report is downloadable.")
125
 
126
  demo.launch()
 
1
+ # # app.py
2
+ # """
3
+ # Gradio application entrypoint for Hugging Face Spaces.
4
+ # """
5
+
6
+ # import os
7
+ # import tempfile
8
+ # import pandas as pd
9
+ # import gradio as gr
10
+ # from evaluator import evaluate_dataframe
11
+ # from synthetic_data import generate_synthetic_dataset
12
+
13
+ # # Helper to save uploaded file to local temp path (gradio File gives a NamedTemporaryFile-like object)
14
+ # def save_uploaded(file_obj):
15
+ # if not file_obj:
16
+ # return None
17
+ # # file_obj can be a dictionary or a file-like object depending on Gradio version
18
+ # try:
19
+ # path = file_obj.name
20
+ # return path
21
+ # except Exception:
22
+ # # fallback: write bytes to temp file
23
+ # data = file_obj.read()
24
+ # suffix = ".csv" if file_obj.name.endswith(".csv") else ".json"
25
+ # fd, tmp = tempfile.mkstemp(suffix=suffix)
26
+ # with os.fdopen(fd, "wb") as f:
27
+ # f.write(data)
28
+ # return tmp
29
+
30
+ # def load_file_to_df(path):
31
+ # if path is None:
32
+ # return None
33
+ # # Try CSV
34
+ # try:
35
+ # if path.endswith(".csv"):
36
+ # return pd.read_csv(path)
37
+ # # JSONL
38
+ # try:
39
+ # return pd.read_json(path, lines=True)
40
+ # except ValueError:
41
+ # return pd.read_json(path)
42
+ # except Exception as e:
43
+ # # As last resort, raise
44
+ # raise e
45
+
46
+ # def run_evaluation(file_obj):
47
+ # # If no file provided, use synthetic demo
48
+ # if file_obj is None:
49
+ # df = generate_synthetic_dataset(num_agents=3, num_samples=12)
50
+ # else:
51
+ # path = save_uploaded(file_obj)
52
+ # df = load_file_to_df(path)
53
+
54
+ # # Ensure required columns exist; otherwise, attempt to map common alternatives
55
+ # if df is None:
56
+ # return None, "No data loaded", None
57
+
58
+ # # Try to normalize column names
59
+ # cols = {c.lower(): c for c in df.columns}
60
+ # # rename common variants
61
+ # rename_map = {}
62
+ # for k in ["prompt", "response", "task", "agent", "reference"]:
63
+ # if k not in cols:
64
+ # # try variants
65
+ # if k == "reference":
66
+ # for alt in ["answer", "ground_truth", "ref"]:
67
+ # if alt in cols:
68
+ # rename_map[cols[alt]] = k
69
+ # break
70
+ # else:
71
+ # for alt in [k, k.capitalize(), k.upper()]:
72
+ # if alt.lower() in cols:
73
+ # rename_map[cols[alt.lower()]] = k
74
+ # if rename_map:
75
+ # df = df.rename(columns=rename_map)
76
+
77
+ # metrics_df, images, leaderboard = evaluate_dataframe(df)
78
+
79
+ # # Prepare gallery (list of image file paths). Gradio Gallery accepts list of image paths or PIL images.
80
+ # gallery_items = [p for (p, caption) in images]
81
+ # captions = [caption for (p, caption) in images]
82
+
83
+ # # Save a CSV report for download
84
+ # out_csv = "/tmp/eval_results.csv"
85
+ # metrics_df.to_csv(out_csv, index=False)
86
+
87
+ # return (gallery_items, captions), metrics_df, leaderboard
88
+
89
+ # # Build Gradio UI
90
+ # with gr.Blocks() as demo:
91
+ # gr.Markdown("# Agentic Evaluation Framework")
92
+ # gr.Markdown(
93
+ # "Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference` (reference optional). "
94
+ # "If no file is uploaded, a small synthetic demo will run."
95
+ # )
96
+
97
+ # with gr.Row():
98
+ # file_input = gr.File(label="Upload CSV / JSON / JSONL (optional)", file_types=[".csv", ".json", ".jsonl"])
99
+ # run_btn = gr.Button("Run Evaluation")
100
+ # download_report = gr.File(label="Download CSV Report")
101
+
102
+ # # βœ… Fixed Gallery (removed .style, added columns=2)
103
+ # gallery = gr.Gallery(
104
+ # label="Visualization Outputs",
105
+ # columns=2,
106
+ # height="auto"
107
+ # )
108
+ # table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
109
+ # leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Final Score per Agent & Task)")
110
+
111
+ # def on_run(file_in):
112
+ # (gallery_items, captions), metrics_df, lb = run_evaluation(file_in)
113
+ # # Save gallery captions mapping into a simple list of tuples for Gradio gallery (path, caption)
114
+ # gallery_display = []
115
+ # for i, p in enumerate(gallery_items):
116
+ # caption = captions[i] if i < len(captions) else ""
117
+ # gallery_display.append((p, caption))
118
+ # return gallery_display, metrics_df, lb
119
+
120
+ # run_btn.click(fn=on_run, inputs=[file_input], outputs=[gallery, table, leaderboard])
121
+
122
+ # gr.Markdown("## Usage tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive). "
123
+ # "- `reference` can be empty but accuracy/hallucination will be weaker.\n"
124
+ # "- Visualization images are available in the Gallery and a CSV report is downloadable.")
125
+
126
+ # demo.launch()
127
  # app.py
128
  """
129
  Gradio application entrypoint for Hugging Face Spaces.
 
133
  import tempfile
134
  import pandas as pd
135
  import gradio as gr
136
+ from evaluation import evaluate_dataframe # βœ… updated import
137
  from synthetic_data import generate_synthetic_dataset
138
 
139
# Helper to save uploaded file
def save_uploaded(file_obj):
    """Return a local filesystem path for an uploaded file.

    Depending on the Gradio version, the upload may arrive as a plain path
    string, an object exposing a ``.name`` temp-file path, or a raw
    file-like object whose bytes we must persist ourselves.

    Returns None when nothing was uploaded.
    """
    if not file_obj:
        return None
    # Newer Gradio versions hand us the temp-file path directly.
    if isinstance(file_obj, str):
        return file_obj
    try:
        return file_obj.name
    except AttributeError:
        # No .name attribute: copy the raw bytes to a temp file.
        # (The original re-read file_obj.name here, which had just failed.)
        data = file_obj.read()
        name = getattr(file_obj, "name", "")
        suffix = ".csv" if name.endswith(".csv") else ".json"
        fd, tmp = tempfile.mkstemp(suffix=suffix)
        with os.fdopen(fd, "wb") as f:
            f.write(data)
        return tmp
153
def load_file_to_df(path):
    """Load a CSV, JSON, or JSONL file into a pandas DataFrame.

    Files whose extension is ``.csv`` (any case) are read as CSV; anything
    else is first tried as JSON-Lines, then as plain JSON.

    Returns None when *path* is None; otherwise raises whatever pandas
    raises on an unreadable file (callers see the original exception).
    """
    if path is None:
        return None
    # Case-insensitive extension check (original only matched lowercase).
    if path.lower().endswith(".csv"):
        return pd.read_csv(path)
    try:
        # JSONL first: a JSONL file is not valid plain JSON, but the
        # reverse often parses, so try the stricter format first.
        return pd.read_json(path, lines=True)
    except ValueError:
        return pd.read_json(path)
165
 
166
def run_evaluation(file_obj):
    """Run the full evaluation pipeline on an uploaded file (or a demo set).

    Parameters:
        file_obj: Gradio upload object, or None to run on synthetic data.

    Returns:
        ((gallery_paths, captions), metrics_df, leaderboard_df) on success,
        or (None, "No data loaded", None) when no DataFrame could be built.

    Side effect: writes a CSV report to /tmp/eval_results.csv.
    """
    if file_obj is None:
        # No upload: demo on a small synthetic dataset.
        df = generate_synthetic_dataset(num_agents=3, num_samples=12)
    else:
        df = load_file_to_df(save_uploaded(file_obj))

    if df is None:
        return None, "No data loaded", None

    # Normalize column names: case-insensitive match for the required
    # columns, plus common synonyms for the optional `reference` column.
    cols = {c.lower(): c for c in df.columns}
    rename_map = {}
    for k in ["prompt", "response", "task", "agent", "reference"]:
        if k in cols:
            # Column present, possibly with different casing (e.g. "Prompt").
            # The original variant loop checked `alt.lower() in cols`, which
            # is always just `k in cols` and never renamed anything.
            if cols[k] != k:
                rename_map[cols[k]] = k
        elif k == "reference":
            for alt in ["answer", "ground_truth", "ref"]:
                if alt in cols:
                    rename_map[cols[alt]] = k
                    break
    if rename_map:
        df = df.rename(columns=rename_map)

    metrics_df, images, leaderboard = evaluate_dataframe(df)

    # Split the (path, caption) pairs for the Gallery component.
    gallery_items = [p for (p, _caption) in images]
    captions = [caption for (_p, caption) in images]

    # Persist a CSV report so the UI can offer it for download.
    out_csv = "/tmp/eval_results.csv"
    metrics_df.to_csv(out_csv, index=False)

    return (gallery_items, captions), metrics_df, leaderboard
204
# Build the Gradio UI.
with gr.Blocks() as demo:
    gr.Markdown("# Agentic Evaluation Framework")
    gr.Markdown(
        "Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference`. "
        "If no file is uploaded, a synthetic demo will run."
    )

    with gr.Row():
        file_input = gr.File(label="Upload CSV/JSON/JSONL", file_types=[".csv", ".json", ".jsonl"])
        run_btn = gr.Button("Run Evaluation")
        download_report = gr.File(label="Download CSV Report")

    gallery = gr.Gallery(label="Visualization Outputs", columns=2, height="auto")
    table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
    leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Score per Agent & Task)")

    def on_run(file_in):
        """Adapt run_evaluation()'s outputs to the Gradio components."""
        (gallery_items, captions), metrics_df, lb = run_evaluation(file_in)
        gallery_display = [(p, captions[i] if i < len(captions) else "") for i, p in enumerate(gallery_items)]
        # run_evaluation writes its CSV report to this fixed path; surface it
        # in the download component (it was previously declared but never fed).
        return gallery_display, metrics_df, lb, "/tmp/eval_results.csv"

    run_btn.click(fn=on_run, inputs=[file_input], outputs=[gallery, table, leaderboard, download_report])

    gr.Markdown("## Tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive). "
                "- `reference` optional.\n- Download CSV report after evaluation.")

demo.launch()