Files changed (1) hide show
  1. app.py +103 -103
app.py CHANGED
@@ -125,106 +125,106 @@
125
 
126
  # demo.launch()
127
  # app.py
128
- """
129
- Gradio application entrypoint for Hugging Face Spaces.
130
- """
131
-
132
- import os
133
- import tempfile
134
- import pandas as pd
135
- import gradio as gr
136
- from evaluation import evaluate_dataframe # βœ… updated import
137
- from synthetic_data import generate_synthetic_dataset
138
-
139
- # Helper to save uploaded file
140
def save_uploaded(file_obj):
    """Return a filesystem path for an uploaded file object.

    Gradio normally supplies a tempfile-backed object whose ``.name`` is a
    real path on disk; use that directly when present. For bare file-like
    objects without a usable ``.name``, spool the raw bytes to a fresh
    temporary file instead.

    Args:
        file_obj: The uploaded file object from a ``gr.File`` input, or None.

    Returns:
        A path string, or None when no file was provided.
    """
    if not file_obj:
        return None
    # Fast path: object already carries a real path (typical for Gradio).
    path = getattr(file_obj, "name", None)
    if isinstance(path, str) and path:
        return path
    # Fallback: persist the raw bytes ourselves. NOTE: the original code
    # re-read file_obj.name here, which crashed when .name was the missing
    # attribute that sent us down this branch in the first place.
    data = file_obj.read()
    suffix = ".csv" if str(path or "").endswith(".csv") else ".json"
    fd, tmp = tempfile.mkstemp(suffix=suffix)
    with os.fdopen(fd, "wb") as f:
        f.write(data)
    return tmp
152
-
153
def load_file_to_df(path):
    """Load a CSV / JSON / JSONL file into a pandas DataFrame.

    Args:
        path: Filesystem path to the data file, or None.

    Returns:
        A DataFrame, or None when *path* is None.

    Raises:
        Whatever pandas raises for unreadable or malformed files (the
        original wrapped everything in ``except Exception as e: raise e``,
        which re-raised unchanged — a no-op, so it was removed).
    """
    if path is None:
        return None
    if path.endswith(".csv"):
        return pd.read_csv(path)
    # Try JSON-lines first (one record per line); pandas raises ValueError
    # for a plain JSON document, in which case fall back to regular JSON.
    try:
        return pd.read_json(path, lines=True)
    except ValueError:
        return pd.read_json(path)
165
-
166
def run_evaluation(file_obj):
    """Evaluate an uploaded dataset (or a synthetic demo dataset).

    Args:
        file_obj: Uploaded file object from ``gr.File``, or None to run a
            synthetic demo.

    Returns:
        ``((gallery_items, captions), metrics_df, leaderboard)`` — the same
        shape on every path, so callers can always unpack it.
    """
    if file_obj is None:
        df = generate_synthetic_dataset(num_agents=3, num_samples=12)
    else:
        df = load_file_to_df(save_uploaded(file_obj))

    if df is None:
        # BUG FIX: the original returned (None, "No data loaded", None),
        # a different shape than the success path, which crashed the
        # caller's `(gallery_items, captions), metrics_df, lb = ...` unpack.
        empty = pd.DataFrame()
        return ([], []), empty, empty

    # Normalize column names to canonical lowercase, case-insensitively.
    # BUG FIX: the original inner loop compared `alt.lower()` (always == k)
    # against `cols` only when k was absent, so mixed-case columns such as
    # "Prompt" were never renamed to "prompt".
    cols = {c.lower(): c for c in df.columns}
    rename_map = {}
    for k in ["prompt", "response", "task", "agent", "reference"]:
        if k in cols:
            if cols[k] != k:  # e.g. "Prompt" -> "prompt"
                rename_map[cols[k]] = k
        elif k == "reference":
            # Accept common synonyms for the reference/ground-truth column.
            for alt in ["answer", "ground_truth", "ref"]:
                if alt in cols:
                    rename_map[cols[alt]] = k
                    break
    if rename_map:
        df = df.rename(columns=rename_map)

    metrics_df, images, leaderboard = evaluate_dataframe(df)

    # `images` is a list of (path, caption) pairs; split for the gallery.
    gallery_items = [p for (p, _caption) in images]
    captions = [c for (_path, c) in images]

    # Persist a CSV report for the "Download CSV Report" component.
    out_csv = "/tmp/eval_results.csv"
    metrics_df.to_csv(out_csv, index=False)

    return (gallery_items, captions), metrics_df, leaderboard
202
-
203
# Build Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# Agentic Evaluation Framework")
    gr.Markdown(
        "Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference`. "
        "If no file is uploaded, a synthetic demo will run."
    )

    with gr.Row():
        file_input = gr.File(label="Upload CSV/JSON/JSONL", file_types=[".csv", ".json", ".jsonl"])
        run_btn = gr.Button("Run Evaluation")
        download_report = gr.File(label="Download CSV Report")

    gallery = gr.Gallery(label="Visualization Outputs", columns=2, height="auto")
    table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
    leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Score per Agent & Task)")

    def on_run(file_in):
        """Run an evaluation and shape the results for the UI components."""
        (gallery_items, captions), metrics_df, lb = run_evaluation(file_in)
        # Pair each image with its caption; pad with "" if captions run short.
        gallery_display = [
            (p, captions[i] if i < len(captions) else "")
            for i, p in enumerate(gallery_items)
        ]
        # run_evaluation writes the per-example report to this fixed path.
        report_path = "/tmp/eval_results.csv"
        report = report_path if os.path.exists(report_path) else None
        return gallery_display, metrics_df, lb, report

    # BUG FIX: download_report was created but never wired to any event
    # output, so the CSV report was unreachable from the UI.
    run_btn.click(
        fn=on_run,
        inputs=[file_input],
        outputs=[gallery, table, leaderboard, download_report],
    )

    gr.Markdown("## Tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive). "
                "- `reference` optional.\n- Download CSV report after evaluation.")

demo.launch()
 
125
 
126
  # demo.launch()
127
  # app.py
128
+ # """
129
+ # Gradio application entrypoint for Hugging Face Spaces.
130
+ # """
131
+
132
+ # import os
133
+ # import tempfile
134
+ # import pandas as pd
135
+ # import gradio as gr
136
+ # from evaluation import evaluate_dataframe # βœ… updated import
137
+ # from synthetic_data import generate_synthetic_dataset
138
+
139
+ # # Helper to save uploaded file
140
+ # def save_uploaded(file_obj):
141
+ # if not file_obj:
142
+ # return None
143
+ # try:
144
+ # return file_obj.name
145
+ # except Exception:
146
+ # data = file_obj.read()
147
+ # suffix = ".csv" if file_obj.name.endswith(".csv") else ".json"
148
+ # fd, tmp = tempfile.mkstemp(suffix=suffix)
149
+ # with os.fdopen(fd, "wb") as f:
150
+ # f.write(data)
151
+ # return tmp
152
+
153
+ # def load_file_to_df(path):
154
+ # if path is None:
155
+ # return None
156
+ # try:
157
+ # if path.endswith(".csv"):
158
+ # return pd.read_csv(path)
159
+ # try:
160
+ # return pd.read_json(path, lines=True)
161
+ # except ValueError:
162
+ # return pd.read_json(path)
163
+ # except Exception as e:
164
+ # raise e
165
+
166
+ # def run_evaluation(file_obj):
167
+ # if file_obj is None:
168
+ # df = generate_synthetic_dataset(num_agents=3, num_samples=12)
169
+ # else:
170
+ # path = save_uploaded(file_obj)
171
+ # df = load_file_to_df(path)
172
+
173
+ # if df is None:
174
+ # return None, "No data loaded", None
175
+
176
+ # # Normalize column names
177
+ # cols = {c.lower(): c for c in df.columns}
178
+ # rename_map = {}
179
+ # for k in ["prompt", "response", "task", "agent", "reference"]:
180
+ # if k not in cols:
181
+ # if k == "reference":
182
+ # for alt in ["answer", "ground_truth", "ref"]:
183
+ # if alt in cols:
184
+ # rename_map[cols[alt]] = k
185
+ # break
186
+ # else:
187
+ # for alt in [k, k.capitalize(), k.upper()]:
188
+ # if alt.lower() in cols:
189
+ # rename_map[cols[alt.lower()]] = k
190
+ # if rename_map:
191
+ # df = df.rename(columns=rename_map)
192
+
193
+ # metrics_df, images, leaderboard = evaluate_dataframe(df)
194
+
195
+ # gallery_items = [p for (p, caption) in images]
196
+ # captions = [caption for (p, caption) in images]
197
+
198
+ # out_csv = "/tmp/eval_results.csv"
199
+ # metrics_df.to_csv(out_csv, index=False)
200
+
201
+ # return (gallery_items, captions), metrics_df, leaderboard
202
+
203
+ # # Build Gradio UI
204
+ # with gr.Blocks() as demo:
205
+ # gr.Markdown("# Agentic Evaluation Framework")
206
+ # gr.Markdown(
207
+ # "Upload a CSV/JSON/JSONL with columns: `prompt,response,task,agent,reference`. "
208
+ # "If no file is uploaded, a synthetic demo will run."
209
+ # )
210
+
211
+ # with gr.Row():
212
+ # file_input = gr.File(label="Upload CSV/JSON/JSONL", file_types=[".csv", ".json", ".jsonl"])
213
+ # run_btn = gr.Button("Run Evaluation")
214
+ # download_report = gr.File(label="Download CSV Report")
215
+
216
+ # gallery = gr.Gallery(label="Visualization Outputs", columns=2, height="auto")
217
+ # table = gr.Dataframe(headers=None, label="Per-example Metrics (detailed)")
218
+ # leaderboard = gr.Dataframe(headers=None, label="Leaderboard (Avg Score per Agent & Task)")
219
+
220
+ # def on_run(file_in):
221
+ # (gallery_items, captions), metrics_df, lb = run_evaluation(file_in)
222
+ # gallery_display = [(p, captions[i] if i < len(captions) else "") for i, p in enumerate(gallery_items)]
223
+ # return gallery_display, metrics_df, lb
224
+
225
+ # run_btn.click(fn=on_run, inputs=[file_input], outputs=[gallery, table, leaderboard])
226
+
227
+ # gr.Markdown("## Tips\n- Columns: `prompt,response,task,agent,reference` (case-insensitive). "
228
+ # "- `reference` optional.\n- Download CSV report after evaluation.")
229
+
230
+ # demo.launch()