Bhavya committed on
Commit
fec76d9
·
1 Parent(s): 9b80da8

added agent running and e2e eval activities

Browse files
analysis_src/utils.py CHANGED
@@ -1,5 +1,6 @@
1
  import json
2
  from pathlib import Path
 
3
 
4
  # Model display names (short for figures)
5
  # Follows ArtificialAnalysis.ai naming conventions
@@ -173,3 +174,72 @@ def find_latest_rollout_file(trial_dir: Path) -> Path:
173
  # Sort by modification time and return the latest
174
  return max(rollout_files, key=lambda p: p.stat().st_mtime)
175
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import json
2
  from pathlib import Path
3
+ import pandas as pd
4
 
5
  # Model display names (short for figures)
6
  # Follows ArtificialAnalysis.ai naming conventions
 
174
  # Sort by modification time and return the latest
175
  return max(rollout_files, key=lambda p: p.stat().st_mtime)
176
 
177
def json_to_filtered_df(path: str) -> pd.DataFrame:
    """
    Load a rollout log and return its relevant events as a tidy DataFrame.

    The file is parsed as JSON Lines when its suffix is ``.jsonl`` or
    ``.ndjson`` (blank lines are skipped) and as a regular JSON document
    otherwise (a single top-level object is treated as one record).
    Nested objects are flattened with dotted column names, only rows whose
    ``payload.type`` is one of the desired types are kept, the columns are
    reduced to a fixed set, and the rows are sorted by timestamp ascending.

    Parameters
    ----------
    path : str
        Path to the JSON or JSON Lines file. A ``pathlib.Path`` is accepted
        as well, since the value is passed through ``Path(...)``.

    Returns
    -------
    pd.DataFrame
        One row per kept event with exactly the columns ``timestamp``,
        ``payload.type``, ``payload.message``, ``payload.role``,
        ``payload.content``, ``payload.name``, ``payload.arguments``,
        ``payload.call_id`` and ``payload.output`` (in that order).
        Columns absent from the data are filled with ``pd.NA``.
        Unparseable timestamps become ``NaT`` and sort last. The index is
        reset to ``0..n-1``.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist.
    KeyError
        If no record yields a ``payload.type`` column after flattening
        (this includes a file with no records at all).
    """
    desired_types = frozenset(
        {
            "agent_message",
            "function_call",
            "function_call_output",
        }
    )

    # Union of all "useful" columns, in output order.
    useful_cols = [
        "timestamp",
        "payload.type",
        "payload.message",
        "payload.role",
        "payload.content",
        "payload.name",
        "payload.arguments",
        "payload.call_id",
        "payload.output",
    ]

    file_path = Path(path)
    if not file_path.exists():
        raise FileNotFoundError(f"{file_path} does not exist")

    # 1. Load the records.
    with file_path.open("r", encoding="utf-8") as f:
        if file_path.suffix.lower() in {".jsonl", ".ndjson"}:
            records = [json.loads(line) for line in f if line.strip()]
        else:
            data = json.load(f)
            records = data if isinstance(data, list) else [data]

    # 2. Flatten nested JSON ("payload": {"type": ...} -> "payload.type").
    df = pd.json_normalize(records)

    # 3. Filter by payload.type.
    if "payload.type" not in df.columns:
        raise KeyError("'payload.type' column missing from data")
    df = df[df["payload.type"].isin(desired_types)].copy()

    # 4. Ensure all useful columns exist (add empty if missing).
    for col in useful_cols:
        if col not in df.columns:
            df[col] = pd.NA

    # 5. Subset to useful columns only. The explicit copy makes the result
    #    an independent frame, so the timestamp assignment below does not
    #    trigger SettingWithCopyWarning on pandas versions that track it.
    df = df[useful_cols].copy()

    # 6. Sort by timestamp. A stable algorithm keeps events that share a
    #    timestamp in their original file order (the default quicksort
    #    gives no such guarantee).
    df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")
    df = df.sort_values("timestamp", kind="mergesort", ignore_index=True)

    return df
245
+
download_run_scenario.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
evaluation.ipynb CHANGED
The diff for this file is too large to render. See raw diff