ITBench-Lite

Sleeping

rohan-arora-ibm committed on Jan 18

Commit

7f74217

unverified ·

1 Parent(s): 4f5e74b

fix: getting the trajectories when the folder hierarchy has domain

Files changed (3) hide show

analysis_src/extract_inference_data.py CHANGED Viewed

@@ -244,6 +244,24 @@ def read_agent_stats(agent_dir: Path) -> dict[str, list[dict]]:
         Dict mapping scenario_id -> list of stats (one per trial)
     """
     scenario_data = {}
     for scenario_dir in agent_dir.iterdir():
         if not scenario_dir.is_dir() or not scenario_dir.name.startswith("Scenario"):

         Dict mapping scenario_id -> list of stats (one per trial)
     """
     scenario_data = {}
+    # Check if directory contains Scenario folders directly, or if we need to go one level deeper
+    # (e.g., agent_dir/sre/Scenario-1, agent_dir/finops/Scenario-1, etc.)
+    has_scenarios = any(d.name.startswith("Scenario") for d in agent_dir.iterdir() if d.is_dir())
+    if not has_scenarios:
+        # Look for subdirectories that might contain scenarios (sre, finops, etc.)
+        subdirs = [d for d in agent_dir.iterdir() if d.is_dir() and not d.name.startswith(".")]
+        if len(subdirs) == 1:
+            # If there's exactly one subdirectory, use it
+            agent_dir = subdirs[0]
+        elif len(subdirs) > 1:
+            # If there are multiple, try to find one with Scenario folders
+            for subdir in subdirs:
+                if any(d.name.startswith("Scenario") for d in subdir.iterdir() if d.is_dir()):
+                    agent_dir = subdir
+                    break
     for scenario_dir in agent_dir.iterdir():
         if not scenario_dir.is_dir() or not scenario_dir.name.startswith("Scenario"):

analysis_src/extract_tool_failures.py CHANGED Viewed

@@ -245,6 +245,24 @@ def read_agent_stats(agent_dir: Path) -> dict[str, list[dict]]:
         Dict mapping scenario_id -> list of stats (one per trial)
     """
     scenario_data = {}
     for scenario_dir in agent_dir.iterdir():
         if not scenario_dir.is_dir() or not scenario_dir.name.startswith("Scenario"):

         Dict mapping scenario_id -> list of stats (one per trial)
     """
     scenario_data = {}
+    # Check if directory contains Scenario folders directly, or if we need to go one level deeper
+    # (e.g., agent_dir/sre/Scenario-1, agent_dir/finops/Scenario-1, etc.)
+    has_scenarios = any(d.name.startswith("Scenario") for d in agent_dir.iterdir() if d.is_dir())
+    if not has_scenarios:
+        # Look for subdirectories that might contain scenarios (sre, finops, etc.)
+        subdirs = [d for d in agent_dir.iterdir() if d.is_dir() and not d.name.startswith(".")]
+        if len(subdirs) == 1:
+            # If there's exactly one subdirectory, use it
+            agent_dir = subdirs[0]
+        elif len(subdirs) > 1:
+            # If there are multiple, try to find one with Scenario folders
+            for subdir in subdirs:
+                if any(d.name.startswith("Scenario") for d in subdir.iterdir() if d.is_dir()):
+                    agent_dir = subdir
+                    break
     for scenario_dir in agent_dir.iterdir():
         if not scenario_dir.is_dir() or not scenario_dir.name.startswith("Scenario"):

analysis_src/utils.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import json
 from pathlib import Path
 # Model display names (short for figures)
@@ -56,6 +56,23 @@ def read_judge_outputs_from_dir(agent_dir: Path) -> dict[str, list[dict]]:
     """
     scenario_data = {}
     for scenario_dir in agent_dir.iterdir():
         if not scenario_dir.is_dir() or not scenario_dir.name.startswith("Scenario"):
             continue

+import json
 from pathlib import Path
 # Model display names (short for figures)
     """
     scenario_data = {}
+    # Check if directory contains Scenario folders directly, or if we need to go one level deeper
+    # (e.g., agent_dir/sre/Scenario-1, agent_dir/finops/Scenario-1, etc.)
+    has_scenarios = any(d.name.startswith("Scenario") for d in agent_dir.iterdir() if d.is_dir())
+    if not has_scenarios:
+        # Look for subdirectories that might contain scenarios (sre, finops, etc.)
+        subdirs = [d for d in agent_dir.iterdir() if d.is_dir() and not d.name.startswith(".")]
+        if len(subdirs) == 1:
+            # If there's exactly one subdirectory, use it
+            agent_dir = subdirs[0]
+        elif len(subdirs) > 1:
+            # If there are multiple, try to find one with Scenario folders
+            for subdir in subdirs:
+                if any(d.name.startswith("Scenario") for d in subdir.iterdir() if d.is_dir()):
+                    agent_dir = subdir
+                    break
     for scenario_dir in agent_dir.iterdir():
         if not scenario_dir.is_dir() or not scenario_dir.name.startswith("Scenario"):
             continue