ITBench-Lite

Sleeping

App Files Files Community

rohan-arora-ibm committed on Jan 19

Commit

9b80da8

unverified ·

1 Parent(s): 1db0756

bump: updates in light of updated snapshotted data and trajectory structure

Browse files

Files changed (1) hide show

analysis_src/extract_discovery_trajectory.py +51 -17

analysis_src/extract_discovery_trajectory.py CHANGED Viewed

@@ -56,7 +56,7 @@ def extract_k8s_entities(text: str) -> List[str]:
 # Paths
 PROJECT_ROOT = Path(__file__).parent.parent
 LEADERBOARD_DIR = PROJECT_ROOT / "ITBench-SRE-Agent" / "ITBench-Trajectories" / "ReAct-Agent-Trajectories"
-GT_DIR = PROJECT_ROOT / "data" / "itbench-snapshots"
 OUTPUT_DIR = PROJECT_ROOT / "ITBench-SRE-Agent" / "ITBench-Trajectories" / "output" / "discovery"
 @dataclass
@@ -149,14 +149,22 @@ def check_entity_match(text: str, entity_info: Dict) -> bool:
 def load_ground_truth(scenario: str) -> Optional[GroundTruth]:
-    """Load and parse ground truth YAML for a scenario."""
-    gt_path = GT_DIR / scenario / "ground_truth.yaml"
-    if not gt_path.exists():
         return None
-    with open(gt_path) as f:
-        gt_data = yaml.safe_load(f)
     # Find the root cause group
     root_cause_group = None
     all_groups = gt_data.get('groups', [])
@@ -609,6 +617,23 @@ def analyze_model(model_dir: Path, gt_cache: Dict[str, GroundTruth]) -> List[Tra
     results = []
     model_name = model_dir.name.replace("react with code_", "").split("_07ccdb1")[0]
     scenario_dirs = [d for d in sorted(model_dir.iterdir()) if d.is_dir() and d.name.startswith("Scenario-")]
     for scenario_dir in tqdm(scenario_dirs, desc=f"  {model_name} scenarios"):
         scenario = scenario_dir.name
@@ -743,19 +768,28 @@ def extract_all_data():
     # Create output directory
     OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
-    # Load all ground truths
     print("\nLoading ground truth data...")
     gt_cache = {}
-    scenario_dirs = [d for d in GT_DIR.iterdir() if d.is_dir() and d.name.startswith("Scenario-")]
-    for scenario_dir in tqdm(scenario_dirs, desc="Loading ground truths"):
-        gt = load_ground_truth(scenario_dir.name)
-        if gt:
-            gt_cache[scenario_dir.name] = gt
     print(f"Loaded {len(gt_cache)} ground truth files")
-    # Find react with code agents
-    model_dirs = [d for d in LEADERBOARD_DIR.iterdir()
-                  if d.is_dir() and d.name.startswith("react with code_")]
     print(f"Found {len(model_dirs)} agent models")
     # Analyze each model

 # Paths
 PROJECT_ROOT = Path(__file__).parent.parent
 LEADERBOARD_DIR = PROJECT_ROOT / "ITBench-SRE-Agent" / "ITBench-Trajectories" / "ReAct-Agent-Trajectories"
+GT_DIR = PROJECT_ROOT / "ITBench-SRE-Agent" / "ITBench-Lite" / "snapshots" / "sre"
 OUTPUT_DIR = PROJECT_ROOT / "ITBench-SRE-Agent" / "ITBench-Trajectories" / "output" / "discovery"
 @dataclass
 def load_ground_truth(scenario: str) -> Optional[GroundTruth]:
+    """Load and parse ground truth YAML for a scenario.
+    Searches for ground_truth.yaml in GT_DIR/v0.2-*/scenario/ground_truth.yaml
+    """
+    # Find the version directory (e.g., v0.2-something)
+    version_dirs = [d for d in GT_DIR.iterdir() if d.is_dir() and d.name.startswith("v0.2-")]
+    for version_dir in version_dirs:
+        gt_path = version_dir / scenario / "ground_truth.yaml"
+        if gt_path.exists():
+            with open(gt_path) as f:
+                gt_data = yaml.safe_load(f)
+            break
+    else:
         return None
     # Find the root cause group
     root_cause_group = None
     all_groups = gt_data.get('groups', [])
     results = []
     model_name = model_dir.name.replace("react with code_", "").split("_07ccdb1")[0]
+    # Check if directory contains Scenario folders directly, or if we need to go one level deeper
+    # (e.g., model_dir/sre/Scenario-1, model_dir/finops/Scenario-1, etc.)
+    has_scenarios = any(d.name.startswith("Scenario") for d in model_dir.iterdir() if d.is_dir())
+    if not has_scenarios:
+        # Look for subdirectories that might contain scenarios (sre, finops, etc.)
+        subdirs = [d for d in model_dir.iterdir() if d.is_dir() and not d.name.startswith(".")]
+        if len(subdirs) == 1:
+            # If there's exactly one subdirectory, use it
+            model_dir = subdirs[0]
+        elif len(subdirs) > 1:
+            # If there are multiple, try to find one with Scenario folders
+            for subdir in subdirs:
+                if any(d.name.startswith("Scenario") for d in subdir.iterdir() if d.is_dir()):
+                    model_dir = subdir
+                    break
     scenario_dirs = [d for d in sorted(model_dir.iterdir()) if d.is_dir() and d.name.startswith("Scenario-")]
     for scenario_dir in tqdm(scenario_dirs, desc=f"  {model_name} scenarios"):
         scenario = scenario_dir.name
     # Create output directory
     OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+    # Load all ground truths from GT_DIR
     print("\nLoading ground truth data...")
     gt_cache = {}
+    # Find version directories (e.g., v0.2-*)
+    if GT_DIR.exists():
+        version_dirs = [d for d in GT_DIR.iterdir() if d.is_dir() and d.name.startswith("v0.2-")]
+        for version_dir in version_dirs:
+            scenario_dirs = [d for d in version_dir.iterdir() if d.is_dir() and d.name.startswith("Scenario-")]
+            for scenario_dir in tqdm(scenario_dirs, desc="Loading ground truths"):
+                gt = load_ground_truth(scenario_dir.name)
+                if gt:
+                    gt_cache[scenario_dir.name] = gt
+    else:
+        print(f"Warning: GT_DIR not found at {GT_DIR}")
     print(f"Loaded {len(gt_cache)} ground truth files")
+    # Find all agent directories (excluding hidden and backup directories)
+    model_dirs = [d for d in LEADERBOARD_DIR.iterdir()
+                  if d.is_dir() and not d.name.startswith(".") and not d.name.startswith("backup_")]
     print(f"Found {len(model_dirs)} agent models")
     # Analyze each model