rohan-arora-ibm committed on
Commit
9b80da8
·
unverified ·
1 Parent(s): 1db0756

bump: updates in light of updated snapshotted data and trajectory structure

Browse files
analysis_src/extract_discovery_trajectory.py CHANGED
@@ -56,7 +56,7 @@ def extract_k8s_entities(text: str) -> List[str]:
56
  # Paths
57
  PROJECT_ROOT = Path(__file__).parent.parent
58
  LEADERBOARD_DIR = PROJECT_ROOT / "ITBench-SRE-Agent" / "ITBench-Trajectories" / "ReAct-Agent-Trajectories"
59
- GT_DIR = PROJECT_ROOT / "data" / "itbench-snapshots"
60
  OUTPUT_DIR = PROJECT_ROOT / "ITBench-SRE-Agent" / "ITBench-Trajectories" / "output" / "discovery"
61
 
62
  @dataclass
@@ -149,14 +149,22 @@ def check_entity_match(text: str, entity_info: Dict) -> bool:
149
 
150
 
151
  def load_ground_truth(scenario: str) -> Optional[GroundTruth]:
152
- """Load and parse ground truth YAML for a scenario."""
153
- gt_path = GT_DIR / scenario / "ground_truth.yaml"
154
- if not gt_path.exists():
 
 
 
 
 
 
 
 
 
 
 
155
  return None
156
 
157
- with open(gt_path) as f:
158
- gt_data = yaml.safe_load(f)
159
-
160
  # Find the root cause group
161
  root_cause_group = None
162
  all_groups = gt_data.get('groups', [])
@@ -609,6 +617,23 @@ def analyze_model(model_dir: Path, gt_cache: Dict[str, GroundTruth]) -> List[Tra
609
  results = []
610
  model_name = model_dir.name.replace("react with code_", "").split("_07ccdb1")[0]
611
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
612
  scenario_dirs = [d for d in sorted(model_dir.iterdir()) if d.is_dir() and d.name.startswith("Scenario-")]
613
  for scenario_dir in tqdm(scenario_dirs, desc=f" {model_name} scenarios"):
614
  scenario = scenario_dir.name
@@ -743,19 +768,28 @@ def extract_all_data():
743
  # Create output directory
744
  OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
745
 
746
- # Load all ground truths
747
  print("\nLoading ground truth data...")
748
  gt_cache = {}
749
- scenario_dirs = [d for d in GT_DIR.iterdir() if d.is_dir() and d.name.startswith("Scenario-")]
750
- for scenario_dir in tqdm(scenario_dirs, desc="Loading ground truths"):
751
- gt = load_ground_truth(scenario_dir.name)
752
- if gt:
753
- gt_cache[scenario_dir.name] = gt
 
 
 
 
 
 
 
 
 
754
  print(f"Loaded {len(gt_cache)} ground truth files")
755
-
756
- # Find react with code agents
757
- model_dirs = [d for d in LEADERBOARD_DIR.iterdir()
758
- if d.is_dir() and d.name.startswith("react with code_")]
759
  print(f"Found {len(model_dirs)} agent models")
760
 
761
  # Analyze each model
 
56
  # Paths
57
  PROJECT_ROOT = Path(__file__).parent.parent
58
  LEADERBOARD_DIR = PROJECT_ROOT / "ITBench-SRE-Agent" / "ITBench-Trajectories" / "ReAct-Agent-Trajectories"
59
+ GT_DIR = PROJECT_ROOT / "ITBench-SRE-Agent" / "ITBench-Lite" / "snapshots" / "sre"
60
  OUTPUT_DIR = PROJECT_ROOT / "ITBench-SRE-Agent" / "ITBench-Trajectories" / "output" / "discovery"
61
 
62
  @dataclass
 
149
 
150
 
151
  def load_ground_truth(scenario: str) -> Optional[GroundTruth]:
152
+ """Load and parse ground truth YAML for a scenario.
153
+
154
+ Searches for ground_truth.yaml in GT_DIR/v0.2-*/scenario/ground_truth.yaml
155
+ """
156
+ # Find the version directory (e.g., v0.2-something)
157
+ version_dirs = [d for d in GT_DIR.iterdir() if d.is_dir() and d.name.startswith("v0.2-")]
158
+
159
+ for version_dir in version_dirs:
160
+ gt_path = version_dir / scenario / "ground_truth.yaml"
161
+ if gt_path.exists():
162
+ with open(gt_path) as f:
163
+ gt_data = yaml.safe_load(f)
164
+ break
165
+ else:
166
  return None
167
 
 
 
 
168
  # Find the root cause group
169
  root_cause_group = None
170
  all_groups = gt_data.get('groups', [])
 
617
  results = []
618
  model_name = model_dir.name.replace("react with code_", "").split("_07ccdb1")[0]
619
 
620
+ # Check if directory contains Scenario folders directly, or if we need to go one level deeper
621
+ # (e.g., model_dir/sre/Scenario-1, model_dir/finops/Scenario-1, etc.)
622
+ has_scenarios = any(d.name.startswith("Scenario") for d in model_dir.iterdir() if d.is_dir())
623
+
624
+ if not has_scenarios:
625
+ # Look for subdirectories that might contain scenarios (sre, finops, etc.)
626
+ subdirs = [d for d in model_dir.iterdir() if d.is_dir() and not d.name.startswith(".")]
627
+ if len(subdirs) == 1:
628
+ # If there's exactly one subdirectory, use it
629
+ model_dir = subdirs[0]
630
+ elif len(subdirs) > 1:
631
+ # If there are multiple, try to find one with Scenario folders
632
+ for subdir in subdirs:
633
+ if any(d.name.startswith("Scenario") for d in subdir.iterdir() if d.is_dir()):
634
+ model_dir = subdir
635
+ break
636
+
637
  scenario_dirs = [d for d in sorted(model_dir.iterdir()) if d.is_dir() and d.name.startswith("Scenario-")]
638
  for scenario_dir in tqdm(scenario_dirs, desc=f" {model_name} scenarios"):
639
  scenario = scenario_dir.name
 
768
  # Create output directory
769
  OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
770
 
771
+ # Load all ground truths from GT_DIR
772
  print("\nLoading ground truth data...")
773
  gt_cache = {}
774
+
775
+ # Find version directories (e.g., v0.2-*)
776
+ if GT_DIR.exists():
777
+ version_dirs = [d for d in GT_DIR.iterdir() if d.is_dir() and d.name.startswith("v0.2-")]
778
+
779
+ for version_dir in version_dirs:
780
+ scenario_dirs = [d for d in version_dir.iterdir() if d.is_dir() and d.name.startswith("Scenario-")]
781
+ for scenario_dir in tqdm(scenario_dirs, desc="Loading ground truths"):
782
+ gt = load_ground_truth(scenario_dir.name)
783
+ if gt:
784
+ gt_cache[scenario_dir.name] = gt
785
+ else:
786
+ print(f"Warning: GT_DIR not found at {GT_DIR}")
787
+
788
  print(f"Loaded {len(gt_cache)} ground truth files")
789
+
790
+ # Find all agent directories (excluding hidden and backup directories)
791
+ model_dirs = [d for d in LEADERBOARD_DIR.iterdir()
792
+ if d.is_dir() and not d.name.startswith(".") and not d.name.startswith("backup_")]
793
  print(f"Found {len(model_dirs)} agent models")
794
 
795
  # Analyze each model