Spaces:
Sleeping
Sleeping
bump: updates in light of updated snapshotted data and trajectory structure
Browse files
analysis_src/extract_discovery_trajectory.py
CHANGED
|
@@ -56,7 +56,7 @@ def extract_k8s_entities(text: str) -> List[str]:
|
|
| 56 |
# Paths
|
| 57 |
PROJECT_ROOT = Path(__file__).parent.parent
|
| 58 |
LEADERBOARD_DIR = PROJECT_ROOT / "ITBench-SRE-Agent" / "ITBench-Trajectories" / "ReAct-Agent-Trajectories"
|
| 59 |
-
GT_DIR = PROJECT_ROOT / "
|
| 60 |
OUTPUT_DIR = PROJECT_ROOT / "ITBench-SRE-Agent" / "ITBench-Trajectories" / "output" / "discovery"
|
| 61 |
|
| 62 |
@dataclass
|
|
@@ -149,14 +149,22 @@ def check_entity_match(text: str, entity_info: Dict) -> bool:
|
|
| 149 |
|
| 150 |
|
| 151 |
def load_ground_truth(scenario: str) -> Optional[GroundTruth]:
|
| 152 |
-
"""Load and parse ground truth YAML for a scenario.
|
| 153 |
-
|
| 154 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
return None
|
| 156 |
|
| 157 |
-
with open(gt_path) as f:
|
| 158 |
-
gt_data = yaml.safe_load(f)
|
| 159 |
-
|
| 160 |
# Find the root cause group
|
| 161 |
root_cause_group = None
|
| 162 |
all_groups = gt_data.get('groups', [])
|
|
@@ -609,6 +617,23 @@ def analyze_model(model_dir: Path, gt_cache: Dict[str, GroundTruth]) -> List[Tra
|
|
| 609 |
results = []
|
| 610 |
model_name = model_dir.name.replace("react with code_", "").split("_07ccdb1")[0]
|
| 611 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 612 |
scenario_dirs = [d for d in sorted(model_dir.iterdir()) if d.is_dir() and d.name.startswith("Scenario-")]
|
| 613 |
for scenario_dir in tqdm(scenario_dirs, desc=f" {model_name} scenarios"):
|
| 614 |
scenario = scenario_dir.name
|
|
@@ -743,19 +768,28 @@ def extract_all_data():
|
|
| 743 |
# Create output directory
|
| 744 |
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
| 745 |
|
| 746 |
-
# Load all ground truths
|
| 747 |
print("\nLoading ground truth data...")
|
| 748 |
gt_cache = {}
|
| 749 |
-
|
| 750 |
-
|
| 751 |
-
|
| 752 |
-
if
|
| 753 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 754 |
print(f"Loaded {len(gt_cache)} ground truth files")
|
| 755 |
-
|
| 756 |
-
# Find
|
| 757 |
-
model_dirs = [d for d in LEADERBOARD_DIR.iterdir()
|
| 758 |
-
if d.is_dir() and d.name.startswith("
|
| 759 |
print(f"Found {len(model_dirs)} agent models")
|
| 760 |
|
| 761 |
# Analyze each model
|
|
|
|
| 56 |
# Paths
|
| 57 |
PROJECT_ROOT = Path(__file__).parent.parent
|
| 58 |
LEADERBOARD_DIR = PROJECT_ROOT / "ITBench-SRE-Agent" / "ITBench-Trajectories" / "ReAct-Agent-Trajectories"
|
| 59 |
+
GT_DIR = PROJECT_ROOT / "ITBench-SRE-Agent" / "ITBench-Lite" / "snapshots" / "sre"
|
| 60 |
OUTPUT_DIR = PROJECT_ROOT / "ITBench-SRE-Agent" / "ITBench-Trajectories" / "output" / "discovery"
|
| 61 |
|
| 62 |
@dataclass
|
|
|
|
| 149 |
|
| 150 |
|
| 151 |
def load_ground_truth(scenario: str) -> Optional[GroundTruth]:
|
| 152 |
+
"""Load and parse ground truth YAML for a scenario.
|
| 153 |
+
|
| 154 |
+
Searches for ground_truth.yaml in GT_DIR/v0.2-*/scenario/ground_truth.yaml
|
| 155 |
+
"""
|
| 156 |
+
# Find the version directory (e.g., v0.2-something)
|
| 157 |
+
version_dirs = [d for d in GT_DIR.iterdir() if d.is_dir() and d.name.startswith("v0.2-")]
|
| 158 |
+
|
| 159 |
+
for version_dir in version_dirs:
|
| 160 |
+
gt_path = version_dir / scenario / "ground_truth.yaml"
|
| 161 |
+
if gt_path.exists():
|
| 162 |
+
with open(gt_path) as f:
|
| 163 |
+
gt_data = yaml.safe_load(f)
|
| 164 |
+
break
|
| 165 |
+
else:
|
| 166 |
return None
|
| 167 |
|
|
|
|
|
|
|
|
|
|
| 168 |
# Find the root cause group
|
| 169 |
root_cause_group = None
|
| 170 |
all_groups = gt_data.get('groups', [])
|
|
|
|
| 617 |
results = []
|
| 618 |
model_name = model_dir.name.replace("react with code_", "").split("_07ccdb1")[0]
|
| 619 |
|
| 620 |
+
# Check if directory contains Scenario folders directly, or if we need to go one level deeper
|
| 621 |
+
# (e.g., model_dir/sre/Scenario-1, model_dir/finops/Scenario-1, etc.)
|
| 622 |
+
has_scenarios = any(d.name.startswith("Scenario") for d in model_dir.iterdir() if d.is_dir())
|
| 623 |
+
|
| 624 |
+
if not has_scenarios:
|
| 625 |
+
# Look for subdirectories that might contain scenarios (sre, finops, etc.)
|
| 626 |
+
subdirs = [d for d in model_dir.iterdir() if d.is_dir() and not d.name.startswith(".")]
|
| 627 |
+
if len(subdirs) == 1:
|
| 628 |
+
# If there's exactly one subdirectory, use it
|
| 629 |
+
model_dir = subdirs[0]
|
| 630 |
+
elif len(subdirs) > 1:
|
| 631 |
+
# If there are multiple, try to find one with Scenario folders
|
| 632 |
+
for subdir in subdirs:
|
| 633 |
+
if any(d.name.startswith("Scenario") for d in subdir.iterdir() if d.is_dir()):
|
| 634 |
+
model_dir = subdir
|
| 635 |
+
break
|
| 636 |
+
|
| 637 |
scenario_dirs = [d for d in sorted(model_dir.iterdir()) if d.is_dir() and d.name.startswith("Scenario-")]
|
| 638 |
for scenario_dir in tqdm(scenario_dirs, desc=f" {model_name} scenarios"):
|
| 639 |
scenario = scenario_dir.name
|
|
|
|
| 768 |
# Create output directory
|
| 769 |
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
| 770 |
|
| 771 |
+
# Load all ground truths from GT_DIR
|
| 772 |
print("\nLoading ground truth data...")
|
| 773 |
gt_cache = {}
|
| 774 |
+
|
| 775 |
+
# Find version directories (e.g., v0.2-*)
|
| 776 |
+
if GT_DIR.exists():
|
| 777 |
+
version_dirs = [d for d in GT_DIR.iterdir() if d.is_dir() and d.name.startswith("v0.2-")]
|
| 778 |
+
|
| 779 |
+
for version_dir in version_dirs:
|
| 780 |
+
scenario_dirs = [d for d in version_dir.iterdir() if d.is_dir() and d.name.startswith("Scenario-")]
|
| 781 |
+
for scenario_dir in tqdm(scenario_dirs, desc="Loading ground truths"):
|
| 782 |
+
gt = load_ground_truth(scenario_dir.name)
|
| 783 |
+
if gt:
|
| 784 |
+
gt_cache[scenario_dir.name] = gt
|
| 785 |
+
else:
|
| 786 |
+
print(f"Warning: GT_DIR not found at {GT_DIR}")
|
| 787 |
+
|
| 788 |
print(f"Loaded {len(gt_cache)} ground truth files")
|
| 789 |
+
|
| 790 |
+
# Find all agent directories (excluding hidden and backup directories)
|
| 791 |
+
model_dirs = [d for d in LEADERBOARD_DIR.iterdir()
|
| 792 |
+
if d.is_dir() and not d.name.startswith(".") and not d.name.startswith("backup_")]
|
| 793 |
print(f"Found {len(model_dirs)} agent models")
|
| 794 |
|
| 795 |
# Analyze each model
|