| ο»Ώ""" |
| RecallTrace β ContaminationEnv Simulation |
| Tasks 1-9: Environment, Tools, F1, Hidden Nodes, |
| Belief Calibration, Training, Curriculum, Plots |
| """ |
|
|
| |
| |
|
|
| import json |
| import os |
| import numpy as np |
| import networkx as nx |
| import matplotlib |
| matplotlib.use("Agg") |
| import matplotlib.pyplot as plt |
|
|
| |
| os.makedirs("plots", exist_ok=True) |
| PLOT_DIR = "plots" |
| RESULTS_FILE = "training_results.json" |
|
|
|
|
| |
| |
| |
|
|
| class ContaminationEnv: |
| """ |
| Supply-chain contamination environment with: |
| - Random DAG generation per reset() [Task 1] |
| - 4 noisy investigation tools [Task 2] |
| - F1-scored finalize() [Task 3] |
| - Hidden intervention nodes [Task 4] |
| - Belief-calibrated finalize_with_beliefs() [Task 5] |
| - Adversarial curriculum difficulty levels [Task 7] |
| """ |
|
|
| def __init__(self, difficulty_level: int = 3): |
| self.graph = None |
| self.contaminated_nodes: set = set() |
| self.hidden_nodes: set = set() |
| self.source_nodes: set = set() |
| self.difficulty_level = max(1, min(5, difficulty_level)) |
|
|
| def set_difficulty(self, level: int) -> None: |
| """Set difficulty 1 (easy) β¦ 5 (very hard).""" |
| self.difficulty_level = max(1, min(5, level)) |
|
|
| |
|
|
| def reset(self) -> dict: |
| """Generate a new contamination scenario scaled to current difficulty.""" |
| params = { |
| 1: dict(n_range=(6, 8), n_sources=2, n_hidden=0, edge_p=0.25), |
| 2: dict(n_range=(8, 10), n_sources=2, n_hidden=1, edge_p=0.30), |
| 3: dict(n_range=(10, 13), n_sources=3, n_hidden=1, edge_p=0.30), |
| 4: dict(n_range=(12, 14), n_sources=3, n_hidden=2, edge_p=0.35), |
| 5: dict(n_range=(14, 16), n_sources=4, n_hidden=2, edge_p=0.40), |
| }[self.difficulty_level] |
|
|
| n_nodes = np.random.randint(*params["n_range"]) |
| self.graph = nx.DiGraph() |
| self.graph.add_nodes_from(range(n_nodes)) |
|
|
| for i in range(n_nodes): |
| for j in range(i + 1, n_nodes): |
| if np.random.random() < params["edge_p"]: |
| self.graph.add_edge(i, j) |
|
|
| n_sources = min(params["n_sources"], n_nodes) |
| self.source_nodes = set( |
| np.random.choice(n_nodes, n_sources, replace=False).tolist() |
| ) |
|
|
| n_hidden = min(params["n_hidden"], len(self.source_nodes)) |
| self.hidden_nodes = ( |
| set(np.random.choice(list(self.source_nodes), n_hidden, replace=False).tolist()) |
| if n_hidden > 0 else set() |
| ) |
|
|
| self.contaminated_nodes = set(self.source_nodes) |
| self._spread_contamination() |
|
|
| return { |
| "n_nodes": n_nodes, |
| "graph_structure": list(self.graph.edges()), |
| "observable_nodes": [n for n in range(n_nodes) if n not in self.hidden_nodes], |
| "difficulty": self.difficulty_level, |
| "n_hidden": len(self.hidden_nodes), |
| "message": ( |
| f"Difficulty {self.difficulty_level}: {n_nodes}-node graph, " |
| f"{len(self.hidden_nodes)} hidden source(s)." |
| ), |
| } |
|
|
| def _spread_contamination(self) -> None: |
| to_contaminate = set(self.contaminated_nodes) |
| for source in self.contaminated_nodes: |
| to_contaminate.update(nx.descendants(self.graph, source)) |
| self.contaminated_nodes = to_contaminate |
|
|
| |
|
|
| def inspect_node(self, node_id: int) -> dict: |
| """Noisy visual inspection (80% TP / 10% FP). Blocked on hidden nodes.""" |
| if node_id not in self.graph.nodes(): |
| return {"error": "Node does not exist"} |
| if node_id in self.hidden_nodes: |
| return { |
| "error": "Cannot inspect this node", |
| "reason": "Node is not directly observable", |
| "hint": "Examine downstream nodes to infer its state", |
| } |
| is_cont = node_id in self.contaminated_nodes |
| obs = np.random.random() < (0.8 if is_cont else 0.1) |
| return { |
| "node_id": node_id, |
| "appears_contaminated": bool(obs), |
| "confidence": "medium", |
| "upstream_count": len(list(self.graph.predecessors(node_id))), |
| "downstream_count": len(list(self.graph.successors(node_id))), |
| } |
|
|
| def test_batch(self, node_id: int) -> dict: |
| """Lab test (95% TP / 5% FP). Blocked on hidden nodes.""" |
| if node_id not in self.graph.nodes(): |
| return {"error": "Node does not exist"} |
| if node_id in self.hidden_nodes: |
| return { |
| "error": "Cannot test this node", |
| "reason": "Node is not directly testable", |
| "hint": "Infer contamination from causal structure", |
| } |
| is_cont = node_id in self.contaminated_nodes |
| pos = np.random.random() < (0.95 if is_cont else 0.05) |
| return { |
| "node_id": node_id, |
| "test_result": "POSITIVE" if pos else "NEGATIVE", |
| "confidence": "high", |
| "cost": 10, |
| } |
|
|
| def trace_upstream(self, node_id: int) -> dict: |
| if node_id not in self.graph.nodes(): |
| return {"error": "Node does not exist"} |
| parents = list(self.graph.predecessors(node_id)) |
| return {"node_id": node_id, "immediate_upstream": parents, "upstream_count": len(parents)} |
|
|
| def trace_downstream(self, node_id: int) -> dict: |
| if node_id not in self.graph.nodes(): |
| return {"error": "Node does not exist"} |
| children = list(self.graph.successors(node_id)) |
| return {"node_id": node_id, "immediate_downstream": children, "downstream_count": len(children)} |
|
|
| |
|
|
| def finalize(self, suspected_nodes) -> dict: |
| """Score binary guess with F1 (precision + recall).""" |
| suspected = set(suspected_nodes) |
| actual = self.contaminated_nodes |
| tp = len(suspected & actual) |
| fp = len(suspected - actual) |
| fn = len(actual - suspected) |
| precision = tp / (tp + fp) if suspected else 0.0 |
| recall = tp / (tp + fn) if actual else 0.0 |
| f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0 |
| return { |
| "f1_score": f1, "precision": precision, "recall": recall, |
| "true_positives": tp, "false_positives": fp, "false_negatives": fn, |
| "suspected_nodes": list(suspected), "actual_contaminated": list(actual), |
| "total_nodes": self.graph.number_of_nodes(), |
| } |
|
|
| |
|
|
| def finalize_with_beliefs(self, beliefs: dict) -> dict: |
| """ |
| Score the agent's probabilistic beliefs. |
| |
| Args: |
| beliefs: {node_id: confidence_probability} e.g. {1: 0.9, 3: 0.4} |
| |
| Returns: |
| Dict with f1_score, calibration_score (Brier), total_reward, breakdown. |
| """ |
| suspected = {n for n, conf in beliefs.items() if conf > 0.5} |
| actual = self.contaminated_nodes |
|
|
| tp = len(suspected & actual) |
| fp = len(suspected - actual) |
| fn = len(actual - suspected) |
| precision = tp / (tp + fp) if suspected else 0.0 |
| recall = tp / (tp + fn) if actual else 0.0 |
| f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0 |
|
|
| calibration = self._calculate_calibration(beliefs) |
|
|
| |
| total_reward = 0.7 * f1 + 0.3 * calibration |
|
|
| return { |
| "f1_score": round(f1, 4), |
| "calibration_score": round(calibration, 4), |
| "total_reward": round(total_reward, 4), |
| "precision": round(precision, 4), |
| "recall": round(recall, 4), |
| "breakdown": self._get_belief_breakdown(beliefs), |
| } |
|
|
| def _calculate_calibration(self, beliefs: dict) -> float: |
| """Inverted Brier score: 1 = perfect calibration, 0 = worst.""" |
| if not beliefs: |
| return 0.0 |
| brier = sum( |
| (conf - (1 if n in self.contaminated_nodes else 0)) ** 2 |
| for n, conf in beliefs.items() |
| ) |
| return round(1 - brier / len(beliefs), 4) |
|
|
| def _get_belief_breakdown(self, beliefs: dict) -> list: |
| """Classify each prediction by correctness and confidence.""" |
| breakdown = [] |
| for node_id, confidence in beliefs.items(): |
| is_cont = node_id in self.contaminated_nodes |
| if is_cont and confidence > 0.5: |
| result = "CORRECT_HIGH_CONF" |
| elif is_cont: |
| result = "MISSED_LOW_CONF" |
| elif confidence > 0.5: |
| result = "FALSE_ALARM_HIGH_CONF" |
| else: |
| result = "CORRECT_LOW_CONF" |
| breakdown.append({ |
| "node": node_id, |
| "confidence": round(confidence, 3), |
| "actually_contaminated": is_cont, |
| "result": result, |
| }) |
| return breakdown |
|
|
|
|
| |
| |
| |
|
|
| def simple_heuristic_agent(env: ContaminationEnv, n_nodes: int) -> dict: |
| """ |
| Inspect all observable nodes, infer hidden nodes causally. |
| Returns belief dict {node_id: confidence}. |
| """ |
| observable = [n for n in range(n_nodes) if n not in env.hidden_nodes] |
| hidden = list(env.hidden_nodes) |
| beliefs = {} |
|
|
| |
| for node in observable: |
| result = env.test_batch(node) |
| if result.get("test_result") == "POSITIVE": |
| beliefs[node] = 0.92 |
| elif result.get("test_result") == "NEGATIVE": |
| beliefs[node] = 0.08 |
|
|
| |
| changed = True |
| while changed: |
| changed = False |
| for h in hidden: |
| if h in beliefs: |
| continue |
| parents = list(env.graph.predecessors(h)) |
| children = list(env.graph.successors(h)) |
|
|
| |
| if any(beliefs.get(p, 0) > 0.5 for p in parents): |
| beliefs[h] = 0.85 |
| changed = True |
| continue |
|
|
| |
| if children and all(beliefs.get(c, 0) > 0.5 for c in children): |
| beliefs[h] = 0.75 |
| changed = True |
| continue |
|
|
| |
| if children: |
| pos_children = sum(1 for c in children if beliefs.get(c, 0) > 0.5) |
| ratio = pos_children / len(children) |
| if ratio > 0: |
| beliefs[h] = round(0.4 + 0.4 * ratio, 3) |
| changed = True |
|
|
| return beliefs |
|
|
|
|
| def random_agent(n_nodes: int) -> dict: |
| """Purely random baseline.""" |
| return { |
| i: float(np.random.random()) |
| for i in range(n_nodes) |
| if np.random.random() > 0.5 |
| } |
|
|
|
|
| |
| |
| |
|
|
| def train_agent(n_episodes: int = 30, difficulty: int = 3) -> tuple: |
| """Run n_episodes and track F1, calibration, and total reward.""" |
| env = ContaminationEnv(difficulty_level=difficulty) |
| rewards, f1_scores, calibration_scores = [], [], [] |
|
|
| print(f"\n{'='*55}") |
| print(f" Training Agent β {n_episodes} Episodes (difficulty={difficulty})") |
| print(f"{'='*55}") |
|
|
| for ep in range(n_episodes): |
| state = env.reset() |
| n_nodes = state["n_nodes"] |
| beliefs = simple_heuristic_agent(env, n_nodes) |
| result = env.finalize_with_beliefs(beliefs) |
|
|
| rewards.append(result["total_reward"]) |
| f1_scores.append(result["f1_score"]) |
| calibration_scores.append(result["calibration_score"]) |
|
|
| if (ep + 1) % 5 == 0: |
| print(f" Ep {ep+1:3d}/{n_episodes} | F1={result['f1_score']:.3f} " |
| f"Cal={result['calibration_score']:.3f} " |
| f"Reward={result['total_reward']:.3f}") |
|
|
| print(f"\n Final averages -> F1={np.mean(f1_scores):.3f} " |
| f"Cal={np.mean(calibration_scores):.3f} " |
| f"Reward={np.mean(rewards):.3f}") |
|
|
| return rewards, f1_scores, calibration_scores |
|
|
|
|
| |
| |
| |
|
|
| def train_with_curriculum(total_episodes: int = 50) -> tuple: |
| """Train from difficulty 1 -> 5, stepping up every 10 episodes.""" |
| env = ContaminationEnv(difficulty_level=1) |
| rewards, difficulties = [], [] |
|
|
| print(f"\n{'='*55}") |
| print(f" Curriculum Training β {total_episodes} Episodes") |
| print(f"{'='*55}") |
|
|
| for ep in range(total_episodes): |
| level = min(5, 1 + ep // 10) |
| env.set_difficulty(level) |
| state = env.reset() |
| beliefs = simple_heuristic_agent(env, state["n_nodes"]) |
| result = env.finalize_with_beliefs(beliefs) |
|
|
| rewards.append(result["total_reward"]) |
| difficulties.append(level) |
|
|
| if (ep + 1) % 10 == 0: |
| print(f" Ep {ep+1:3d}/{total_episodes} | " |
| f"Difficulty={level} Reward={result['total_reward']:.3f}") |
|
|
| return rewards, difficulties |
|
|
|
|
| |
| |
| |
|
|
| def compare_baselines(n_trials: int = 20, difficulty: int = 3) -> dict: |
| """Compare random vs heuristic agent across n_trials.""" |
| env = ContaminationEnv(difficulty_level=difficulty) |
| results = {"random": [], "heuristic": []} |
|
|
| for _ in range(n_trials): |
| state = env.reset() |
| n_nodes = state["n_nodes"] |
|
|
| |
| rg = random_agent(n_nodes) |
| results["random"].append(env.finalize_with_beliefs(rg)["f1_score"]) |
|
|
| |
| hg = simple_heuristic_agent(env, n_nodes) |
| results["heuristic"].append(env.finalize_with_beliefs(hg)["f1_score"]) |
|
|
| return {k: {"mean": round(float(np.mean(v)), 4), |
| "std": round(float(np.std(v)), 4)} |
| for k, v in results.items()} |
|
|
|
|
| |
| |
| |
|
|
| def plot_training_curves(rewards, f1_scores, calibration_scores): |
| fig, axes = plt.subplots(1, 3, figsize=(15, 4)) |
| episodes = range(1, len(rewards) + 1) |
|
|
| axes[0].plot(episodes, rewards, "b-", linewidth=2) |
| axes[0].set_xlabel("Episode"); axes[0].set_ylabel("Total Reward") |
| axes[0].set_title("Learning Curve: Total Reward"); axes[0].grid(True, alpha=0.3) |
|
|
| axes[1].plot(episodes, f1_scores, "g-", linewidth=2) |
| axes[1].set_xlabel("Episode"); axes[1].set_ylabel("F1 Score") |
| axes[1].set_title("Detection Accuracy (F1)"); axes[1].grid(True, alpha=0.3) |
|
|
| axes[2].plot(episodes, calibration_scores, "r-", linewidth=2) |
| axes[2].set_xlabel("Episode"); axes[2].set_ylabel("Calibration Score") |
| axes[2].set_title("Belief Calibration"); axes[2].grid(True, alpha=0.3) |
|
|
| plt.tight_layout() |
| path = os.path.join(PLOT_DIR, "training_curves.png") |
| plt.savefig(path, dpi=150, bbox_inches="tight") |
| plt.close() |
| print(f" Saved -> {path}") |
|
|
|
|
| def plot_curriculum(rewards, difficulties): |
| fig, ax = plt.subplots(figsize=(10, 5)) |
| ax2 = ax.twinx() |
|
|
| ax.plot(rewards, "b-", linewidth=2, label="Reward") |
| ax2.plot(difficulties, "r--", linewidth=2, label="Difficulty", alpha=0.7) |
|
|
| ax.set_xlabel("Episode"); ax.set_ylabel("Reward", color="b") |
| ax2.set_ylabel("Difficulty Level", color="r") |
| ax.set_title("Curriculum Learning: Reward vs Difficulty") |
| ax.grid(True, alpha=0.3) |
|
|
| lines1, labels1 = ax.get_legend_handles_labels() |
| lines2, labels2 = ax2.get_legend_handles_labels() |
| ax.legend(lines1 + lines2, labels1 + labels2, loc="upper left") |
|
|
| path = os.path.join(PLOT_DIR, "curriculum_learning.png") |
| plt.savefig(path, dpi=150, bbox_inches="tight") |
| plt.close() |
| print(f" Saved -> {path}") |
|
|
|
|
| def plot_baseline_comparison(baselines): |
| fig, ax = plt.subplots(figsize=(8, 6)) |
| names = list(baselines.keys()) |
| means = [baselines[k]["mean"] for k in names] |
| stds = [baselines[k]["std"] for k in names] |
| colors = ["#ff6b6b", "#6bcf7f"] |
|
|
| bars = ax.bar(names, means, yerr=stds, capsize=6, |
| color=colors, edgecolor="black", linewidth=0.8) |
| ax.set_ylabel("F1 Score", fontsize=12) |
| ax.set_title("Baseline Comparison: Detection Performance", fontsize=13, fontweight="bold") |
| ax.set_ylim(0, 1.05) |
| ax.grid(True, alpha=0.3, axis="y") |
| for bar, mean in zip(bars, means): |
| ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.02, |
| f"{mean:.3f}", ha="center", va="bottom", fontweight="bold") |
|
|
| path = os.path.join(PLOT_DIR, "baseline_comparison.png") |
| plt.savefig(path, dpi=150, bbox_inches="tight") |
| plt.close() |
| print(f" Saved -> {path}") |
|
|
|
|
| def plot_before_after(f1_scores): |
| first5 = f1_scores[:5] |
| last5 = f1_scores[-5:] |
|
|
| fig, ax = plt.subplots(figsize=(8, 6)) |
| ax.scatter([1] * len(first5), first5, s=120, alpha=0.7, color="red", label="First 5 Episodes") |
| ax.scatter([2] * len(last5), last5, s=120, alpha=0.7, color="green",label="Last 5 Episodes") |
|
|
| ax.plot([1, 2], [np.mean(first5), np.mean(last5)], "k--", linewidth=2, alpha=0.5) |
| ax.set_xticks([1, 2]); ax.set_xticklabels(["Before Training", "After Training"]) |
| ax.set_ylabel("F1 Score"); ax.set_title("Learning Progress: Before vs After") |
| ax.legend(); ax.grid(True, alpha=0.3, axis="y"); ax.set_ylim(0, 1.05) |
|
|
| path = os.path.join(PLOT_DIR, "before_after.png") |
| plt.savefig(path, dpi=150, bbox_inches="tight") |
| plt.close() |
| print(f" Saved -> {path}") |
|
|
|
|
| |
| |
| |
|
|
| def generate_all_plots_for_shreya(): |
| print("\n" + "="*55) |
| print(" Generating All Plots & Results") |
| print("="*55) |
|
|
| |
| print("\n[1/4] Training agent (30 episodes, difficulty 3)β¦") |
| rewards, f1, cal = train_agent(n_episodes=30, difficulty=3) |
| plot_training_curves(rewards, f1, cal) |
| plot_before_after(f1) |
|
|
| |
| print("\n[2/4] Curriculum training (50 episodes, difficulty 1->5)β¦") |
| cur_rewards, cur_diff = train_with_curriculum(total_episodes=50) |
| plot_curriculum(cur_rewards, cur_diff) |
|
|
| |
| print("\n[3/4] Baseline comparison (20 trials)β¦") |
| baselines = compare_baselines(n_trials=20, difficulty=3) |
| plot_baseline_comparison(baselines) |
|
|
| |
| print("\n[4/4] Saving results JSONβ¦") |
| data = { |
| "training": { |
| "n_episodes": 30, |
| "difficulty": 3, |
| "final_f1": float(f1[-1]), |
| "final_calibration": float(cal[-1]), |
| "final_reward": float(rewards[-1]), |
| "avg_f1": round(float(np.mean(f1)), 4), |
| "avg_calibration": round(float(np.mean(cal)), 4), |
| "avg_reward": round(float(np.mean(rewards)), 4), |
| "improvement_f1": round(float(f1[-1] - f1[0]), 4), |
| }, |
| "curriculum": { |
| "n_episodes": 50, |
| "final_reward": float(cur_rewards[-1]), |
| "avg_reward": round(float(np.mean(cur_rewards)), 4), |
| }, |
| "baselines": baselines, |
| "plots": [ |
| os.path.join(PLOT_DIR, f) |
| for f in ["training_curves.png", "before_after.png", |
| "curriculum_learning.png", "baseline_comparison.png"] |
| ], |
| } |
| with open(RESULTS_FILE, "w") as fh: |
| json.dump(data, fh, indent=2) |
| print(f" Saved -> {RESULTS_FILE}") |
|
|
| print("\n" + "="*55) |
| print(" RESULTS FOR SHREYA") |
| print("="*55) |
| t = data["training"] |
| print(f" Avg F1 Score : {t['avg_f1']:.3f}") |
| print(f" Avg Calibration : {t['avg_calibration']:.3f}") |
| print(f" Avg Total Reward : {t['avg_reward']:.3f}") |
| print(f" F1 Improvement : +{t['improvement_f1']:.3f}") |
| print(f"\n Baselines (F1):") |
| for name, stats in baselines.items(): |
| print(f" {name:12s}: {stats['mean']:.3f} Β± {stats['std']:.3f}") |
| print(f" All plots saved to -> {PLOT_DIR}/") |
| print("="*55) |
|
|
| return data |
|
|
|
|
| |
| |
| |
|
|
| if __name__ == "__main__": |
| print("RecallTrace β Tasks 1-9 Simulation") |
| print("="*55) |
|
|
| |
| print("\n[SANITY] 10-episode automated agent runβ¦") |
| f1_history = [] |
| for ep in range(10): |
| env = ContaminationEnv(difficulty_level=3) |
| state = env.reset() |
| beliefs = simple_heuristic_agent(env, state["n_nodes"]) |
| r = env.finalize_with_beliefs(beliefs) |
| f1_history.append(r["f1_score"]) |
| print(f" Ep {ep+1:2d} | nodes={state['n_nodes']:2d} " |
| f"| hidden={state['n_hidden']} " |
| f"| F1={r['f1_score']:.3f} " |
| f"| Cal={r['calibration_score']:.3f} " |
| f"| Reward={r['total_reward']:.3f}") |
| print(f" => Mean F1 over 10 episodes: {np.mean(f1_history):.3f}") |
|
|
| |
| print("\n[TASK 5] Belief calibration exampleβ¦") |
| env = ContaminationEnv(difficulty_level=3) |
| env.reset() |
| demo_beliefs = { |
| n: float(np.random.random()) |
| for n in range(env.graph.number_of_nodes()) |
| } |
| result = env.finalize_with_beliefs(demo_beliefs) |
| print(f" F1={result['f1_score']:.3f} " |
| f"Calibration={result['calibration_score']:.3f} " |
| f"Total Reward={result['total_reward']:.3f}") |
|
|
| |
| data = generate_all_plots_for_shreya() |
| print("All done! Done") |
|
|