Search committed on
Commit
136d537
·
1 Parent(s): 20ccbfa

auto: sync run_qwen_injection_layer_ablation.py

Browse files
scripts/run_qwen_injection_layer_ablation.py ADDED
@@ -0,0 +1,357 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import math
6
+ from datetime import datetime, timezone
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+
11
# Directory one level above the folder that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# Default locations for the input samples and the two generated reports.
DEFAULT_INPUT = ROOT.joinpath("archive", "qwen35_4b_injection_geometry_medium.json")
DEFAULT_OUTPUT_JSON = ROOT.joinpath("archive", "qwen_injection_layer_ablation.json")
DEFAULT_OUTPUT_MD = ROOT.joinpath("docs", "research", "qwen_injection_layer_ablation.md")
15
+
16
+
17
+ def compute_auc(negative_scores: list[float], positive_scores: list[float]) -> float | None:
18
+ if not negative_scores or not positive_scores:
19
+ return None
20
+ wins = 0.0
21
+ total = 0
22
+ for negative in negative_scores:
23
+ for positive in positive_scores:
24
+ total += 1
25
+ if positive > negative:
26
+ wins += 1.0
27
+ elif positive == negative:
28
+ wins += 0.5
29
+ return float(wins / total) if total else None
30
+
31
+
32
+ def finite_float(value: Any) -> float | None:
33
+ try:
34
+ out = float(value)
35
+ except (TypeError, ValueError):
36
+ return None
37
+ return out if math.isfinite(out) else None
38
+
39
+
40
def mean_vector(vectors: list[list[float]]) -> list[float]:
    """Average a list of vectors component by component.

    The number of components is taken from the first vector.  An empty input
    yields an empty list.
    """
    if not vectors:
        return []
    count = len(vectors)
    centroid: list[float] = []
    for dim in range(len(vectors[0])):
        component_sum = sum(vec[dim] for vec in vectors)
        centroid.append(float(component_sum / count))
    return centroid
48
+
49
+
50
def compute_scale(vectors: list[list[float]]) -> list[float]:
    """Return a per-dimension scale: the population standard deviation, floored at 1e-6.

    The number of dimensions is taken from the first vector.  The floor keeps
    constant dimensions from producing a zero divisor later on.  An empty
    input yields an empty list.
    """
    if not vectors:
        return []
    floor = 1e-6
    spreads: list[float] = []
    for dim in range(len(vectors[0])):
        column = [float(vec[dim]) for vec in vectors]
        count = len(column)
        center = sum(column) / count
        squared_error = sum((entry - center) ** 2 for entry in column)
        deviation = (squared_error / max(1, count)) ** 0.5
        spreads.append(max(deviation, floor))
    return spreads
61
+
62
+
63
def standardized_l1_distance(vector: list[float], prototype: list[float], scale: list[float]) -> float:
    """Return the mean absolute difference between ``vector`` and ``prototype`` in units of ``scale``.

    Components are paired positionally (pairing stops at the shortest input),
    each absolute difference is divided by its scale (floored at 1e-6), and
    the sum is divided by ``len(vector)``.  If any of the three inputs is
    empty the distance is reported as ``0.0``.
    """
    if not (vector and prototype and scale):
        return 0.0
    floor = 1e-6
    accumulated = 0.0
    for component, center, spread in zip(vector, prototype, scale):
        gap = abs(float(component) - float(center))
        accumulated += gap / max(float(spread), floor)
    return float(accumulated / len(vector))
70
+
71
+
72
def vector_for_subset(row: dict[str, Any], features: list[tuple[int, str]]) -> list[float] | None:
    """Extract the feature vector for ``features`` from one sample row.

    Each feature is a ``(layer, metric)`` pair looked up as
    ``row["layer_metrics"][str(layer)][metric]``.  The values are returned in
    the order of ``features``.

    Returns ``None`` as soon as anything is missing: ``layer_metrics`` is not
    a dict, a layer entry is not a dict, or a value is not a finite number.
    """
    per_layer = row.get("layer_metrics")
    if not isinstance(per_layer, dict):
        return None
    collected: list[float] = []
    for layer, metric in features:
        layer_entry = per_layer.get(str(layer))
        if not isinstance(layer_entry, dict):
            return None
        number = finite_float(layer_entry.get(metric))
        if number is None:
            return None
        collected.append(number)
    return collected
86
+
87
+
88
def distance_auc_for_features(rows: list[dict[str, Any]], features: list[tuple[int, str]]) -> dict[str, Any]:
    """Score how well prototype distance separates injected rows from legit rows.

    Only rows with ``status == "ok"`` are used, and rows whose feature vector
    cannot be built are skipped.  Distances are standardized L1 distances,
    using a per-dimension scale computed from all usable legit vectors.

    * A legit row is scored leave-one-out: its distance to the mean of the
      *other* legit vectors sharing its ``anchor_group``.  Rows with no such
      peers are skipped.
    * An injected row is scored against the mean of all legit vectors in its
      ``anchor_group``.  Rows whose group has no legit vectors are skipped.

    Returns a dict with ``auc`` (legit scores as negatives, injected scores
    as positives), the number of scored rows per class, and the mean distance
    per class (``None`` where nothing was scored).
    """
    legit_rows = [row for row in rows if row.get("label") == "legit" and row.get("status") == "ok"]
    injected_rows = [row for row in rows if row.get("label") == "injected" and row.get("status") == "ok"]

    # Index usable legit rows once, both in input order (to keep score order
    # stable) and per anchor group (so peer lookups never rescan every row).
    legit_entries: list[tuple[str, dict[str, Any], list[float]]] = []
    legit_by_group: dict[str, list[tuple[dict[str, Any], list[float]]]] = {}
    for row in legit_rows:
        vector = vector_for_subset(row, features)
        if vector is None:
            continue
        group = str(row.get("anchor_group"))
        legit_entries.append((group, row, vector))
        legit_by_group.setdefault(group, []).append((row, vector))

    scale = compute_scale([vector for _, _, vector in legit_entries])

    legit_scores: list[float] = []
    for group, row, vector in legit_entries:
        # Leave-one-out: exclude the row itself (by identity) from its prototype.
        peers = [peer_vector for peer, peer_vector in legit_by_group[group] if peer is not row]
        if not peers:
            continue
        legit_scores.append(standardized_l1_distance(vector, mean_vector(peers), scale))

    injected_scores: list[float] = []
    prototypes: dict[str, list[float]] = {}  # full-group legit mean, computed once per group
    for row in injected_rows:
        vector = vector_for_subset(row, features)
        if vector is None:
            continue
        group = str(row.get("anchor_group"))
        if group not in prototypes:
            prototypes[group] = mean_vector(
                [legit_vector for _, legit_vector in legit_by_group.get(group, [])]
            )
        prototype = prototypes[group]
        if not prototype:
            continue
        injected_scores.append(standardized_l1_distance(vector, prototype, scale))

    return {
        "auc": compute_auc(legit_scores, injected_scores),
        "n_legit": len(legit_scores),
        "n_injected": len(injected_scores),
        "legit_mean_distance": mean_or_none(legit_scores),
        "injected_mean_distance": mean_or_none(injected_scores),
    }
142
+
143
+
144
def raw_metric_auc(rows: list[dict[str, Any]], layer: int, metric: str) -> dict[str, Any]:
    """Measure how well one raw ``(layer, metric)`` value separates the two labels.

    Only rows with ``status == "ok"`` and a finite value for the metric are
    used.  ``auc`` treats legit values as negatives and injected values as
    positives.  ``separation_auc`` folds the AUC around 0.5 so it is always
    at least 0.5, and ``direction`` records which way the fold went.

    When the AUC cannot be computed, ``auc`` and ``separation_auc`` are
    ``None``, ``direction`` is ``"unknown"`` and no class means are included.
    """
    feature = [(layer, metric)]
    legit_values: list[float] = []
    injected_values: list[float] = []
    for row in rows:
        if row.get("status") != "ok":
            continue
        extracted = vector_for_subset(row, feature)
        if extracted is None:
            continue
        label = row.get("label")
        if label == "legit":
            legit_values.append(extracted[0])
        elif label == "injected":
            injected_values.append(extracted[0])

    auc = compute_auc(legit_values, injected_values)
    report: dict[str, Any] = {"layer": layer, "metric": metric, "auc": auc}
    if auc is None:
        report["separation_auc"] = None
        report["direction"] = "unknown"
        return report

    higher = auc >= 0.5
    report["separation_auc"] = auc if higher else 1.0 - auc
    report["direction"] = "higher_for_injected" if higher else "lower_for_injected"
    report["legit_mean"] = mean_or_none(legit_values)
    report["injected_mean"] = mean_or_none(injected_values)
    return report
181
+
182
+
183
+ def mean_or_none(values: list[float]) -> float | None:
184
+ if not values:
185
+ return None
186
+ return float(sum(values) / len(values))
187
+
188
+
189
def infer_layers_and_metrics(rows: list[dict[str, Any]]) -> tuple[list[int], list[str]]:
    """Discover the probed layers and metric names from the first usable sample.

    Samples are scanned in order.  For the first one whose ``layer_metrics``
    is a dict, the layer numbers are its integer-like keys (sorted ascending)
    and the metric names are the keys of the first layer entry that is itself
    a dict.  If that sample yields no layers or no metrics, the scan moves on
    to the next sample.

    Entries that cannot describe a layer are skipped instead of aborting the
    scan: samples that are not dicts, and ``layer_metrics`` keys that do not
    parse as integers.

    Raises:
        ValueError: if no sample provides both layers and metrics.
    """
    for row in rows:
        if not isinstance(row, dict):
            continue
        layer_metrics = row.get("layer_metrics")
        if not isinstance(layer_metrics, dict):
            continue

        layers: list[int] = []
        for key in layer_metrics:
            try:
                layers.append(int(key))
            except (TypeError, ValueError):
                # Not a layer number (e.g. a stray annotation key); ignore it.
                continue
        layers.sort()

        metrics: list[str] = []
        for layer in layers:
            metric_map = layer_metrics.get(str(layer))
            if isinstance(metric_map, dict):
                metrics = list(metric_map)
                break

        if layers and metrics:
            return layers, metrics
    raise ValueError("no layer_metrics found in input samples")
204
+
205
+
206
def build_layer_features(layers: list[int], metrics: list[str]) -> list[tuple[int, str]]:
    """Return every ``(layer, metric)`` pair, grouped by layer in the given order."""
    pairs: list[tuple[int, str]] = []
    for layer in layers:
        pairs.extend((layer, metric) for metric in metrics)
    return pairs
208
+
209
+
210
def analyze(payload: dict[str, Any]) -> dict[str, Any]:
    """Run the full layer ablation over ``payload["samples"]`` and return the report.

    The report contains:

    * a distance AUC per layer (all metrics of that layer),
    * a distance AUC per metric (that metric across all layers),
    * a raw-value AUC for every individual ``(layer, metric)`` pair,
    * distance AUCs for four layer subsets: all layers, layers 4-8,
      layers 9-23 and layers 24 and above,
    * a summary naming the best layer, metric and layer/metric pair.

    The per-layer, per-metric and per-pair lists are sorted best first;
    entries without a score sort last.

    Raises:
        ValueError: if ``samples`` is not a list, or no layer metrics can be
            inferred from the samples.
    """
    rows = payload.get("samples")
    if not isinstance(rows, list):
        raise ValueError("input JSON must contain a samples list")
    layers, metrics = infer_layers_and_metrics(rows)

    def auc_rank(item: dict[str, Any]) -> float:
        # Missing AUCs rank below every real score.
        score = item.get("auc")
        return score if score is not None else -1.0

    def separation_rank(item: dict[str, Any]) -> float:
        score = item.get("separation_auc")
        return score if score is not None else -1.0

    per_layer: list[dict[str, Any]] = []
    for layer in layers:
        entry: dict[str, Any] = {"layer": layer}
        entry.update(distance_auc_for_features(rows, build_layer_features([layer], metrics)))
        per_layer.append(entry)

    per_metric: list[dict[str, Any]] = []
    for metric in metrics:
        entry = {"metric": metric}
        entry.update(distance_auc_for_features(rows, [(layer, metric) for layer in layers]))
        per_metric.append(entry)

    per_layer_metric = [raw_metric_auc(rows, layer, metric) for layer in layers for metric in metrics]

    zone_layers = {
        "crystallization_zone_4_8": [layer for layer in layers if 4 <= layer <= 8],
        "mid_layers_9_23": [layer for layer in layers if 9 <= layer < 24],
        "handoff_layers_24_plus": [layer for layer in layers if layer >= 24],
    }
    subsets = {"all_probe_layers": build_layer_features(layers, metrics)}
    for zone_name, members in zone_layers.items():
        subsets[zone_name] = build_layer_features(members, metrics)

    subset_results: dict[str, dict[str, Any]] = {}
    for zone_name, zone_features in subsets.items():
        if zone_features:
            subset_results[zone_name] = distance_auc_for_features(rows, zone_features)
        else:
            # No probed layer falls inside this zone.
            subset_results[zone_name] = {"auc": None}

    best_layer = max(per_layer, key=auc_rank)
    best_metric = max(per_metric, key=auc_rank)
    best_layer_metric = max(per_layer_metric, key=separation_rank)

    summary = {
        "source_detection_auc": payload.get("summary", {}).get("detection_auc"),
        "all_probe_layers_auc": subset_results["all_probe_layers"].get("auc"),
        "crystallization_zone_auc": subset_results["crystallization_zone_4_8"].get("auc"),
        "mid_layers_auc": subset_results["mid_layers_9_23"].get("auc"),
        "handoff_layers_auc": subset_results["handoff_layers_24_plus"].get("auc"),
        "best_single_layer": best_layer.get("layer"),
        "best_single_layer_auc": best_layer.get("auc"),
        "best_metric": best_metric.get("metric"),
        "best_metric_auc": best_metric.get("auc"),
        "best_layer_metric": {
            field: best_layer_metric.get(field)
            for field in ("layer", "metric", "separation_auc", "direction")
        },
    }

    return {
        "generated_at_utc": datetime.now(timezone.utc).isoformat(),
        "source_metadata": payload.get("metadata", {}),
        "layers": layers,
        "metrics": metrics,
        "summary": summary,
        "subsets": subset_results,
        "per_layer": sorted(per_layer, key=auc_rank, reverse=True),
        "per_metric": sorted(per_metric, key=auc_rank, reverse=True),
        "per_layer_metric": sorted(per_layer_metric, key=separation_rank, reverse=True),
    }
290
+
291
+
292
def write_markdown(result: dict[str, Any], path: Path) -> None:
    """Render the analysis result as a Markdown report and write it to ``path``.

    The report has a summary bullet list, a table with one row per entry of
    ``result["per_layer"]``, and a table with the first 12 entries of
    ``result["per_layer_metric"]``.  Parent directories of ``path`` are
    created if needed; the file is written as UTF-8.
    """
    summary = result["summary"]

    def table_row(*cells: Any) -> str:
        # One Markdown table row: "| a | b | c |".
        return "| " + " | ".join(f"{cell}" for cell in cells) + " |"

    plain_keys = (
        "source_detection_auc",
        "all_probe_layers_auc",
        "crystallization_zone_auc",
        "mid_layers_auc",
        "handoff_layers_auc",
    )

    out = [
        "# Qwen injection geometry layer ablation",
        "",
        f"Generated: `{result['generated_at_utc']}`",
        "",
        "## Summary",
        "",
    ]
    out += [f"- {key}: `{summary.get(key)}`" for key in plain_keys]
    out += [
        f"- best_single_layer: `L{summary.get('best_single_layer')}` auc=`{summary.get('best_single_layer_auc')}`",
        f"- best_metric: `{summary.get('best_metric')}` auc=`{summary.get('best_metric_auc')}`",
        f"- best_layer_metric: `{summary.get('best_layer_metric')}`",
        "",
        "## Per-layer distance AUC",
        "",
        "| Layer | AUC | Legit mean dist | Injected mean dist |",
        "|---:|---:|---:|---:|",
    ]
    for entry in result["per_layer"]:
        out.append(
            table_row(
                entry.get("layer"),
                entry.get("auc"),
                entry.get("legit_mean_distance"),
                entry.get("injected_mean_distance"),
            )
        )
    out += [
        "",
        "## Top layer-metric raw separations",
        "",
        "| Layer | Metric | Separation AUC | Direction |",
        "|---:|---|---:|---|",
    ]
    for entry in result["per_layer_metric"][:12]:
        out.append(
            table_row(
                entry.get("layer"),
                entry.get("metric"),
                entry.get("separation_auc"),
                entry.get("direction"),
            )
        )

    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(out) + "\n", encoding="utf-8")
334
+
335
+
336
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the ablation script.

    Each path option is accepted in both underscore and hyphen spelling
    (e.g. ``--input_json`` and ``--input-json``), is parsed as a ``Path`` and
    falls back to the corresponding module-level default.
    """
    parser = argparse.ArgumentParser(description="Layer ablation for saved Qwen injection geometry samples.")
    path_options = (
        ("input_json", DEFAULT_INPUT),
        ("output_json", DEFAULT_OUTPUT_JSON),
        ("output_md", DEFAULT_OUTPUT_MD),
    )
    for dest, fallback in path_options:
        underscore_flag = f"--{dest}"
        hyphen_flag = f"--{dest.replace('_', '-')}"
        parser.add_argument(underscore_flag, hyphen_flag, dest=dest, type=Path, default=fallback)
    return parser
342
+
343
+
344
def main() -> None:
    """Command-line entry point: load the samples, analyze them, write both reports.

    Reads the input JSON, runs ``analyze``, writes the result as indented JSON
    and as Markdown, then prints the output locations and the best
    single-layer AUC.
    """
    args = build_parser().parse_args()

    raw_text = args.input_json.read_text(encoding="utf-8")
    result = analyze(json.loads(raw_text))

    json_target = args.output_json
    json_target.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(result, ensure_ascii=False, indent=2)
    json_target.write_text(serialized, encoding="utf-8")

    write_markdown(result, args.output_md)

    print(f"saved_json={args.output_json}")
    print(f"saved_md={args.output_md}")
    print(f"best_single_layer_auc={result['summary']['best_single_layer_auc']}")


# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()