Search committed on
Commit
9a63f8a
·
1 Parent(s): 2f72321

auto: sync run_qwen_cross_profile_generation_synthesis.py

Browse files
scripts/run_qwen_cross_profile_generation_synthesis.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import math
6
+ from collections import defaultdict
7
+ from datetime import datetime, timezone
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
# Directory one level above the folder that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# Folder scanned for input JSON files.
ARCHIVE = ROOT.joinpath("archive")

# Maps a half-open token-count interval [lo, hi) to its profile label.
# Counts below 3 or at/above 10 fall outside every interval.
PROFILE_BINS: dict[tuple[int, int], str] = {
    (3, 5): "short",
    (5, 7): "medium",
    (7, 10): "long",
}
19
+
20
+
21
def infer_profile(token_count: int) -> str:
    """Return the profile label whose ``[lo, hi)`` bin contains *token_count*.

    Bins are taken from ``PROFILE_BINS`` in its declared order; the first
    matching bin wins. ``"unknown"`` is returned when no bin matches.
    """
    matching_labels = (
        name
        for (lower, upper), name in PROFILE_BINS.items()
        if lower <= token_count < upper
    )
    return next(matching_labels, "unknown")
26
+
27
+
28
def finite(value: Any) -> float | None:
    """Convert *value* to a finite float, or return ``None`` if impossible.

    Args:
        value: Anything ``float()`` may accept (number, numeric string, ...).

    Returns:
        The converted float when it is finite; ``None`` when the conversion
        fails or the result is NaN or +/-infinity.
    """
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: an int too large for a float (e.g. 10**400) is
        # treated like any other unusable value instead of crashing.
        return None
    return number if math.isfinite(number) else None
34
+
35
+
36
def _nested_value(case: dict[str, Any], section: str, key: str) -> Any:
    """Return ``case[section][key]``, or ``None`` if the section is missing or not a dict."""
    inner = case.get(section)
    return inner.get(key) if isinstance(inner, dict) else None


def load_generation_results(archive_dir: Path | None = None) -> list[dict[str, Any]]:
    """Collect one flat record per usable case from the archive's JSON files.

    Every ``*.json`` file directly inside the archive directory is read in
    sorted path order. Input that does not have the expected shape is skipped
    rather than raising:

    * files that cannot be read or parsed as JSON,
    * payloads that are not a JSON object, or whose ``"cases"`` is not a list,
    * cases that are not objects,
    * cases whose ``constraint_delta`` is not a finite number,
    * cases whose ``span_match.token_count`` is missing or not convertible
      to ``int``.

    Args:
        archive_dir: Directory to scan. Defaults to the module-level
            ``ARCHIVE`` when omitted.

    Returns:
        A list of dicts with the keys ``name``, ``anchor_group``,
        ``anchor_class``, ``token_count``, ``profile``, ``constraint_delta``,
        ``base_score``, ``anchor_score`` and ``source`` (the file name).
    """
    source_dir = ARCHIVE if archive_dir is None else archive_dir
    records: list[dict[str, Any]] = []
    for path in sorted(source_dir.glob("*.json")):
        try:
            payload = json.loads(path.read_text(encoding="utf-8"))
        except (OSError, ValueError, RecursionError):
            # Best-effort scan: unreadable, mis-encoded, malformed or
            # pathologically nested files are ignored.
            continue
        # A top-level JSON array/scalar has no .get(); skip it.
        if not isinstance(payload, dict):
            continue
        cases = payload.get("cases")
        if not isinstance(cases, list):
            continue
        for case in cases:
            if not isinstance(case, dict):
                continue
            delta = finite(case.get("constraint_delta"))
            if delta is None:
                continue
            raw_count = _nested_value(case, "span_match", "token_count")
            if raw_count is None:
                continue
            try:
                token_count = int(raw_count)
            except (TypeError, ValueError, OverflowError):
                # Non-numeric strings, NaN and infinity cannot be binned.
                continue
            records.append({
                "name": case.get("name", "?"),
                "anchor_group": case.get("anchor_group", "?"),
                "anchor_class": case.get("anchor_class", "?"),
                "token_count": token_count,
                "profile": infer_profile(token_count),
                "constraint_delta": delta,
                "base_score": finite(_nested_value(case, "base_analysis", "constraint_score")),
                "anchor_score": finite(_nested_value(case, "anchor_analysis", "constraint_score")),
                "source": path.name,
            })
    return records
67
+
68
+
69
def build_summary(records: list[dict[str, Any]]) -> dict[str, Any]:
    """Aggregate ``constraint_delta`` values per profile and per (case, profile).

    Args:
        records: Dicts that each provide at least ``"name"``, ``"profile"``
            and ``"constraint_delta"``.

    Returns:
        A dict with:

        * ``n_total_records`` / ``n_unique_cases`` - counts;
        * ``by_profile`` - ``n``, mean, min and max delta for each profile;
        * ``by_case_profile`` - ``n``, mean and the raw deltas keyed by
          ``"<name>__<profile>"``;
        * ``cases_with_multiple_profiles`` - for every case name seen under
          more than one profile: the sorted profiles, the mean delta per
          profile, and ``profile_effect`` (largest mean minus smallest mean);
        * ``n_cases_with_multiple_profiles`` - length of that list;
        * ``profile_effect_confirmed`` - ``True`` if any ``profile_effect``
          exceeds 0.5;
        * ``raw_records`` - the input list itself.
    """
    by_case_profile: dict[tuple[str, str], list[float]] = defaultdict(list)
    by_profile: dict[str, list[float]] = defaultdict(list)

    for record in records:
        delta = record["constraint_delta"]
        by_case_profile[(record["name"], record["profile"])].append(delta)
        by_profile[record["profile"]].append(delta)

    case_profile_stats: dict[str, dict[str, Any]] = {}
    # name -> {profile: mean delta}. Filled in sorted (name, profile) order so
    # the emitted key order is the same on every run.
    means_by_case: dict[str, dict[str, float]] = defaultdict(dict)
    for (name, profile), deltas in sorted(by_case_profile.items()):
        mean_delta = sum(deltas) / len(deltas)
        case_profile_stats[f"{name}__{profile}"] = {
            "n": len(deltas),
            "mean_delta": mean_delta,
            "deltas": deltas,
        }
        means_by_case[name][profile] = mean_delta

    profile_stats = {
        profile: {
            "n": len(deltas),
            "mean_delta": sum(deltas) / len(deltas),
            "min_delta": min(deltas),
            "max_delta": max(deltas),
        }
        for profile, deltas in sorted(by_profile.items())
    }

    # Reuse the per-(case, profile) means instead of rescanning all records
    # for each case name and each profile.
    cases_with_multiple_profiles = [
        {
            "name": name,
            "profiles": sorted(means),
            "mean_delta_by_profile": means,
            "profile_effect": max(means.values()) - min(means.values()),
        }
        for name, means in means_by_case.items()
        if len(means) > 1
    ]

    profile_effect_confirmed = any(
        entry["profile_effect"] > 0.5 for entry in cases_with_multiple_profiles
    )

    return {
        "n_total_records": len(records),
        "n_unique_cases": len(means_by_case),
        "n_cases_with_multiple_profiles": len(cases_with_multiple_profiles),
        "profile_effect_confirmed": profile_effect_confirmed,
        "cases_with_multiple_profiles": cases_with_multiple_profiles,
        "by_profile": profile_stats,
        "by_case_profile": case_profile_stats,
        "raw_records": records,
    }
127
+
128
+
129
def main() -> None:
    """Build the cross-profile summary, write it as JSON, and print a digest.

    Command-line options:
        --output_json: Destination file. Defaults to
            ``archive/P3_cross_profile_generation_synthesis.json`` under ``ROOT``.

    The written JSON holds a UTC timestamp and the full summary. The final
    ``===FINAL_RESULT===`` line printed to stdout carries the summary without
    its ``raw_records`` entry.
    """
    default_destination = ROOT / "archive" / "P3_cross_profile_generation_synthesis.json"
    cli = argparse.ArgumentParser()
    cli.add_argument("--output_json", type=Path, default=default_destination)
    options = cli.parse_args()

    records = load_generation_results()
    summary = build_summary(records)

    report = {
        "generated_at_utc": datetime.now(timezone.utc).isoformat(),
        "summary": summary,
    }
    destination = options.output_json
    destination.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(report, ensure_ascii=False, indent=2)
    destination.write_text(serialized, encoding="utf-8")

    print(f"records={len(records)}")
    print(f"cases_with_multi_profile={summary['n_cases_with_multiple_profiles']}")
    print(f"profile_effect_confirmed={summary['profile_effect_confirmed']}")
    for entry in summary["cases_with_multiple_profiles"]:
        print(f" {entry['name']}: effect={entry['profile_effect']:.2f} {entry['mean_delta_by_profile']}")
    # raw_records is left out of the one-line result printed to stdout.
    digest = {key: value for key, value in summary.items() if key != "raw_records"}
    print(f"===FINAL_RESULT==={json.dumps(digest)}")


if __name__ == "__main__":
    main()