ConorWang commited on
Commit
4fb8d93
·
verified ·
1 Parent(s): 10d30bd

Delete uncertainty_adapter_train_result.json

Browse files
Files changed (1) hide show
  1. uncertainty_adapter_train_result.json +0 -564
uncertainty_adapter_train_result.json DELETED
@@ -1,564 +0,0 @@
1
- {
2
- "artifacts": {
3
- "adapter_dir": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/adapter",
4
- "best_checkpoint_manifest": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/best_checkpoint_manifest.json",
5
- "epoch_history": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/epoch_history.json",
6
- "eval_jsonl": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/uncertainty_eval.jsonl",
7
- "host_manifest": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/host_manifest.json",
8
- "plan_json": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/uncertainty_adapter_plan.json",
9
- "tokenizer_dir": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/tokenizer",
10
- "train_jsonl": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/uncertainty_train.jsonl",
11
- "training_manifest": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/uncertainty_training_manifest.json",
12
- "uncertainty_head": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/uncertainty_head.pt"
13
- },
14
- "dataset": {
15
- "eval_modes": {
16
- "conflicting_evidence": 5,
17
- "evidence_gap": 5,
18
- "exec_required": 5,
19
- "high_risk": 5,
20
- "low_uncertainty": 5,
21
- "patch_pending": 5,
22
- "reverse_engineering_ambiguity": 5,
23
- "self_check_failure": 5,
24
- "spec_mismatch": 5,
25
- "validator_negation": 5,
26
- "worktree_conflict": 5
27
- },
28
- "eval_size": 55,
29
- "train_modes": {
30
- "conflicting_evidence": 14,
31
- "evidence_gap": 14,
32
- "exec_required": 14,
33
- "high_risk": 14,
34
- "low_uncertainty": 14,
35
- "patch_pending": 14,
36
- "reverse_engineering_ambiguity": 14,
37
- "self_check_failure": 14,
38
- "spec_mismatch": 14,
39
- "validator_negation": 14,
40
- "worktree_conflict": 14
41
- },
42
- "train_size": 154
43
- },
44
- "eval_metrics": {
45
- "adapter_exported": true,
46
- "auto_lora_from_ia3": false,
47
- "best_epoch": 2,
48
- "best_quality_score": 0.8625935807221907,
49
- "count": 55,
50
- "mae": {
51
- "u_answer": 0.15174226462841034,
52
- "u_evidence": 0.19610758125782013,
53
- "u_exec": 0.18561214208602905,
54
- "u_risk": 0.1553734689950943,
55
- "u_spec": 0.21633382141590118
56
- },
57
- "mean_mae": 0.18103384971618652,
58
- "mean_rmse": 0.24169571697711945,
59
- "moderate_accuracy": 0.6727272727272727,
60
- "peft_method": "lora_narrow",
61
- "quality_score": 0.8625935807221907,
62
- "rmse": {
63
- "u_answer": 0.18724055588245392,
64
- "u_evidence": 0.22527915239334106,
65
- "u_exec": 0.25238174200057983,
66
- "u_risk": 0.20667441189289093,
67
- "u_spec": 0.3369026482105255
68
- },
69
- "tight_accuracy": 0.4,
70
- "used_peft": true,
71
- "weighted_mae": 0.18083095811830807,
72
- "weighted_rmse": 0.24125460771003793
73
- },
74
- "plan": {
75
- "adapter_alpha": 16,
76
- "adapter_dropout": 0.05,
77
- "adapter_rank": 8,
78
- "backbone_manifest": {
79
- "host_inventory_size": 79
80
- },
81
- "config": {
82
- "allow_backbone_bridge": false,
83
- "backbone": "/public/wang_libo/veriloop_coder_e1/model",
84
- "bf16": true,
85
- "cache_dir": null,
86
- "cpu_max_memory_gib": 96,
87
- "dataset_jsonl": null,
88
- "dim_weights": {
89
- "u_answer": 1.1,
90
- "u_evidence": 1.35,
91
- "u_exec": 1.35,
92
- "u_risk": 1.55,
93
- "u_spec": 1.25
94
- },
95
- "early_stopping_min_delta": 0.001,
96
- "early_stopping_patience": 2,
97
- "enable_synthetic_dataset": true,
98
- "eval_jsonl": null,
99
- "eval_samples_per_mode": 5,
100
- "fp16": false,
101
- "gpu_max_memory_gib": 44,
102
- "gradient_accumulation_steps": 16,
103
- "host_dropout": 0.0,
104
- "learning_rate": 2e-05,
105
- "local_files_only": true,
106
- "logging_steps": 10,
107
- "max_grad_norm": 1.0,
108
- "max_length": 1664,
109
- "min_epochs_before_early_stop": 2,
110
- "num_train_epochs": 4.0,
111
- "output_dir": "./outputs/uncertainty_qwen36_rootfix_run1",
112
- "per_device_eval_batch_size": 1,
113
- "per_device_train_batch_size": 1,
114
- "prefer_best_checkpoint_export": true,
115
- "probe_dropout": 0.03,
116
- "probe_hidden_factor": 0.75,
117
- "product_line": "veriloop_coder",
118
- "quantization_mode": "4bit",
119
- "quantization_required": false,
120
- "require_trainable_targets": true,
121
- "revision": null,
122
- "run_post_train_eval": true,
123
- "save_best_checkpoint": true,
124
- "seed": 11,
125
- "selection_mode": "minimal",
126
- "train_samples_per_mode": 14,
127
- "training_mode": "mounted_head",
128
- "trust_remote_code": true,
129
- "use_double_quant": true,
130
- "warmup_ratio": 0.05,
131
- "weight_decay": 0.0,
132
- "weighted_mae_penalty": 0.5,
133
- "weighted_rmse_penalty": 0.5
134
- },
135
- "dataset_summary": {
136
- "eval_modes": {
137
- "conflicting_evidence": 5,
138
- "evidence_gap": 5,
139
- "exec_required": 5,
140
- "high_risk": 5,
141
- "low_uncertainty": 5,
142
- "patch_pending": 5,
143
- "reverse_engineering_ambiguity": 5,
144
- "self_check_failure": 5,
145
- "spec_mismatch": 5,
146
- "validator_negation": 5,
147
- "worktree_conflict": 5
148
- },
149
- "eval_size": 55,
150
- "train_modes": {
151
- "conflicting_evidence": 14,
152
- "evidence_gap": 14,
153
- "exec_required": 14,
154
- "high_risk": 14,
155
- "low_uncertainty": 14,
156
- "patch_pending": 14,
157
- "reverse_engineering_ambiguity": 14,
158
- "self_check_failure": 14,
159
- "spec_mismatch": 14,
160
- "validator_negation": 14,
161
- "worktree_conflict": 14
162
- },
163
- "train_size": 154
164
- },
165
- "head_strategy": "host_head",
166
- "notes": [
167
- "Primary route is host-surface-first uncertainty training.",
168
- "Validator receipts, self-check failure, reverse-engineering ambiguity, worktree conflicts, and patch continuity are first-class signals.",
169
- "DualPath, Full AttnRes, mHC, routers, experts, and broad attention-layer PEFT stay excluded.",
170
- "This adapter should improve runtime uncertainty routing, not general coding free-formity."
171
- ],
172
- "peft_method": "lora_narrow",
173
- "recipe": {
174
- "adapter_family": "uncertainty",
175
- "backbone": "/public/wang_libo/veriloop_coder_e1/model",
176
- "backbone_family": "qwen_dense",
177
- "excluded_patterns": [
178
- "(?i)\\bdualpath\\b",
179
- "(?i)\\bmhc\\b",
180
- "(?i)\\bfull[_\\- ]?attnres\\b",
181
- "(?i)\\battnres(_full)?\\b",
182
- "(?i)\\brouter\\b",
183
- "(?i)\\bexperts?\\b",
184
- "(?i)\\bmoe\\b.*\\b(gate|router|expert)\\b",
185
- "(?i)\\brope\\b.*\\b(freq|inv_freq|theta|rotary)\\b",
186
- "(?i)\\bkvcache\\b",
187
- "(?i)\\bposition_embedding\\b",
188
- "(?i)\\bembed(tokens|ding)?\\b",
189
- "(?i)\\blm_head\\b"
190
- ],
191
- "harness_constraints": [
192
- "Harness Engineering remains the primary convergence layer.",
193
- "Adapter must not bypass runtime orchestrator / validator / rollback loops.",
194
- "Adapter outputs remain subordinate to VeriLoop control-plane decisions.",
195
- "Adapter must not create hidden prompt-style memory authority.",
196
- "Adapter must support bounded uncertainty calibration rather than generic hesitation.",
197
- "Validator and receipt evidence must remain able to update uncertainty."
198
- ],
199
- "hyperparams": {
200
- "alpha": 16,
201
- "bias": "none",
202
- "dropout": 0.05,
203
- "fan_in_fan_out": false,
204
- "modules_to_save": [
205
- "input_layernorm"
206
- ],
207
- "r": 8,
208
- "task_type": "CAUSAL_LM"
209
- },
210
- "merge_policy": "merge_after_guard",
211
- "metadata": {
212
- "allow_backbone_bridge": false,
213
- "allow_vla_action_expert": false,
214
- "harness_first": true,
215
- "prefer_explicit_heads": true,
216
- "prefer_qlora_for_backbone_bridge": true,
217
- "require_harness_first": true,
218
- "selector_group_count": 2,
219
- "strict_narrow_scope": true,
220
- "trainer": "veriloop.uncertainty_adapter_trainer.v5.qwen36",
221
- "uncertainty_training": true
222
- },
223
- "notes": [
224
- "Backbone bridge tuning disabled explicitly; selector stays on custom surfaces or no-op.",
225
- "Backbone family inferred as qwen_dense.",
226
- "PEFT method resolved as lora_narrow.",
227
- "Recipe is harness-first: runtime convergence remains in VeriLoop control-plane + harness, not in broad weight surgery.",
228
- "Block AttnRes, DualPath, mHC hooks, RoPE, KV-cache, and broad MoE routing remain structurally excluded."
229
- ],
230
- "peft_method": "lora_narrow",
231
- "precision_policy": "auto",
232
- "product_line": "veriloop_coder",
233
- "regression_requirements": [
234
- "Must pass PEFT regression guard structural policy checks.",
235
- "Must not introduce forbidden backbone/serving structural targets.",
236
- "Must preserve harness regression envelope for the selected product line.",
237
- "Budgeted uncertainty convergence must not regress.",
238
- "Uncertainty calibration must not collapse into generic caution."
239
- ],
240
- "target_groups": [
241
- {
242
- "alpha": 16,
243
- "dropout": 0.0,
244
- "name": "group_1_custom_control_head",
245
- "rank": 8,
246
- "rationale": "Prefer explicit uncertainty / calibration heads over backbone surgery.",
247
- "surface": "custom_control_head",
248
- "target_modules": [
249
- "uncertainty_head",
250
- "uncertainty_head.calibration_mlp",
251
- "uncertainty_head.proj"
252
- ]
253
- },
254
- {
255
- "alpha": 16,
256
- "dropout": 0.0,
257
- "name": "group_2_custom_validator_bridge",
258
- "rank": 8,
259
- "rationale": "Validation and rollback fidelity should prefer explicit validator / rollback bridges.",
260
- "surface": "custom_validator_bridge",
261
- "target_modules": [
262
- "failure_signal_bridge",
263
- "failure_signal_bridge.rollback_bridge",
264
- "rollback_adapter",
265
- "rollback_adapter.head",
266
- "sandbox_rollback_bridge",
267
- "sandbox_rollback_bridge.adapter",
268
- "validator_feedback_bridge",
269
- "validator_feedback_bridge.adapter",
270
- "validator_feedback_loop.rollback_adapter"
271
- ]
272
- }
273
- ],
274
- "target_modules": [
275
- "uncertainty_head",
276
- "uncertainty_head.calibration_mlp",
277
- "uncertainty_head.proj",
278
- "failure_signal_bridge",
279
- "failure_signal_bridge.rollback_bridge",
280
- "rollback_adapter",
281
- "rollback_adapter.head",
282
- "sandbox_rollback_bridge",
283
- "sandbox_rollback_bridge.adapter",
284
- "validator_feedback_bridge",
285
- "validator_feedback_bridge.adapter",
286
- "validator_feedback_loop.rollback_adapter"
287
- ],
288
- "version": "veriloop.lora_recipe_veriloop.v2"
289
- },
290
- "selected_target_modules": [
291
- "uncertainty_head",
292
- "uncertainty_head.calibration_mlp",
293
- "uncertainty_head.proj"
294
- ],
295
- "target_selection": {
296
- "backbone_archetype": "qwen_dense",
297
- "exclusions": [
298
- {
299
- "pattern": "(^|\\.)lm_head($|\\.)",
300
- "reason": "Do not retune final token head; too broad and evaluation-heavy."
301
- },
302
- {
303
- "pattern": "(^|\\.)embed_tokens($|\\.)",
304
- "reason": "Embedding surgery risks broad semantic drift."
305
- },
306
- {
307
- "pattern": "(^|\\.)norm($|\\.)",
308
- "reason": "Global norm tuning can destabilize calibration across scenes."
309
- },
310
- {
311
- "pattern": "attnres|attention_residual",
312
- "reason": "Block AttnRes may be mounted structurally but is never a PEFT target."
313
- },
314
- {
315
- "pattern": "dualpath",
316
- "reason": "DualPath is serving/runtime infrastructure only."
317
- },
318
- {
319
- "pattern": "mhc|hyper[-_]?connection",
320
- "reason": "mHC-inspired stability hooks remain structural, not PEFT surfaces."
321
- },
322
- {
323
- "pattern": "rope|rotary",
324
- "reason": "RoPE/context surgery is handled architecturally, not by narrow PEFT here."
325
- },
326
- {
327
- "pattern": "kvcache|kv_cache",
328
- "reason": "KV-cache runtime surfaces are not PEFT targets."
329
- },
330
- {
331
- "pattern": "(^|\\.)memory(_store|_bank)?($|\\.)",
332
- "reason": "Persistent memory stores are harness/runtime policy surfaces, not PEFT targets."
333
- }
334
- ],
335
- "inventory_size": 79,
336
- "inventory_source": "provided_names",
337
- "notes": [
338
- "Harness Engineering is primary; PEFT is limited to obedience-facing, interface-facing support surfaces.",
339
- "Backbone bridge tuning disabled explicitly; selector stays on custom surfaces or no-op."
340
- ],
341
- "recommended_training": "ia3_head_only",
342
- "request": {
343
- "allow_backbone_bridge": false,
344
- "allow_vla_action_expert": false,
345
- "backbone": "/public/wang_libo/veriloop_coder_e1/model",
346
- "explicit_exclude_patterns": [],
347
- "explicit_include_patterns": [],
348
- "intents": [
349
- "uncertainty",
350
- "validator_alignment",
351
- "harness_alignment",
352
- "runtime_protocol",
353
- "session_continuity",
354
- "worktree_discipline"
355
- ],
356
- "prefer_qlora_for_backbone_bridge": true,
357
- "product_line": "veriloop_coder",
358
- "selection_mode": "minimal"
359
- },
360
- "selected_groups": [
361
- {
362
- "alpha": 16,
363
- "dropout": 0.0,
364
- "exclude_patterns": [
365
- "(^|\\.)lm_head($|\\.)",
366
- "(^|\\.)embed_tokens($|\\.)",
367
- "(^|\\.)norm($|\\.)",
368
- "attnres|attention_residual",
369
- "dualpath",
370
- "mhc|hyper[-_]?connection",
371
- "rope|rotary",
372
- "kvcache|kv_cache",
373
- "(^|\\.)memory(_store|_bank)?($|\\.)"
374
- ],
375
- "include_patterns": [
376
- "(^|\\.)(uncertainty_head|uncertainty_adapter|calib(_head|ration_adapter)?)($|\\.)"
377
- ],
378
- "intents": [
379
- "uncertainty"
380
- ],
381
- "layer_window": {
382
- "mode": "all",
383
- "value": 0.0
384
- },
385
- "matched_module_names": [
386
- "uncertainty_head",
387
- "uncertainty_head.calibration_mlp",
388
- "uncertainty_head.proj"
389
- ],
390
- "name": "group_1_custom_control_head",
391
- "rank": 8,
392
- "rationale": "Prefer explicit uncertainty / calibration heads over backbone surgery.",
393
- "risk": "low",
394
- "surface": "custom_control_head"
395
- },
396
- {
397
- "alpha": 16,
398
- "dropout": 0.05,
399
- "exclude_patterns": [
400
- "(^|\\.)lm_head($|\\.)",
401
- "(^|\\.)embed_tokens($|\\.)",
402
- "(^|\\.)norm($|\\.)",
403
- "attnres|attention_residual",
404
- "dualpath",
405
- "mhc|hyper[-_]?connection",
406
- "rope|rotary",
407
- "kvcache|kv_cache",
408
- "(^|\\.)memory(_store|_bank)?($|\\.)"
409
- ],
410
- "include_patterns": [
411
- "(^|\\.)(runtime_harness|query_runtime|task_brief|task_runtime|tool_protocol|permission_context|worktree|session_state|request_normalizer|action_allowlist|constraint_guard|progress_state|workspace_snapshot|repo_contract|knowledge_entry|completion_criteria|search_bridge|sandbox_search_bridge)(_adapter|_bridge|_head)?($|\\.)",
412
- "(^|\\.)(toolspec|tool(_call)?(_grammar|_interface)?|harness|validator|rollback|receipt|patch|permission|session|worktree)(_adapter|_bridge|_head)?($|\\.)"
413
- ],
414
- "intents": [
415
- "harness_alignment",
416
- "runtime_protocol",
417
- "session_continuity",
418
- "worktree_discipline"
419
- ],
420
- "layer_window": {
421
- "mode": "all",
422
- "value": 0.0
423
- },
424
- "matched_module_names": [
425
- "failure_signal_bridge.rollback_bridge",
426
- "request_normalizer",
427
- "request_normalizer.adapter",
428
- "rollback_adapter",
429
- "rollback_adapter.head",
430
- "runtime_harness_adapter",
431
- "runtime_harness_adapter.bridge",
432
- "tool_protocol_adapter",
433
- "tool_protocol_adapter.bridge",
434
- "toolspec_bridge",
435
- "toolspec_bridge.adapter",
436
- "toolspec_head",
437
- "toolspec_head.param_schema_adapter",
438
- "toolspec_head.postcondition_adapter",
439
- "toolspec_head.precondition_adapter",
440
- "toolspec_head.receipt_formatter",
441
- "toolspec_head.trigger_gate",
442
- "toolspec_head.validator_gate",
443
- "validator_feedback_loop.rollback_adapter"
444
- ],
445
- "name": "group_2_custom_runtime_harness_bridge",
446
- "rank": 8,
447
- "rationale": "Runtime / harness obedience should attach to explicit interface bridges before any backbone fallback.",
448
- "risk": "low",
449
- "surface": "custom_runtime_harness_bridge"
450
- },
451
- {
452
- "alpha": 16,
453
- "dropout": 0.0,
454
- "exclude_patterns": [
455
- "(^|\\.)lm_head($|\\.)",
456
- "(^|\\.)embed_tokens($|\\.)",
457
- "(^|\\.)norm($|\\.)",
458
- "attnres|attention_residual",
459
- "dualpath",
460
- "mhc|hyper[-_]?connection",
461
- "rope|rotary",
462
- "kvcache|kv_cache",
463
- "(^|\\.)memory(_store|_bank)?($|\\.)"
464
- ],
465
- "include_patterns": [
466
- "(^|\\.)(validator_feedback|sandbox_result_validator|sandbox_rollback_bridge|failure_signal|rollback)(_adapter|_bridge|_head)?($|\\.)"
467
- ],
468
- "intents": [
469
- "validator_alignment"
470
- ],
471
- "layer_window": {
472
- "mode": "all",
473
- "value": 0.0
474
- },
475
- "matched_module_names": [
476
- "failure_signal_bridge",
477
- "failure_signal_bridge.rollback_bridge",
478
- "rollback_adapter",
479
- "rollback_adapter.head",
480
- "sandbox_rollback_bridge",
481
- "sandbox_rollback_bridge.adapter",
482
- "validator_feedback_bridge",
483
- "validator_feedback_bridge.adapter",
484
- "validator_feedback_loop.rollback_adapter"
485
- ],
486
- "name": "group_3_custom_validator_bridge",
487
- "rank": 8,
488
- "rationale": "Validation and rollback fidelity should prefer explicit validator / rollback bridges.",
489
- "risk": "low",
490
- "surface": "custom_validator_bridge"
491
- },
492
- {
493
- "alpha": 8,
494
- "dropout": 0.0,
495
- "exclude_patterns": [
496
- "(^|\\.)lm_head($|\\.)",
497
- "(^|\\.)embed_tokens($|\\.)",
498
- "(^|\\.)norm($|\\.)",
499
- "attnres|attention_residual",
500
- "dualpath",
501
- "mhc|hyper[-_]?connection",
502
- "rope|rotary",
503
- "kvcache|kv_cache",
504
- "(^|\\.)memory(_store|_bank)?($|\\.)"
505
- ],
506
- "include_patterns": [
507
- "(^|\\.)(memory_boundary_guard|episodic_memory|session_compactor)(_adapter|_bridge|_head)?($|\\.)"
508
- ],
509
- "intents": [
510
- "session_continuity"
511
- ],
512
- "layer_window": {
513
- "mode": "all",
514
- "value": 0.0
515
- },
516
- "matched_module_names": [
517
- "episodic_memory",
518
- "episodic_memory.adapter",
519
- "memory_boundary_guard",
520
- "memory_boundary_guard.adapter",
521
- "memory_boundary_guard.rollback_filter",
522
- "session_compactor",
523
- "session_compactor.adapter"
524
- ],
525
- "name": "group_4_custom_memory_boundary_bridge",
526
- "rank": 4,
527
- "rationale": "Session continuity should bind to boundary-aware memory packet surfaces rather than broad backbone tuning.",
528
- "risk": "low",
529
- "surface": "custom_memory_boundary_bridge"
530
- }
531
- ],
532
- "upstream_profile_hint": null,
533
- "version": "veriloop.peft_target_selector.v2",
534
- "warnings": []
535
- },
536
- "training_mode": "mounted_head",
537
- "version": "veriloop.uncertainty_adapter_trainer.v5.qwen36",
538
- "warnings": [
539
- "Harness Engineering is primary; PEFT is limited to obedience-facing, interface-facing support surfaces.",
540
- "Backbone bridge tuning disabled explicitly; selector stays on custom surfaces or no-op."
541
- ]
542
- },
543
- "status": "trained",
544
- "train_metrics": {
545
- "adapter_exported": true,
546
- "auto_lora_from_ia3": false,
547
- "best_epoch": 2,
548
- "best_quality_score": 0.8625935807221907,
549
- "epochs_completed": 4,
550
- "loss": 0.009006613283418119,
551
- "micro_batches": 154,
552
- "micro_batches_total": 616,
553
- "optimizer_steps": 10,
554
- "optimizer_steps_total": 40,
555
- "peft_method": "lora_narrow",
556
- "used_peft": true
557
- },
558
- "version": "veriloop.uncertainty_adapter_trainer.v5.qwen36",
559
- "warnings": [
560
- "Harness Engineering is primary; PEFT is limited to obedience-facing, interface-facing support surfaces.",
561
- "Backbone bridge tuning disabled explicitly; selector stays on custom surfaces or no-op.",
562
- "Synthetic holdout split used for quality-oriented train/eval separation."
563
- ]
564
- }