abdelstark committed on
Commit
d74cd14
·
verified ·
1 Parent(s): c803f37

Add CodeLeWM checkpoint artifact codelewm-v0-2-action-swap-rerun-20260520-7c7cb0b

Browse files
checkpoints/codelewm-v0-2-action-swap-rerun-20260520-7c7cb0b/checkpoints/checkpoint.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2c5ba50ee0ec5e32ff5c3ceed848020e989ebdb1c98a917f17589ee523c6d7e
3
+ size 254388041
checkpoints/codelewm-v0-2-action-swap-rerun-20260520-7c7cb0b/checkpoints/checkpoint.pt.manifest.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "checkpoint_path": "checkpoint.pt",
3
+ "checkpoint_sha256": "f2c5ba50ee0ec5e32ff5c3ceed848020e989ebdb1c98a917f17589ee523c6d7e",
4
+ "metadata": {
5
+ "action_view": "text",
6
+ "config_hash": "5e42808433b8fc0d7eeff7ccd74f22453e2e3c95d2b6ea9e6057b1668043a793",
7
+ "latent_dim": 256,
8
+ "model_class": "TorchCodeTransitionModel",
9
+ "record_schema_version": "codelewm.transition.v1",
10
+ "schema_version": "codelewm.checkpoint.v1"
11
+ },
12
+ "migration_hook": null,
13
+ "schema_version": "codelewm.checkpoint.v1"
14
+ }
checkpoints/codelewm-v0-2-action-swap-rerun-20260520-7c7cb0b/config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data": {
3
+ "manifest": "/CodeLeWM/.artifacts/hf/codelewm-v0-2-action-swap-rerun-20260520-7c7cb0b/pack/manifest.json",
4
+ "train": "/CodeLeWM/.artifacts/hf/codelewm-v0-2-action-swap-rerun-20260520-7c7cb0b/pack/hdf5/train.hdf5",
5
+ "val": "/CodeLeWM/.artifacts/hf/codelewm-v0-2-action-swap-rerun-20260520-7c7cb0b/pack/hdf5/val.hdf5"
6
+ },
7
+ "loader": {
8
+ "batch_size": 64,
9
+ "num_workers": 4,
10
+ "persistent_workers": true,
11
+ "pin_memory": true,
12
+ "shuffle": true
13
+ },
14
+ "loss": {
15
+ "action_swap_contrastive_margin": 0.05,
16
+ "action_swap_contrastive_weight": 0.2,
17
+ "action_use_margin": 0.02,
18
+ "action_use_margin_weight": 0.25,
19
+ "enable_action_swap_contrastive": true,
20
+ "enable_action_use_margin": true,
21
+ "enable_inverse_action_reconstruction": true,
22
+ "enable_retrieval_loss": false,
23
+ "inverse_action_reconstruction_weight": 0.1,
24
+ "retrieval_temperature": 0.1,
25
+ "retrieval_weight": 0.0,
26
+ "sigreg_knots": 17,
27
+ "sigreg_num_proj": 1024,
28
+ "sigreg_weight": 0.09
29
+ },
30
+ "name": "codelewm_scaled_v0_2_action_swap_inverse_gpu_a10g",
31
+ "optimizer": {
32
+ "lr": 0.0001,
33
+ "type": "AdamW",
34
+ "weight_decay": 0.001
35
+ },
36
+ "output": {
37
+ "checkpoint_dir": "/CodeLeWM/.artifacts/hf/codelewm-v0-2-action-swap-rerun-20260520-7c7cb0b/train/checkpoints",
38
+ "manifest_path": "/CodeLeWM/.artifacts/hf/codelewm-v0-2-action-swap-rerun-20260520-7c7cb0b/train/training_manifest.json",
39
+ "metrics_path": "/CodeLeWM/.artifacts/hf/codelewm-v0-2-action-swap-rerun-20260520-7c7cb0b/train/metrics.jsonl",
40
+ "run_dir": "/CodeLeWM/.artifacts/hf/codelewm-v0-2-action-swap-rerun-20260520-7c7cb0b/train"
41
+ },
42
+ "schema_version": "codelewm.train_config.v1",
43
+ "seed": 240119,
44
+ "trainer": {
45
+ "accelerator": "gpu",
46
+ "devices": 1,
47
+ "gradient_clip_val": 1.0,
48
+ "max_steps": 60000,
49
+ "precision": "bf16-mixed"
50
+ },
51
+ "wm": {
52
+ "action_fusion": "gated_residual",
53
+ "action_sequence_length": 256,
54
+ "action_view": "text",
55
+ "embed_dim": 256,
56
+ "history_size": 1,
57
+ "num_preds": 1,
58
+ "state_sequence_length": 1024
59
+ }
60
+ }
checkpoints/codelewm-v0-2-action-swap-rerun-20260520-7c7cb0b/manifest.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "artifact_id": "training_run-0a41863d1da33737",
3
+ "artifact_kind": "training_run",
4
+ "command": [
5
+ "codelewm",
6
+ "train",
7
+ "--config",
8
+ "/CodeLeWM/.artifacts/hf/codelewm-v0-2-action-swap-rerun-20260520-7c7cb0b/configs/train.json",
9
+ "--executor",
10
+ "torch",
11
+ "--device",
12
+ "auto",
13
+ "--overwrite",
14
+ "--json",
15
+ "--log-jsonl",
16
+ "/CodeLeWM/.artifacts/hf/codelewm-v0-2-action-swap-rerun-20260520-7c7cb0b/logs/train.jsonl"
17
+ ],
18
+ "config_sha256": "6bf543fe67dc43831a0c450b746986641f31cdf33179d8cb669fc05968ff967b",
19
+ "created_at": "2026-05-20T18:04:49Z",
20
+ "files": [
21
+ {
22
+ "bytes": 2069,
23
+ "path": "config.json",
24
+ "sha256": "beb59b73ecd13e78852635c9f8eb3f37bd2aceb36ff563d9599f27d8dd5abec4"
25
+ },
26
+ {
27
+ "bytes": 2144,
28
+ "path": "metrics.jsonl",
29
+ "sha256": "4858f91df9b0b5e51dc97e11583caab637048baa9b51e0ecb7d2f9288f7da91f"
30
+ },
31
+ {
32
+ "bytes": 2326,
33
+ "path": "reports/metrics_report.json",
34
+ "sha256": "794990f624ccde341d9c630df0c623161545915ff46a31fb237b99808f6873fd"
35
+ },
36
+ {
37
+ "bytes": 254388041,
38
+ "path": "checkpoints/checkpoint.pt",
39
+ "sha256": "f2c5ba50ee0ec5e32ff5c3ceed848020e989ebdb1c98a917f17589ee523c6d7e"
40
+ },
41
+ {
42
+ "bytes": 511,
43
+ "path": "checkpoints/checkpoint.pt.manifest.json",
44
+ "sha256": "98b3be6594bcb3d6e59999b47017ea146083d0d7ce54d675bfdc707d09c0b5bf"
45
+ },
46
+ {
47
+ "bytes": 3608,
48
+ "path": "reports/torch_training_report.json",
49
+ "sha256": "7a669bfeef22ff0e3420fc4b7453fdb436b76472bfa633b0ebef28cc9d616165"
50
+ }
51
+ ],
52
+ "metadata": {
53
+ "dataset_manifest_path": ".artifacts/hf/codelewm-v0-2-action-swap-rerun-20260520-7c7cb0b/pack/manifest.json",
54
+ "executor": {
55
+ "checkpoint_schema_version": "codelewm.checkpoint.v1",
56
+ "device": "cuda",
57
+ "executor": "torch",
58
+ "objective": {
59
+ "action_swap_contrastive_margin": 0.05,
60
+ "action_swap_contrastive_weight": 0.2,
61
+ "action_use_margin": 0.02,
62
+ "action_use_margin_weight": 0.25,
63
+ "enable_action_swap_contrastive": true,
64
+ "enable_action_use_margin": true,
65
+ "enable_inverse_action_reconstruction": true,
66
+ "enable_retrieval_loss": false,
67
+ "inverse_action_reconstruction_weight": 0.1,
68
+ "retrieval_temperature": 0.1,
69
+ "retrieval_weight": 0.0,
70
+ "sigreg_knots": 17,
71
+ "sigreg_num_proj": 1024,
72
+ "sigreg_weight": 0.09
73
+ },
74
+ "precision": "bf16-mixed",
75
+ "torch": "2.12.0+cu130",
76
+ "train_rows": 18019,
77
+ "val_rows": 1291
78
+ },
79
+ "final_metrics": {
80
+ "action_diagnostics/positive_distance": 0.004852294921875,
81
+ "action_diagnostics/swap_distance_gap": 0.26171875,
82
+ "action_diagnostics/swapped_distance": 0.265625,
83
+ "collapse/effective_rank": 4.03470884645853,
84
+ "collapse/effective_rank_ratio": 0.015760581431478633,
85
+ "collapse/embedding_count": 192.0,
86
+ "collapse/embedding_norm_mean": 14.392418570878986,
87
+ "collapse/latent_dim": 256.0,
88
+ "collapse/nearest_neighbor_entropy": 4.822367445946839,
89
+ "collapse/pairwise_cosine_mean": 0.006447293492730005,
90
+ "collapse/per_dim_variance_max": 3.456151289617992,
91
+ "collapse/per_dim_variance_median": 0.7004040151172777,
92
+ "collapse/per_dim_variance_min": 0.04232150622637062,
93
+ "loss/action_swap_contrastive": 0.0048749265260994434,
94
+ "loss/action_swap_contrastive_weighted": 0.0009749853052198887,
95
+ "loss/action_use_margin": 0.007081065326929092,
96
+ "loss/action_use_margin_weighted": 0.001770266331732273,
97
+ "loss/inverse_action_reconstruction": 0.21137171983718872,
98
+ "loss/inverse_action_reconstruction_weighted": 0.021137172356247902,
99
+ "loss/prediction_mse": 0.004846842493861914,
100
+ "loss/sigreg": 1.0546875,
101
+ "loss/sigreg_weighted": 0.0947265625,
102
+ "loss/total": 0.12345582246780396,
103
+ "train/examples": 3833852.0,
104
+ "train/examples_per_second": 1323.9171764663713,
105
+ "train/gradient_norm": 2.023609161376953,
106
+ "val/action_diagnostics/positive_distance": 0.1494137846997806,
107
+ "val/action_diagnostics/swap_distance_gap": 0.03513682243369874,
108
+ "val/action_diagnostics/swapped_distance": 0.18455060713347934,
109
+ "val/loss/action_swap_contrastive": 0.059425451925822666,
110
+ "val/loss/action_swap_contrastive_weighted": 0.011885090686735652,
111
+ "val/loss/action_use_margin": 0.05836626293048972,
112
+ "val/loss/action_use_margin_weighted": 0.01459156573262243,
113
+ "val/loss/inverse_action_reconstruction": 0.552373686007091,
114
+ "val/loss/inverse_action_reconstruction_weighted": 0.055237369877951484,
115
+ "val/loss/prediction_mse": 0.14941378363541194,
116
+ "val/loss/sigreg": 2.168621886344183,
117
+ "val/loss/sigreg_weighted": 0.19517597804466882,
118
+ "val/loss/total": 0.4263037897291638
119
+ },
120
+ "run_id": "codelewm_scaled_v0_2_action_swap_inverse_gpu_a10g",
121
+ "schema_version": "codelewm.training_run.v1",
122
+ "seed": 240119,
123
+ "step_count": 60000
124
+ },
125
+ "parent_artifacts": [
126
+ "dataset-daecac9f9965c563"
127
+ ],
128
+ "schema_version": "codelewm.artifact_manifest.v1",
129
+ "source_git_sha": "7c7cb0b8fe132e4819f05a77585c254267e77574"
130
+ }
checkpoints/codelewm-v0-2-action-swap-rerun-20260520-7c7cb0b/metrics.jsonl ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics": {"action_diagnostics/positive_distance": 0.004852294921875, "action_diagnostics/swap_distance_gap": 0.26171875, "action_diagnostics/swapped_distance": 0.265625, "collapse/effective_rank": 4.03470884645853, "collapse/effective_rank_ratio": 0.015760581431478633, "collapse/embedding_count": 192.0, "collapse/embedding_norm_mean": 14.392418570878986, "collapse/latent_dim": 256.0, "collapse/nearest_neighbor_entropy": 4.822367445946839, "collapse/pairwise_cosine_mean": 0.006447293492730005, "collapse/per_dim_variance_max": 3.456151289617992, "collapse/per_dim_variance_median": 0.7004040151172777, "collapse/per_dim_variance_min": 0.04232150622637062, "loss/action_swap_contrastive": 0.0048749265260994434, "loss/action_swap_contrastive_weighted": 0.0009749853052198887, "loss/action_use_margin": 0.007081065326929092, "loss/action_use_margin_weighted": 0.001770266331732273, "loss/inverse_action_reconstruction": 0.21137171983718872, "loss/inverse_action_reconstruction_weighted": 0.021137172356247902, "loss/prediction_mse": 0.004846842493861914, "loss/sigreg": 1.0546875, "loss/sigreg_weighted": 0.0947265625, "loss/total": 0.12345582246780396, "train/examples": 3833852.0, "train/examples_per_second": 1323.9171764663713, "train/gradient_norm": 2.023609161376953, "val/action_diagnostics/positive_distance": 0.1494137846997806, "val/action_diagnostics/swap_distance_gap": 0.03513682243369874, "val/action_diagnostics/swapped_distance": 0.18455060713347934, "val/loss/action_swap_contrastive": 0.059425451925822666, "val/loss/action_swap_contrastive_weighted": 0.011885090686735652, "val/loss/action_use_margin": 0.05836626293048972, "val/loss/action_use_margin_weighted": 0.01459156573262243, "val/loss/inverse_action_reconstruction": 0.552373686007091, "val/loss/inverse_action_reconstruction_weighted": 0.055237369877951484, "val/loss/prediction_mse": 0.14941378363541194, "val/loss/sigreg": 2.168621886344183, "val/loss/sigreg_weighted": 0.19517597804466882, "val/loss/total": 0.4263037897291638}, "run_id": "codelewm_scaled_v0_2_action_swap_inverse_gpu_a10g", "schema_version": "codelewm.training_metrics.v1", "step": 60000}
checkpoints/codelewm-v0-2-action-swap-rerun-20260520-7c7cb0b/reports/metrics_report.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "final_metrics": {
3
+ "action_diagnostics/positive_distance": 0.004852294921875,
4
+ "action_diagnostics/swap_distance_gap": 0.26171875,
5
+ "action_diagnostics/swapped_distance": 0.265625,
6
+ "collapse/effective_rank": 4.03470884645853,
7
+ "collapse/effective_rank_ratio": 0.015760581431478633,
8
+ "collapse/embedding_count": 192.0,
9
+ "collapse/embedding_norm_mean": 14.392418570878986,
10
+ "collapse/latent_dim": 256.0,
11
+ "collapse/nearest_neighbor_entropy": 4.822367445946839,
12
+ "collapse/pairwise_cosine_mean": 0.006447293492730005,
13
+ "collapse/per_dim_variance_max": 3.456151289617992,
14
+ "collapse/per_dim_variance_median": 0.7004040151172777,
15
+ "collapse/per_dim_variance_min": 0.04232150622637062,
16
+ "loss/action_swap_contrastive": 0.0048749265260994434,
17
+ "loss/action_swap_contrastive_weighted": 0.0009749853052198887,
18
+ "loss/action_use_margin": 0.007081065326929092,
19
+ "loss/action_use_margin_weighted": 0.001770266331732273,
20
+ "loss/inverse_action_reconstruction": 0.21137171983718872,
21
+ "loss/inverse_action_reconstruction_weighted": 0.021137172356247902,
22
+ "loss/prediction_mse": 0.004846842493861914,
23
+ "loss/sigreg": 1.0546875,
24
+ "loss/sigreg_weighted": 0.0947265625,
25
+ "loss/total": 0.12345582246780396,
26
+ "train/examples": 3833852.0,
27
+ "train/examples_per_second": 1323.9171764663713,
28
+ "train/gradient_norm": 2.023609161376953,
29
+ "val/action_diagnostics/positive_distance": 0.1494137846997806,
30
+ "val/action_diagnostics/swap_distance_gap": 0.03513682243369874,
31
+ "val/action_diagnostics/swapped_distance": 0.18455060713347934,
32
+ "val/loss/action_swap_contrastive": 0.059425451925822666,
33
+ "val/loss/action_swap_contrastive_weighted": 0.011885090686735652,
34
+ "val/loss/action_use_margin": 0.05836626293048972,
35
+ "val/loss/action_use_margin_weighted": 0.01459156573262243,
36
+ "val/loss/inverse_action_reconstruction": 0.552373686007091,
37
+ "val/loss/inverse_action_reconstruction_weighted": 0.055237369877951484,
38
+ "val/loss/prediction_mse": 0.14941378363541194,
39
+ "val/loss/sigreg": 2.168621886344183,
40
+ "val/loss/sigreg_weighted": 0.19517597804466882,
41
+ "val/loss/total": 0.4263037897291638
42
+ },
43
+ "run_id": "codelewm_scaled_v0_2_action_swap_inverse_gpu_a10g",
44
+ "schema_version": "codelewm.training_metrics.v1",
45
+ "step_count": 60000
46
+ }
checkpoints/codelewm-v0-2-action-swap-rerun-20260520-7c7cb0b/reports/torch_training_report.json ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "collapse_report": {
3
+ "effective_rank": 4.03470884645853,
4
+ "effective_rank_ratio": 0.015760581431478633,
5
+ "embedding_count": 192,
6
+ "embedding_norm_mean": 14.392418570878986,
7
+ "latent_dim": 256,
8
+ "nearest_neighbor_entropy": 4.822367445946839,
9
+ "pairwise_cosine_mean": 0.006447293492730005,
10
+ "per_dim_variance_max": 3.456151289617992,
11
+ "per_dim_variance_median": 0.7004040151172777,
12
+ "per_dim_variance_min": 0.04232150622637062,
13
+ "schema_version": "codelewm.eval.collapse_report.v1"
14
+ },
15
+ "dataset": {
16
+ "action_view": "text",
17
+ "train_rows": 18019,
18
+ "val_rows": 1291
19
+ },
20
+ "metrics": {
21
+ "action_diagnostics/positive_distance": 0.004852294921875,
22
+ "action_diagnostics/swap_distance_gap": 0.26171875,
23
+ "action_diagnostics/swapped_distance": 0.265625,
24
+ "collapse/effective_rank": 4.03470884645853,
25
+ "collapse/effective_rank_ratio": 0.015760581431478633,
26
+ "collapse/embedding_count": 192.0,
27
+ "collapse/embedding_norm_mean": 14.392418570878986,
28
+ "collapse/latent_dim": 256.0,
29
+ "collapse/nearest_neighbor_entropy": 4.822367445946839,
30
+ "collapse/pairwise_cosine_mean": 0.006447293492730005,
31
+ "collapse/per_dim_variance_max": 3.456151289617992,
32
+ "collapse/per_dim_variance_median": 0.7004040151172777,
33
+ "collapse/per_dim_variance_min": 0.04232150622637062,
34
+ "loss/action_swap_contrastive": 0.0048749265260994434,
35
+ "loss/action_swap_contrastive_weighted": 0.0009749853052198887,
36
+ "loss/action_use_margin": 0.007081065326929092,
37
+ "loss/action_use_margin_weighted": 0.001770266331732273,
38
+ "loss/inverse_action_reconstruction": 0.21137171983718872,
39
+ "loss/inverse_action_reconstruction_weighted": 0.021137172356247902,
40
+ "loss/prediction_mse": 0.004846842493861914,
41
+ "loss/sigreg": 1.0546875,
42
+ "loss/sigreg_weighted": 0.0947265625,
43
+ "loss/total": 0.12345582246780396,
44
+ "train/examples": 3833852.0,
45
+ "train/examples_per_second": 1323.9171764663713,
46
+ "train/gradient_norm": 2.023609161376953,
47
+ "val/action_diagnostics/positive_distance": 0.1494137846997806,
48
+ "val/action_diagnostics/swap_distance_gap": 0.03513682243369874,
49
+ "val/action_diagnostics/swapped_distance": 0.18455060713347934,
50
+ "val/loss/action_swap_contrastive": 0.059425451925822666,
51
+ "val/loss/action_swap_contrastive_weighted": 0.011885090686735652,
52
+ "val/loss/action_use_margin": 0.05836626293048972,
53
+ "val/loss/action_use_margin_weighted": 0.01459156573262243,
54
+ "val/loss/inverse_action_reconstruction": 0.552373686007091,
55
+ "val/loss/inverse_action_reconstruction_weighted": 0.055237369877951484,
56
+ "val/loss/prediction_mse": 0.14941378363541194,
57
+ "val/loss/sigreg": 2.168621886344183,
58
+ "val/loss/sigreg_weighted": 0.19517597804466882,
59
+ "val/loss/total": 0.4263037897291638
60
+ },
61
+ "objective": {
62
+ "action_swap_contrastive_margin": 0.05,
63
+ "action_swap_contrastive_weight": 0.2,
64
+ "action_use_margin": 0.02,
65
+ "action_use_margin_weight": 0.25,
66
+ "enable_action_swap_contrastive": true,
67
+ "enable_action_use_margin": true,
68
+ "enable_inverse_action_reconstruction": true,
69
+ "enable_retrieval_loss": false,
70
+ "inverse_action_reconstruction_weight": 0.1,
71
+ "retrieval_temperature": 0.1,
72
+ "retrieval_weight": 0.0,
73
+ "sigreg_knots": 17,
74
+ "sigreg_num_proj": 1024,
75
+ "sigreg_weight": 0.09
76
+ },
77
+ "run_id": "codelewm_scaled_v0_2_action_swap_inverse_gpu_a10g",
78
+ "runtime": {
79
+ "device": "cuda",
80
+ "dtype": "torch.bfloat16",
81
+ "precision": "bf16-mixed",
82
+ "torch": "2.12.0+cu130"
83
+ },
84
+ "schema_version": "codelewm.torch_training_report.v1",
85
+ "step_count": 60000
86
+ }
checkpoints/codelewm-v0-2-action-swap-rerun-20260520-7c7cb0b/training_manifest.json ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "artifact_manifest_id": "training_run-0a41863d1da33737",
3
+ "artifact_manifest_path": "manifest.json",
4
+ "checkpoint_files": [
5
+ {
6
+ "bytes": 254388041,
7
+ "path": "checkpoints/checkpoint.pt",
8
+ "sha256": "f2c5ba50ee0ec5e32ff5c3ceed848020e989ebdb1c98a917f17589ee523c6d7e"
9
+ },
10
+ {
11
+ "bytes": 511,
12
+ "path": "checkpoints/checkpoint.pt.manifest.json",
13
+ "sha256": "98b3be6594bcb3d6e59999b47017ea146083d0d7ce54d675bfdc707d09c0b5bf"
14
+ }
15
+ ],
16
+ "config_path": "config.json",
17
+ "config_sha256": "6bf543fe67dc43831a0c450b746986641f31cdf33179d8cb669fc05968ff967b",
18
+ "dataset_manifest_path": ".artifacts/hf/codelewm-v0-2-action-swap-rerun-20260520-7c7cb0b/pack/manifest.json",
19
+ "final_metrics": {
20
+ "action_diagnostics/positive_distance": 0.004852294921875,
21
+ "action_diagnostics/swap_distance_gap": 0.26171875,
22
+ "action_diagnostics/swapped_distance": 0.265625,
23
+ "collapse/effective_rank": 4.03470884645853,
24
+ "collapse/effective_rank_ratio": 0.015760581431478633,
25
+ "collapse/embedding_count": 192.0,
26
+ "collapse/embedding_norm_mean": 14.392418570878986,
27
+ "collapse/latent_dim": 256.0,
28
+ "collapse/nearest_neighbor_entropy": 4.822367445946839,
29
+ "collapse/pairwise_cosine_mean": 0.006447293492730005,
30
+ "collapse/per_dim_variance_max": 3.456151289617992,
31
+ "collapse/per_dim_variance_median": 0.7004040151172777,
32
+ "collapse/per_dim_variance_min": 0.04232150622637062,
33
+ "loss/action_swap_contrastive": 0.0048749265260994434,
34
+ "loss/action_swap_contrastive_weighted": 0.0009749853052198887,
35
+ "loss/action_use_margin": 0.007081065326929092,
36
+ "loss/action_use_margin_weighted": 0.001770266331732273,
37
+ "loss/inverse_action_reconstruction": 0.21137171983718872,
38
+ "loss/inverse_action_reconstruction_weighted": 0.021137172356247902,
39
+ "loss/prediction_mse": 0.004846842493861914,
40
+ "loss/sigreg": 1.0546875,
41
+ "loss/sigreg_weighted": 0.0947265625,
42
+ "loss/total": 0.12345582246780396,
43
+ "train/examples": 3833852.0,
44
+ "train/examples_per_second": 1323.9171764663713,
45
+ "train/gradient_norm": 2.023609161376953,
46
+ "val/action_diagnostics/positive_distance": 0.1494137846997806,
47
+ "val/action_diagnostics/swap_distance_gap": 0.03513682243369874,
48
+ "val/action_diagnostics/swapped_distance": 0.18455060713347934,
49
+ "val/loss/action_swap_contrastive": 0.059425451925822666,
50
+ "val/loss/action_swap_contrastive_weighted": 0.011885090686735652,
51
+ "val/loss/action_use_margin": 0.05836626293048972,
52
+ "val/loss/action_use_margin_weighted": 0.01459156573262243,
53
+ "val/loss/inverse_action_reconstruction": 0.552373686007091,
54
+ "val/loss/inverse_action_reconstruction_weighted": 0.055237369877951484,
55
+ "val/loss/prediction_mse": 0.14941378363541194,
56
+ "val/loss/sigreg": 2.168621886344183,
57
+ "val/loss/sigreg_weighted": 0.19517597804466882,
58
+ "val/loss/total": 0.4263037897291638
59
+ },
60
+ "metadata": {
61
+ "executor": {
62
+ "checkpoint_schema_version": "codelewm.checkpoint.v1",
63
+ "device": "cuda",
64
+ "executor": "torch",
65
+ "objective": {
66
+ "action_swap_contrastive_margin": 0.05,
67
+ "action_swap_contrastive_weight": 0.2,
68
+ "action_use_margin": 0.02,
69
+ "action_use_margin_weight": 0.25,
70
+ "enable_action_swap_contrastive": true,
71
+ "enable_action_use_margin": true,
72
+ "enable_inverse_action_reconstruction": true,
73
+ "enable_retrieval_loss": false,
74
+ "inverse_action_reconstruction_weight": 0.1,
75
+ "retrieval_temperature": 0.1,
76
+ "retrieval_weight": 0.0,
77
+ "sigreg_knots": 17,
78
+ "sigreg_num_proj": 1024,
79
+ "sigreg_weight": 0.09
80
+ },
81
+ "precision": "bf16-mixed",
82
+ "torch": "2.12.0+cu130",
83
+ "train_rows": 18019,
84
+ "val_rows": 1291
85
+ }
86
+ },
87
+ "metrics_path": "metrics.jsonl",
88
+ "metrics_report_path": "reports/metrics_report.json",
89
+ "parent_artifacts": [
90
+ "dataset-daecac9f9965c563"
91
+ ],
92
+ "report_files": [
93
+ {
94
+ "bytes": 3608,
95
+ "path": "reports/torch_training_report.json",
96
+ "sha256": "7a669bfeef22ff0e3420fc4b7453fdb436b76472bfa633b0ebef28cc9d616165"
97
+ }
98
+ ],
99
+ "run_id": "codelewm_scaled_v0_2_action_swap_inverse_gpu_a10g",
100
+ "schema_version": "codelewm.training_run.v1",
101
+ "seed": 240119,
102
+ "step_count": 60000
103
+ }