abdelstark committed on
Commit
c803f37
·
verified ·
1 Parent(s): 4f61d8b

Add CodeLeWM checkpoint artifact codelewm-action-use-retrieval-20260520-7895d18

Browse files
checkpoints/codelewm-action-use-retrieval-20260520-7895d18/checkpoints/checkpoint.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cb4daf1500495579f5c59cc9fd8aa39f5f70e88f55c0c121320d023b43ddeda
3
+ size 351495901
checkpoints/codelewm-action-use-retrieval-20260520-7895d18/checkpoints/checkpoint.pt.manifest.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "checkpoint_path": "checkpoint.pt",
3
+ "checkpoint_sha256": "0cb4daf1500495579f5c59cc9fd8aa39f5f70e88f55c0c121320d023b43ddeda",
4
+ "metadata": {
5
+ "action_view": "text",
6
+ "config_hash": "9ac2ec1d60645bf2eb351d4799f5866533a26a39d7b3dec8dc064988c19c8cf8",
7
+ "latent_dim": 256,
8
+ "model_class": "TorchCodeTransitionModel",
9
+ "record_schema_version": "codelewm.transition.v1",
10
+ "schema_version": "codelewm.checkpoint.v1"
11
+ },
12
+ "migration_hook": null,
13
+ "schema_version": "codelewm.checkpoint.v1"
14
+ }
checkpoints/codelewm-action-use-retrieval-20260520-7895d18/config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data": {
3
+ "manifest": "/CodeLeWM/.artifacts/hf/codelewm-action-use-retrieval-20260520-7895d18/pack/manifest.json",
4
+ "train": "/CodeLeWM/.artifacts/hf/codelewm-action-use-retrieval-20260520-7895d18/pack/hdf5/train.hdf5",
5
+ "val": "/CodeLeWM/.artifacts/hf/codelewm-action-use-retrieval-20260520-7895d18/pack/hdf5/val.hdf5"
6
+ },
7
+ "loader": {
8
+ "batch_size": 64,
9
+ "num_workers": 4,
10
+ "persistent_workers": true,
11
+ "pin_memory": true,
12
+ "shuffle": true
13
+ },
14
+ "loss": {
15
+ "action_use_margin": 0.02,
16
+ "action_use_margin_weight": 0.25,
17
+ "enable_action_use_margin": true,
18
+ "enable_retrieval_loss": true,
19
+ "retrieval_temperature": 0.1,
20
+ "retrieval_weight": 0.05,
21
+ "sigreg_knots": 17,
22
+ "sigreg_num_proj": 1024,
23
+ "sigreg_weight": 0.09
24
+ },
25
+ "name": "codelewm_scaled_action_use_margin_retrieval_gpu_a10g",
26
+ "optimizer": {
27
+ "lr": 0.0001,
28
+ "type": "AdamW",
29
+ "weight_decay": 0.001
30
+ },
31
+ "output": {
32
+ "checkpoint_dir": "/CodeLeWM/.artifacts/hf/codelewm-action-use-retrieval-20260520-7895d18/train/checkpoints",
33
+ "manifest_path": "/CodeLeWM/.artifacts/hf/codelewm-action-use-retrieval-20260520-7895d18/train/training_manifest.json",
34
+ "metrics_path": "/CodeLeWM/.artifacts/hf/codelewm-action-use-retrieval-20260520-7895d18/train/metrics.jsonl",
35
+ "run_dir": "/CodeLeWM/.artifacts/hf/codelewm-action-use-retrieval-20260520-7895d18/train"
36
+ },
37
+ "schema_version": "codelewm.train_config.v1",
38
+ "seed": 240119,
39
+ "trainer": {
40
+ "accelerator": "gpu",
41
+ "devices": 1,
42
+ "gradient_clip_val": 1.0,
43
+ "max_steps": 60000,
44
+ "precision": "bf16-mixed"
45
+ },
46
+ "wm": {
47
+ "action_sequence_length": 256,
48
+ "action_view": "text",
49
+ "embed_dim": 256,
50
+ "history_size": 1,
51
+ "num_preds": 1,
52
+ "state_sequence_length": 1024
53
+ }
54
+ }
checkpoints/codelewm-action-use-retrieval-20260520-7895d18/manifest.json ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "artifact_id": "training_run-924cd056375f11ea",
3
+ "artifact_kind": "training_run",
4
+ "command": [
5
+ "codelewm",
6
+ "train",
7
+ "--config",
8
+ "/CodeLeWM/.artifacts/hf/codelewm-action-use-retrieval-20260520-7895d18/configs/train.json",
9
+ "--executor",
10
+ "torch",
11
+ "--device",
12
+ "auto",
13
+ "--overwrite",
14
+ "--json",
15
+ "--log-jsonl",
16
+ "/CodeLeWM/.artifacts/hf/codelewm-action-use-retrieval-20260520-7895d18/logs/train.jsonl"
17
+ ],
18
+ "config_sha256": "a8c40e6dad64a96410b9855ce61146a85aa336325980c7a1080943b227e1ffc2",
19
+ "created_at": "2026-05-20T13:13:45Z",
20
+ "files": [
21
+ {
22
+ "bytes": 1789,
23
+ "path": "config.json",
24
+ "sha256": "4e2ad375b3fc132229ada18118a577dc882d68704cb9b317735b03579563f532"
25
+ },
26
+ {
27
+ "bytes": 1472,
28
+ "path": "metrics.jsonl",
29
+ "sha256": "9d8b35638953b0648c66d4f69e3e00d554a5c7efb76dd9afa553cce27d6a9eb3"
30
+ },
31
+ {
32
+ "bytes": 1614,
33
+ "path": "reports/metrics_report.json",
34
+ "sha256": "74270302bccf8913005b5177d8157266a9ba3f55746e296e2fa99fce66ed70de"
35
+ },
36
+ {
37
+ "bytes": 351495901,
38
+ "path": "checkpoints/checkpoint.pt",
39
+ "sha256": "0cb4daf1500495579f5c59cc9fd8aa39f5f70e88f55c0c121320d023b43ddeda"
40
+ },
41
+ {
42
+ "bytes": 511,
43
+ "path": "checkpoints/checkpoint.pt.manifest.json",
44
+ "sha256": "9b8cb47facfa9f3c721fa845df0e374aa16692a63f4a93a06b806a95eb4ffeb9"
45
+ },
46
+ {
47
+ "bytes": 2667,
48
+ "path": "reports/torch_training_report.json",
49
+ "sha256": "7f2a603f05fd92497c5aaa854d9ba73476c285030176ed068afb62976bb0ec4b"
50
+ }
51
+ ],
52
+ "metadata": {
53
+ "dataset_manifest_path": ".artifacts/hf/codelewm-action-use-retrieval-20260520-7895d18/pack/manifest.json",
54
+ "executor": {
55
+ "checkpoint_schema_version": "codelewm.checkpoint.v1",
56
+ "device": "cuda",
57
+ "executor": "torch",
58
+ "objective": {
59
+ "action_use_margin": 0.02,
60
+ "action_use_margin_weight": 0.25,
61
+ "enable_action_use_margin": true,
62
+ "enable_retrieval_loss": true,
63
+ "retrieval_temperature": 0.1,
64
+ "retrieval_weight": 0.05,
65
+ "sigreg_knots": 17,
66
+ "sigreg_num_proj": 1024,
67
+ "sigreg_weight": 0.09
68
+ },
69
+ "precision": "bf16-mixed",
70
+ "torch": "2.12.0+cu130",
71
+ "train_rows": 18019,
72
+ "val_rows": 1291
73
+ },
74
+ "final_metrics": {
75
+ "collapse/effective_rank": 10.542142224670206,
76
+ "collapse/effective_rank_ratio": 0.04118024306511799,
77
+ "collapse/embedding_count": 192.0,
78
+ "collapse/embedding_norm_mean": 13.19682605659637,
79
+ "collapse/latent_dim": 256.0,
80
+ "collapse/nearest_neighbor_entropy": 4.847709015203599,
81
+ "collapse/pairwise_cosine_mean": 0.004205391777077934,
82
+ "collapse/per_dim_variance_max": 2.3262929385455555,
83
+ "collapse/per_dim_variance_median": 0.6143968449321122,
84
+ "collapse/per_dim_variance_min": 0.06425978866539729,
85
+ "loss/action_use_margin": 0.005474980920553207,
86
+ "loss/action_use_margin_weighted": 0.0013687452301383018,
87
+ "loss/prediction_mse": 0.006972212344408035,
88
+ "loss/retrieval": 0.20804595947265625,
89
+ "loss/retrieval_weighted": 0.010402298532426357,
90
+ "loss/sigreg": 0.91015625,
91
+ "loss/sigreg_weighted": 0.08203125,
92
+ "loss/total": 0.10077450424432755,
93
+ "train/examples": 3833852.0,
94
+ "train/examples_per_second": 1064.5055633910936,
95
+ "train/gradient_norm": 1.3148689270019531,
96
+ "val/loss/action_use_margin": 0.05166880839637348,
97
+ "val/loss/action_use_margin_weighted": 0.01291720209909337,
98
+ "val/loss/prediction_mse": 0.18427681780996777,
99
+ "val/loss/retrieval": 0.9156766051337832,
100
+ "val/loss/retrieval_weighted": 0.04578383116140252,
101
+ "val/loss/sigreg": 1.887503743171692,
102
+ "val/loss/sigreg_weighted": 0.16987534293106624,
103
+ "val/loss/total": 0.4128531955537342
104
+ },
105
+ "run_id": "codelewm_scaled_action_use_margin_retrieval_gpu_a10g",
106
+ "schema_version": "codelewm.training_run.v1",
107
+ "seed": 240119,
108
+ "step_count": 60000
109
+ },
110
+ "parent_artifacts": [
111
+ "dataset-5695087296ce4a97"
112
+ ],
113
+ "schema_version": "codelewm.artifact_manifest.v1",
114
+ "source_git_sha": "7895d185e165a917af0956a313d8948c04b33638"
115
+ }
checkpoints/codelewm-action-use-retrieval-20260520-7895d18/metrics.jsonl ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics": {"collapse/effective_rank": 10.542142224670206, "collapse/effective_rank_ratio": 0.04118024306511799, "collapse/embedding_count": 192.0, "collapse/embedding_norm_mean": 13.19682605659637, "collapse/latent_dim": 256.0, "collapse/nearest_neighbor_entropy": 4.847709015203599, "collapse/pairwise_cosine_mean": 0.004205391777077934, "collapse/per_dim_variance_max": 2.3262929385455555, "collapse/per_dim_variance_median": 0.6143968449321122, "collapse/per_dim_variance_min": 0.06425978866539729, "loss/action_use_margin": 0.005474980920553207, "loss/action_use_margin_weighted": 0.0013687452301383018, "loss/prediction_mse": 0.006972212344408035, "loss/retrieval": 0.20804595947265625, "loss/retrieval_weighted": 0.010402298532426357, "loss/sigreg": 0.91015625, "loss/sigreg_weighted": 0.08203125, "loss/total": 0.10077450424432755, "train/examples": 3833852.0, "train/examples_per_second": 1064.5055633910936, "train/gradient_norm": 1.3148689270019531, "val/loss/action_use_margin": 0.05166880839637348, "val/loss/action_use_margin_weighted": 0.01291720209909337, "val/loss/prediction_mse": 0.18427681780996777, "val/loss/retrieval": 0.9156766051337832, "val/loss/retrieval_weighted": 0.04578383116140252, "val/loss/sigreg": 1.887503743171692, "val/loss/sigreg_weighted": 0.16987534293106624, "val/loss/total": 0.4128531955537342}, "run_id": "codelewm_scaled_action_use_margin_retrieval_gpu_a10g", "schema_version": "codelewm.training_metrics.v1", "step": 60000}
checkpoints/codelewm-action-use-retrieval-20260520-7895d18/reports/metrics_report.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "final_metrics": {
3
+ "collapse/effective_rank": 10.542142224670206,
4
+ "collapse/effective_rank_ratio": 0.04118024306511799,
5
+ "collapse/embedding_count": 192.0,
6
+ "collapse/embedding_norm_mean": 13.19682605659637,
7
+ "collapse/latent_dim": 256.0,
8
+ "collapse/nearest_neighbor_entropy": 4.847709015203599,
9
+ "collapse/pairwise_cosine_mean": 0.004205391777077934,
10
+ "collapse/per_dim_variance_max": 2.3262929385455555,
11
+ "collapse/per_dim_variance_median": 0.6143968449321122,
12
+ "collapse/per_dim_variance_min": 0.06425978866539729,
13
+ "loss/action_use_margin": 0.005474980920553207,
14
+ "loss/action_use_margin_weighted": 0.0013687452301383018,
15
+ "loss/prediction_mse": 0.006972212344408035,
16
+ "loss/retrieval": 0.20804595947265625,
17
+ "loss/retrieval_weighted": 0.010402298532426357,
18
+ "loss/sigreg": 0.91015625,
19
+ "loss/sigreg_weighted": 0.08203125,
20
+ "loss/total": 0.10077450424432755,
21
+ "train/examples": 3833852.0,
22
+ "train/examples_per_second": 1064.5055633910936,
23
+ "train/gradient_norm": 1.3148689270019531,
24
+ "val/loss/action_use_margin": 0.05166880839637348,
25
+ "val/loss/action_use_margin_weighted": 0.01291720209909337,
26
+ "val/loss/prediction_mse": 0.18427681780996777,
27
+ "val/loss/retrieval": 0.9156766051337832,
28
+ "val/loss/retrieval_weighted": 0.04578383116140252,
29
+ "val/loss/sigreg": 1.887503743171692,
30
+ "val/loss/sigreg_weighted": 0.16987534293106624,
31
+ "val/loss/total": 0.4128531955537342
32
+ },
33
+ "run_id": "codelewm_scaled_action_use_margin_retrieval_gpu_a10g",
34
+ "schema_version": "codelewm.training_metrics.v1",
35
+ "step_count": 60000
36
+ }
checkpoints/codelewm-action-use-retrieval-20260520-7895d18/reports/torch_training_report.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "collapse_report": {
3
+ "effective_rank": 10.542142224670206,
4
+ "effective_rank_ratio": 0.04118024306511799,
5
+ "embedding_count": 192,
6
+ "embedding_norm_mean": 13.19682605659637,
7
+ "latent_dim": 256,
8
+ "nearest_neighbor_entropy": 4.847709015203599,
9
+ "pairwise_cosine_mean": 0.004205391777077934,
10
+ "per_dim_variance_max": 2.3262929385455555,
11
+ "per_dim_variance_median": 0.6143968449321122,
12
+ "per_dim_variance_min": 0.06425978866539729,
13
+ "schema_version": "codelewm.eval.collapse_report.v1"
14
+ },
15
+ "dataset": {
16
+ "action_view": "text",
17
+ "train_rows": 18019,
18
+ "val_rows": 1291
19
+ },
20
+ "metrics": {
21
+ "collapse/effective_rank": 10.542142224670206,
22
+ "collapse/effective_rank_ratio": 0.04118024306511799,
23
+ "collapse/embedding_count": 192.0,
24
+ "collapse/embedding_norm_mean": 13.19682605659637,
25
+ "collapse/latent_dim": 256.0,
26
+ "collapse/nearest_neighbor_entropy": 4.847709015203599,
27
+ "collapse/pairwise_cosine_mean": 0.004205391777077934,
28
+ "collapse/per_dim_variance_max": 2.3262929385455555,
29
+ "collapse/per_dim_variance_median": 0.6143968449321122,
30
+ "collapse/per_dim_variance_min": 0.06425978866539729,
31
+ "loss/action_use_margin": 0.005474980920553207,
32
+ "loss/action_use_margin_weighted": 0.0013687452301383018,
33
+ "loss/prediction_mse": 0.006972212344408035,
34
+ "loss/retrieval": 0.20804595947265625,
35
+ "loss/retrieval_weighted": 0.010402298532426357,
36
+ "loss/sigreg": 0.91015625,
37
+ "loss/sigreg_weighted": 0.08203125,
38
+ "loss/total": 0.10077450424432755,
39
+ "train/examples": 3833852.0,
40
+ "train/examples_per_second": 1064.5055633910936,
41
+ "train/gradient_norm": 1.3148689270019531,
42
+ "val/loss/action_use_margin": 0.05166880839637348,
43
+ "val/loss/action_use_margin_weighted": 0.01291720209909337,
44
+ "val/loss/prediction_mse": 0.18427681780996777,
45
+ "val/loss/retrieval": 0.9156766051337832,
46
+ "val/loss/retrieval_weighted": 0.04578383116140252,
47
+ "val/loss/sigreg": 1.887503743171692,
48
+ "val/loss/sigreg_weighted": 0.16987534293106624,
49
+ "val/loss/total": 0.4128531955537342
50
+ },
51
+ "objective": {
52
+ "action_use_margin": 0.02,
53
+ "action_use_margin_weight": 0.25,
54
+ "enable_action_use_margin": true,
55
+ "enable_retrieval_loss": true,
56
+ "retrieval_temperature": 0.1,
57
+ "retrieval_weight": 0.05,
58
+ "sigreg_knots": 17,
59
+ "sigreg_num_proj": 1024,
60
+ "sigreg_weight": 0.09
61
+ },
62
+ "run_id": "codelewm_scaled_action_use_margin_retrieval_gpu_a10g",
63
+ "runtime": {
64
+ "device": "cuda",
65
+ "dtype": "torch.bfloat16",
66
+ "precision": "bf16-mixed",
67
+ "torch": "2.12.0+cu130"
68
+ },
69
+ "schema_version": "codelewm.torch_training_report.v1",
70
+ "step_count": 60000
71
+ }
checkpoints/codelewm-action-use-retrieval-20260520-7895d18/training_manifest.json ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "artifact_manifest_id": "training_run-924cd056375f11ea",
3
+ "artifact_manifest_path": "manifest.json",
4
+ "checkpoint_files": [
5
+ {
6
+ "bytes": 351495901,
7
+ "path": "checkpoints/checkpoint.pt",
8
+ "sha256": "0cb4daf1500495579f5c59cc9fd8aa39f5f70e88f55c0c121320d023b43ddeda"
9
+ },
10
+ {
11
+ "bytes": 511,
12
+ "path": "checkpoints/checkpoint.pt.manifest.json",
13
+ "sha256": "9b8cb47facfa9f3c721fa845df0e374aa16692a63f4a93a06b806a95eb4ffeb9"
14
+ }
15
+ ],
16
+ "config_path": "config.json",
17
+ "config_sha256": "a8c40e6dad64a96410b9855ce61146a85aa336325980c7a1080943b227e1ffc2",
18
+ "dataset_manifest_path": ".artifacts/hf/codelewm-action-use-retrieval-20260520-7895d18/pack/manifest.json",
19
+ "final_metrics": {
20
+ "collapse/effective_rank": 10.542142224670206,
21
+ "collapse/effective_rank_ratio": 0.04118024306511799,
22
+ "collapse/embedding_count": 192.0,
23
+ "collapse/embedding_norm_mean": 13.19682605659637,
24
+ "collapse/latent_dim": 256.0,
25
+ "collapse/nearest_neighbor_entropy": 4.847709015203599,
26
+ "collapse/pairwise_cosine_mean": 0.004205391777077934,
27
+ "collapse/per_dim_variance_max": 2.3262929385455555,
28
+ "collapse/per_dim_variance_median": 0.6143968449321122,
29
+ "collapse/per_dim_variance_min": 0.06425978866539729,
30
+ "loss/action_use_margin": 0.005474980920553207,
31
+ "loss/action_use_margin_weighted": 0.0013687452301383018,
32
+ "loss/prediction_mse": 0.006972212344408035,
33
+ "loss/retrieval": 0.20804595947265625,
34
+ "loss/retrieval_weighted": 0.010402298532426357,
35
+ "loss/sigreg": 0.91015625,
36
+ "loss/sigreg_weighted": 0.08203125,
37
+ "loss/total": 0.10077450424432755,
38
+ "train/examples": 3833852.0,
39
+ "train/examples_per_second": 1064.5055633910936,
40
+ "train/gradient_norm": 1.3148689270019531,
41
+ "val/loss/action_use_margin": 0.05166880839637348,
42
+ "val/loss/action_use_margin_weighted": 0.01291720209909337,
43
+ "val/loss/prediction_mse": 0.18427681780996777,
44
+ "val/loss/retrieval": 0.9156766051337832,
45
+ "val/loss/retrieval_weighted": 0.04578383116140252,
46
+ "val/loss/sigreg": 1.887503743171692,
47
+ "val/loss/sigreg_weighted": 0.16987534293106624,
48
+ "val/loss/total": 0.4128531955537342
49
+ },
50
+ "metadata": {
51
+ "executor": {
52
+ "checkpoint_schema_version": "codelewm.checkpoint.v1",
53
+ "device": "cuda",
54
+ "executor": "torch",
55
+ "objective": {
56
+ "action_use_margin": 0.02,
57
+ "action_use_margin_weight": 0.25,
58
+ "enable_action_use_margin": true,
59
+ "enable_retrieval_loss": true,
60
+ "retrieval_temperature": 0.1,
61
+ "retrieval_weight": 0.05,
62
+ "sigreg_knots": 17,
63
+ "sigreg_num_proj": 1024,
64
+ "sigreg_weight": 0.09
65
+ },
66
+ "precision": "bf16-mixed",
67
+ "torch": "2.12.0+cu130",
68
+ "train_rows": 18019,
69
+ "val_rows": 1291
70
+ }
71
+ },
72
+ "metrics_path": "metrics.jsonl",
73
+ "metrics_report_path": "reports/metrics_report.json",
74
+ "parent_artifacts": [
75
+ "dataset-5695087296ce4a97"
76
+ ],
77
+ "report_files": [
78
+ {
79
+ "bytes": 2667,
80
+ "path": "reports/torch_training_report.json",
81
+ "sha256": "7f2a603f05fd92497c5aaa854d9ba73476c285030176ed068afb62976bb0ec4b"
82
+ }
83
+ ],
84
+ "run_id": "codelewm_scaled_action_use_margin_retrieval_gpu_a10g",
85
+ "schema_version": "codelewm.training_run.v1",
86
+ "seed": 240119,
87
+ "step_count": 60000
88
+ }