abdelstark committed on
Commit
526282c
·
verified ·
1 Parent(s): 0bcfcbf

Add CodeLeWM checkpoint artifact codelewm-scaled-20260520-9699b53

Browse files
checkpoints/codelewm-scaled-20260520-9699b53/checkpoints/checkpoint.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09bf8d3880ec272a858dd9b19f2b29622a66a5ebbef6dbd1f8e4ebeb8b6392b8
3
+ size 351495453
checkpoints/codelewm-scaled-20260520-9699b53/checkpoints/checkpoint.pt.manifest.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "checkpoint_path": "checkpoint.pt",
3
+ "checkpoint_sha256": "09bf8d3880ec272a858dd9b19f2b29622a66a5ebbef6dbd1f8e4ebeb8b6392b8",
4
+ "metadata": {
5
+ "action_view": "text",
6
+ "config_hash": "72822cb45ab87284e629399b567b0f6daa882d79e2f3454654b0835880a73408",
7
+ "latent_dim": 256,
8
+ "model_class": "TorchCodeTransitionModel",
9
+ "record_schema_version": "codelewm.transition.v1",
10
+ "schema_version": "codelewm.checkpoint.v1"
11
+ },
12
+ "migration_hook": null,
13
+ "schema_version": "codelewm.checkpoint.v1"
14
+ }
checkpoints/codelewm-scaled-20260520-9699b53/config.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data": {
3
+ "manifest": "/CodeLeWM/.artifacts/hf/codelewm-scaled-20260520-9699b53/pack/manifest.json",
4
+ "train": "/CodeLeWM/.artifacts/hf/codelewm-scaled-20260520-9699b53/pack/hdf5/train.hdf5",
5
+ "val": "/CodeLeWM/.artifacts/hf/codelewm-scaled-20260520-9699b53/pack/hdf5/val.hdf5"
6
+ },
7
+ "loader": {
8
+ "batch_size": 64,
9
+ "num_workers": 4,
10
+ "persistent_workers": true,
11
+ "pin_memory": true,
12
+ "shuffle": true
13
+ },
14
+ "loss": {
15
+ "enable_retrieval_loss": false,
16
+ "retrieval_temperature": 0.1,
17
+ "retrieval_weight": 0.0,
18
+ "sigreg_knots": 17,
19
+ "sigreg_num_proj": 1024,
20
+ "sigreg_weight": 0.09
21
+ },
22
+ "name": "codelewm_scaled_gpu_a10g",
23
+ "optimizer": {
24
+ "lr": 0.0001,
25
+ "type": "AdamW",
26
+ "weight_decay": 0.001
27
+ },
28
+ "output": {
29
+ "checkpoint_dir": "/CodeLeWM/.artifacts/hf/codelewm-scaled-20260520-9699b53/train/checkpoints",
30
+ "manifest_path": "/CodeLeWM/.artifacts/hf/codelewm-scaled-20260520-9699b53/train/training_manifest.json",
31
+ "metrics_path": "/CodeLeWM/.artifacts/hf/codelewm-scaled-20260520-9699b53/train/metrics.jsonl",
32
+ "run_dir": "/CodeLeWM/.artifacts/hf/codelewm-scaled-20260520-9699b53/train"
33
+ },
34
+ "schema_version": "codelewm.train_config.v1",
35
+ "seed": 240119,
36
+ "trainer": {
37
+ "accelerator": "gpu",
38
+ "devices": 1,
39
+ "gradient_clip_val": 1.0,
40
+ "max_steps": 60000,
41
+ "precision": "bf16-mixed"
42
+ },
43
+ "wm": {
44
+ "action_sequence_length": 256,
45
+ "action_view": "text",
46
+ "embed_dim": 256,
47
+ "history_size": 1,
48
+ "num_preds": 1,
49
+ "state_sequence_length": 1024
50
+ }
51
+ }
checkpoints/codelewm-scaled-20260520-9699b53/manifest.json ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "artifact_id": "training_run-d9074199c0d58911",
3
+ "artifact_kind": "training_run",
4
+ "command": [
5
+ "codelewm",
6
+ "train",
7
+ "--config",
8
+ "/CodeLeWM/.artifacts/hf/codelewm-scaled-20260520-9699b53/configs/train.json",
9
+ "--executor",
10
+ "torch",
11
+ "--device",
12
+ "auto",
13
+ "--overwrite",
14
+ "--json",
15
+ "--log-jsonl",
16
+ "/CodeLeWM/.artifacts/hf/codelewm-scaled-20260520-9699b53/logs/train.jsonl"
17
+ ],
18
+ "config_sha256": "119507eb9909ba9c8dac134595266f5a941ea5c1d096ceeb61679da14d336dad",
19
+ "created_at": "2026-05-20T06:22:58Z",
20
+ "files": [
21
+ {
22
+ "bytes": 1556,
23
+ "path": "config.json",
24
+ "sha256": "889396e0262d4fadefa558ccab00934fac29ad58f307643a81612cca73d98488"
25
+ },
26
+ {
27
+ "bytes": 1040,
28
+ "path": "metrics.jsonl",
29
+ "sha256": "68a6b2d80774a62f31b87e9d6ee3c4b36978eddeb1805a849d47458b8871ffd9"
30
+ },
31
+ {
32
+ "bytes": 1150,
33
+ "path": "reports/metrics_report.json",
34
+ "sha256": "ea1a938c6a3b90f74c2eea9ab09723c004f24a4f7043fb3bdc7edfc95516f34f"
35
+ },
36
+ {
37
+ "bytes": 351495453,
38
+ "path": "checkpoints/checkpoint.pt",
39
+ "sha256": "09bf8d3880ec272a858dd9b19f2b29622a66a5ebbef6dbd1f8e4ebeb8b6392b8"
40
+ },
41
+ {
42
+ "bytes": 511,
43
+ "path": "checkpoints/checkpoint.pt.manifest.json",
44
+ "sha256": "b18a57a783d2c4693f7c99489ad4752ce6fa1ba8b64a61ccef688dfb599189fd"
45
+ },
46
+ {
47
+ "bytes": 1893,
48
+ "path": "reports/torch_training_report.json",
49
+ "sha256": "12d3ace9319d0c1b379d32986365fc2b20baad839c2f20ba2a86d249b4672004"
50
+ }
51
+ ],
52
+ "metadata": {
53
+ "dataset_manifest_path": ".artifacts/hf/codelewm-scaled-20260520-9699b53/pack/manifest.json",
54
+ "executor": {
55
+ "checkpoint_schema_version": "codelewm.checkpoint.v1",
56
+ "device": "cuda",
57
+ "executor": "torch",
58
+ "precision": "bf16-mixed",
59
+ "torch": "2.12.0+cu130",
60
+ "train_rows": 18019,
61
+ "val_rows": 1291
62
+ },
63
+ "final_metrics": {
64
+ "collapse/effective_rank": 5.437364589252875,
65
+ "collapse/effective_rank_ratio": 0.02123970542676904,
66
+ "collapse/embedding_count": 192.0,
67
+ "collapse/embedding_norm_mean": 14.346564877617572,
68
+ "collapse/latent_dim": 256.0,
69
+ "collapse/nearest_neighbor_entropy": 4.85942433071649,
70
+ "collapse/pairwise_cosine_mean": 0.008317808396257337,
71
+ "collapse/per_dim_variance_max": 3.289028716596054,
72
+ "collapse/per_dim_variance_median": 0.6952048902794287,
73
+ "collapse/per_dim_variance_min": 0.0540634111270202,
74
+ "loss/prediction_mse": 0.005166558548808098,
75
+ "loss/sigreg": 1.203125,
76
+ "loss/sigreg_weighted": 0.1083984375,
77
+ "loss/total": 0.11356499791145325,
78
+ "train/examples": 3833852.0,
79
+ "train/examples_per_second": 1103.204300183749,
80
+ "train/gradient_norm": 1.8980227708816528,
81
+ "val/loss/prediction_mse": 0.17075516851175399,
82
+ "val/loss/sigreg": 2.048149585723877,
83
+ "val/loss/sigreg_weighted": 0.1843334688317208,
84
+ "val/loss/total": 0.3550886376982644
85
+ },
86
+ "run_id": "codelewm_scaled_gpu_a10g",
87
+ "schema_version": "codelewm.training_run.v1",
88
+ "seed": 240119,
89
+ "step_count": 60000
90
+ },
91
+ "parent_artifacts": [
92
+ "dataset-ef8ad3f4f48dea9e"
93
+ ],
94
+ "schema_version": "codelewm.artifact_manifest.v1",
95
+ "source_git_sha": "9699b5309e43a3278f272663ef60cda23040d92a"
96
+ }
checkpoints/codelewm-scaled-20260520-9699b53/metrics.jsonl ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics": {"collapse/effective_rank": 5.437364589252875, "collapse/effective_rank_ratio": 0.02123970542676904, "collapse/embedding_count": 192.0, "collapse/embedding_norm_mean": 14.346564877617572, "collapse/latent_dim": 256.0, "collapse/nearest_neighbor_entropy": 4.85942433071649, "collapse/pairwise_cosine_mean": 0.008317808396257337, "collapse/per_dim_variance_max": 3.289028716596054, "collapse/per_dim_variance_median": 0.6952048902794287, "collapse/per_dim_variance_min": 0.0540634111270202, "loss/prediction_mse": 0.005166558548808098, "loss/sigreg": 1.203125, "loss/sigreg_weighted": 0.1083984375, "loss/total": 0.11356499791145325, "train/examples": 3833852.0, "train/examples_per_second": 1103.204300183749, "train/gradient_norm": 1.8980227708816528, "val/loss/prediction_mse": 0.17075516851175399, "val/loss/sigreg": 2.048149585723877, "val/loss/sigreg_weighted": 0.1843334688317208, "val/loss/total": 0.3550886376982644}, "run_id": "codelewm_scaled_gpu_a10g", "schema_version": "codelewm.training_metrics.v1", "step": 60000}
checkpoints/codelewm-scaled-20260520-9699b53/reports/metrics_report.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "final_metrics": {
3
+ "collapse/effective_rank": 5.437364589252875,
4
+ "collapse/effective_rank_ratio": 0.02123970542676904,
5
+ "collapse/embedding_count": 192.0,
6
+ "collapse/embedding_norm_mean": 14.346564877617572,
7
+ "collapse/latent_dim": 256.0,
8
+ "collapse/nearest_neighbor_entropy": 4.85942433071649,
9
+ "collapse/pairwise_cosine_mean": 0.008317808396257337,
10
+ "collapse/per_dim_variance_max": 3.289028716596054,
11
+ "collapse/per_dim_variance_median": 0.6952048902794287,
12
+ "collapse/per_dim_variance_min": 0.0540634111270202,
13
+ "loss/prediction_mse": 0.005166558548808098,
14
+ "loss/sigreg": 1.203125,
15
+ "loss/sigreg_weighted": 0.1083984375,
16
+ "loss/total": 0.11356499791145325,
17
+ "train/examples": 3833852.0,
18
+ "train/examples_per_second": 1103.204300183749,
19
+ "train/gradient_norm": 1.8980227708816528,
20
+ "val/loss/prediction_mse": 0.17075516851175399,
21
+ "val/loss/sigreg": 2.048149585723877,
22
+ "val/loss/sigreg_weighted": 0.1843334688317208,
23
+ "val/loss/total": 0.3550886376982644
24
+ },
25
+ "run_id": "codelewm_scaled_gpu_a10g",
26
+ "schema_version": "codelewm.training_metrics.v1",
27
+ "step_count": 60000
28
+ }
checkpoints/codelewm-scaled-20260520-9699b53/reports/torch_training_report.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "collapse_report": {
3
+ "effective_rank": 5.437364589252875,
4
+ "effective_rank_ratio": 0.02123970542676904,
5
+ "embedding_count": 192,
6
+ "embedding_norm_mean": 14.346564877617572,
7
+ "latent_dim": 256,
8
+ "nearest_neighbor_entropy": 4.85942433071649,
9
+ "pairwise_cosine_mean": 0.008317808396257337,
10
+ "per_dim_variance_max": 3.289028716596054,
11
+ "per_dim_variance_median": 0.6952048902794287,
12
+ "per_dim_variance_min": 0.0540634111270202,
13
+ "schema_version": "codelewm.eval.collapse_report.v1"
14
+ },
15
+ "dataset": {
16
+ "action_view": "text",
17
+ "train_rows": 18019,
18
+ "val_rows": 1291
19
+ },
20
+ "metrics": {
21
+ "collapse/effective_rank": 5.437364589252875,
22
+ "collapse/effective_rank_ratio": 0.02123970542676904,
23
+ "collapse/embedding_count": 192.0,
24
+ "collapse/embedding_norm_mean": 14.346564877617572,
25
+ "collapse/latent_dim": 256.0,
26
+ "collapse/nearest_neighbor_entropy": 4.85942433071649,
27
+ "collapse/pairwise_cosine_mean": 0.008317808396257337,
28
+ "collapse/per_dim_variance_max": 3.289028716596054,
29
+ "collapse/per_dim_variance_median": 0.6952048902794287,
30
+ "collapse/per_dim_variance_min": 0.0540634111270202,
31
+ "loss/prediction_mse": 0.005166558548808098,
32
+ "loss/sigreg": 1.203125,
33
+ "loss/sigreg_weighted": 0.1083984375,
34
+ "loss/total": 0.11356499791145325,
35
+ "train/examples": 3833852.0,
36
+ "train/examples_per_second": 1103.204300183749,
37
+ "train/gradient_norm": 1.8980227708816528,
38
+ "val/loss/prediction_mse": 0.17075516851175399,
39
+ "val/loss/sigreg": 2.048149585723877,
40
+ "val/loss/sigreg_weighted": 0.1843334688317208,
41
+ "val/loss/total": 0.3550886376982644
42
+ },
43
+ "run_id": "codelewm_scaled_gpu_a10g",
44
+ "runtime": {
45
+ "device": "cuda",
46
+ "dtype": "torch.bfloat16",
47
+ "precision": "bf16-mixed",
48
+ "torch": "2.12.0+cu130"
49
+ },
50
+ "schema_version": "codelewm.torch_training_report.v1",
51
+ "step_count": 60000
52
+ }
checkpoints/codelewm-scaled-20260520-9699b53/training_manifest.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "artifact_manifest_id": "training_run-d9074199c0d58911",
3
+ "artifact_manifest_path": "manifest.json",
4
+ "checkpoint_files": [
5
+ {
6
+ "bytes": 351495453,
7
+ "path": "checkpoints/checkpoint.pt",
8
+ "sha256": "09bf8d3880ec272a858dd9b19f2b29622a66a5ebbef6dbd1f8e4ebeb8b6392b8"
9
+ },
10
+ {
11
+ "bytes": 511,
12
+ "path": "checkpoints/checkpoint.pt.manifest.json",
13
+ "sha256": "b18a57a783d2c4693f7c99489ad4752ce6fa1ba8b64a61ccef688dfb599189fd"
14
+ }
15
+ ],
16
+ "config_path": "config.json",
17
+ "config_sha256": "119507eb9909ba9c8dac134595266f5a941ea5c1d096ceeb61679da14d336dad",
18
+ "dataset_manifest_path": ".artifacts/hf/codelewm-scaled-20260520-9699b53/pack/manifest.json",
19
+ "final_metrics": {
20
+ "collapse/effective_rank": 5.437364589252875,
21
+ "collapse/effective_rank_ratio": 0.02123970542676904,
22
+ "collapse/embedding_count": 192.0,
23
+ "collapse/embedding_norm_mean": 14.346564877617572,
24
+ "collapse/latent_dim": 256.0,
25
+ "collapse/nearest_neighbor_entropy": 4.85942433071649,
26
+ "collapse/pairwise_cosine_mean": 0.008317808396257337,
27
+ "collapse/per_dim_variance_max": 3.289028716596054,
28
+ "collapse/per_dim_variance_median": 0.6952048902794287,
29
+ "collapse/per_dim_variance_min": 0.0540634111270202,
30
+ "loss/prediction_mse": 0.005166558548808098,
31
+ "loss/sigreg": 1.203125,
32
+ "loss/sigreg_weighted": 0.1083984375,
33
+ "loss/total": 0.11356499791145325,
34
+ "train/examples": 3833852.0,
35
+ "train/examples_per_second": 1103.204300183749,
36
+ "train/gradient_norm": 1.8980227708816528,
37
+ "val/loss/prediction_mse": 0.17075516851175399,
38
+ "val/loss/sigreg": 2.048149585723877,
39
+ "val/loss/sigreg_weighted": 0.1843334688317208,
40
+ "val/loss/total": 0.3550886376982644
41
+ },
42
+ "metadata": {
43
+ "executor": {
44
+ "checkpoint_schema_version": "codelewm.checkpoint.v1",
45
+ "device": "cuda",
46
+ "executor": "torch",
47
+ "precision": "bf16-mixed",
48
+ "torch": "2.12.0+cu130",
49
+ "train_rows": 18019,
50
+ "val_rows": 1291
51
+ }
52
+ },
53
+ "metrics_path": "metrics.jsonl",
54
+ "metrics_report_path": "reports/metrics_report.json",
55
+ "parent_artifacts": [
56
+ "dataset-ef8ad3f4f48dea9e"
57
+ ],
58
+ "report_files": [
59
+ {
60
+ "bytes": 1893,
61
+ "path": "reports/torch_training_report.json",
62
+ "sha256": "12d3ace9319d0c1b379d32986365fc2b20baad839c2f20ba2a86d249b4672004"
63
+ }
64
+ ],
65
+ "run_id": "codelewm_scaled_gpu_a10g",
66
+ "schema_version": "codelewm.training_run.v1",
67
+ "seed": 240119,
68
+ "step_count": 60000
69
+ }