Add CodeLeWM checkpoint artifact codelewm-action-use-retrieval-20260520-7895d18
Browse files- checkpoints/codelewm-action-use-retrieval-20260520-7895d18/checkpoints/checkpoint.pt +3 -0
- checkpoints/codelewm-action-use-retrieval-20260520-7895d18/checkpoints/checkpoint.pt.manifest.json +14 -0
- checkpoints/codelewm-action-use-retrieval-20260520-7895d18/config.json +54 -0
- checkpoints/codelewm-action-use-retrieval-20260520-7895d18/manifest.json +115 -0
- checkpoints/codelewm-action-use-retrieval-20260520-7895d18/metrics.jsonl +1 -0
- checkpoints/codelewm-action-use-retrieval-20260520-7895d18/reports/metrics_report.json +36 -0
- checkpoints/codelewm-action-use-retrieval-20260520-7895d18/reports/torch_training_report.json +71 -0
- checkpoints/codelewm-action-use-retrieval-20260520-7895d18/training_manifest.json +88 -0
checkpoints/codelewm-action-use-retrieval-20260520-7895d18/checkpoints/checkpoint.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0cb4daf1500495579f5c59cc9fd8aa39f5f70e88f55c0c121320d023b43ddeda
|
| 3 |
+
size 351495901
|
checkpoints/codelewm-action-use-retrieval-20260520-7895d18/checkpoints/checkpoint.pt.manifest.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"checkpoint_path": "checkpoint.pt",
|
| 3 |
+
"checkpoint_sha256": "0cb4daf1500495579f5c59cc9fd8aa39f5f70e88f55c0c121320d023b43ddeda",
|
| 4 |
+
"metadata": {
|
| 5 |
+
"action_view": "text",
|
| 6 |
+
"config_hash": "9ac2ec1d60645bf2eb351d4799f5866533a26a39d7b3dec8dc064988c19c8cf8",
|
| 7 |
+
"latent_dim": 256,
|
| 8 |
+
"model_class": "TorchCodeTransitionModel",
|
| 9 |
+
"record_schema_version": "codelewm.transition.v1",
|
| 10 |
+
"schema_version": "codelewm.checkpoint.v1"
|
| 11 |
+
},
|
| 12 |
+
"migration_hook": null,
|
| 13 |
+
"schema_version": "codelewm.checkpoint.v1"
|
| 14 |
+
}
|
checkpoints/codelewm-action-use-retrieval-20260520-7895d18/config.json
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"data": {
|
| 3 |
+
"manifest": "/CodeLeWM/.artifacts/hf/codelewm-action-use-retrieval-20260520-7895d18/pack/manifest.json",
|
| 4 |
+
"train": "/CodeLeWM/.artifacts/hf/codelewm-action-use-retrieval-20260520-7895d18/pack/hdf5/train.hdf5",
|
| 5 |
+
"val": "/CodeLeWM/.artifacts/hf/codelewm-action-use-retrieval-20260520-7895d18/pack/hdf5/val.hdf5"
|
| 6 |
+
},
|
| 7 |
+
"loader": {
|
| 8 |
+
"batch_size": 64,
|
| 9 |
+
"num_workers": 4,
|
| 10 |
+
"persistent_workers": true,
|
| 11 |
+
"pin_memory": true,
|
| 12 |
+
"shuffle": true
|
| 13 |
+
},
|
| 14 |
+
"loss": {
|
| 15 |
+
"action_use_margin": 0.02,
|
| 16 |
+
"action_use_margin_weight": 0.25,
|
| 17 |
+
"enable_action_use_margin": true,
|
| 18 |
+
"enable_retrieval_loss": true,
|
| 19 |
+
"retrieval_temperature": 0.1,
|
| 20 |
+
"retrieval_weight": 0.05,
|
| 21 |
+
"sigreg_knots": 17,
|
| 22 |
+
"sigreg_num_proj": 1024,
|
| 23 |
+
"sigreg_weight": 0.09
|
| 24 |
+
},
|
| 25 |
+
"name": "codelewm_scaled_action_use_margin_retrieval_gpu_a10g",
|
| 26 |
+
"optimizer": {
|
| 27 |
+
"lr": 0.0001,
|
| 28 |
+
"type": "AdamW",
|
| 29 |
+
"weight_decay": 0.001
|
| 30 |
+
},
|
| 31 |
+
"output": {
|
| 32 |
+
"checkpoint_dir": "/CodeLeWM/.artifacts/hf/codelewm-action-use-retrieval-20260520-7895d18/train/checkpoints",
|
| 33 |
+
"manifest_path": "/CodeLeWM/.artifacts/hf/codelewm-action-use-retrieval-20260520-7895d18/train/training_manifest.json",
|
| 34 |
+
"metrics_path": "/CodeLeWM/.artifacts/hf/codelewm-action-use-retrieval-20260520-7895d18/train/metrics.jsonl",
|
| 35 |
+
"run_dir": "/CodeLeWM/.artifacts/hf/codelewm-action-use-retrieval-20260520-7895d18/train"
|
| 36 |
+
},
|
| 37 |
+
"schema_version": "codelewm.train_config.v1",
|
| 38 |
+
"seed": 240119,
|
| 39 |
+
"trainer": {
|
| 40 |
+
"accelerator": "gpu",
|
| 41 |
+
"devices": 1,
|
| 42 |
+
"gradient_clip_val": 1.0,
|
| 43 |
+
"max_steps": 60000,
|
| 44 |
+
"precision": "bf16-mixed"
|
| 45 |
+
},
|
| 46 |
+
"wm": {
|
| 47 |
+
"action_sequence_length": 256,
|
| 48 |
+
"action_view": "text",
|
| 49 |
+
"embed_dim": 256,
|
| 50 |
+
"history_size": 1,
|
| 51 |
+
"num_preds": 1,
|
| 52 |
+
"state_sequence_length": 1024
|
| 53 |
+
}
|
| 54 |
+
}
|
checkpoints/codelewm-action-use-retrieval-20260520-7895d18/manifest.json
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"artifact_id": "training_run-924cd056375f11ea",
|
| 3 |
+
"artifact_kind": "training_run",
|
| 4 |
+
"command": [
|
| 5 |
+
"codelewm",
|
| 6 |
+
"train",
|
| 7 |
+
"--config",
|
| 8 |
+
"/CodeLeWM/.artifacts/hf/codelewm-action-use-retrieval-20260520-7895d18/configs/train.json",
|
| 9 |
+
"--executor",
|
| 10 |
+
"torch",
|
| 11 |
+
"--device",
|
| 12 |
+
"auto",
|
| 13 |
+
"--overwrite",
|
| 14 |
+
"--json",
|
| 15 |
+
"--log-jsonl",
|
| 16 |
+
"/CodeLeWM/.artifacts/hf/codelewm-action-use-retrieval-20260520-7895d18/logs/train.jsonl"
|
| 17 |
+
],
|
| 18 |
+
"config_sha256": "a8c40e6dad64a96410b9855ce61146a85aa336325980c7a1080943b227e1ffc2",
|
| 19 |
+
"created_at": "2026-05-20T13:13:45Z",
|
| 20 |
+
"files": [
|
| 21 |
+
{
|
| 22 |
+
"bytes": 1789,
|
| 23 |
+
"path": "config.json",
|
| 24 |
+
"sha256": "4e2ad375b3fc132229ada18118a577dc882d68704cb9b317735b03579563f532"
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"bytes": 1472,
|
| 28 |
+
"path": "metrics.jsonl",
|
| 29 |
+
"sha256": "9d8b35638953b0648c66d4f69e3e00d554a5c7efb76dd9afa553cce27d6a9eb3"
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"bytes": 1614,
|
| 33 |
+
"path": "reports/metrics_report.json",
|
| 34 |
+
"sha256": "74270302bccf8913005b5177d8157266a9ba3f55746e296e2fa99fce66ed70de"
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"bytes": 351495901,
|
| 38 |
+
"path": "checkpoints/checkpoint.pt",
|
| 39 |
+
"sha256": "0cb4daf1500495579f5c59cc9fd8aa39f5f70e88f55c0c121320d023b43ddeda"
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"bytes": 511,
|
| 43 |
+
"path": "checkpoints/checkpoint.pt.manifest.json",
|
| 44 |
+
"sha256": "9b8cb47facfa9f3c721fa845df0e374aa16692a63f4a93a06b806a95eb4ffeb9"
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"bytes": 2667,
|
| 48 |
+
"path": "reports/torch_training_report.json",
|
| 49 |
+
"sha256": "7f2a603f05fd92497c5aaa854d9ba73476c285030176ed068afb62976bb0ec4b"
|
| 50 |
+
}
|
| 51 |
+
],
|
| 52 |
+
"metadata": {
|
| 53 |
+
"dataset_manifest_path": ".artifacts/hf/codelewm-action-use-retrieval-20260520-7895d18/pack/manifest.json",
|
| 54 |
+
"executor": {
|
| 55 |
+
"checkpoint_schema_version": "codelewm.checkpoint.v1",
|
| 56 |
+
"device": "cuda",
|
| 57 |
+
"executor": "torch",
|
| 58 |
+
"objective": {
|
| 59 |
+
"action_use_margin": 0.02,
|
| 60 |
+
"action_use_margin_weight": 0.25,
|
| 61 |
+
"enable_action_use_margin": true,
|
| 62 |
+
"enable_retrieval_loss": true,
|
| 63 |
+
"retrieval_temperature": 0.1,
|
| 64 |
+
"retrieval_weight": 0.05,
|
| 65 |
+
"sigreg_knots": 17,
|
| 66 |
+
"sigreg_num_proj": 1024,
|
| 67 |
+
"sigreg_weight": 0.09
|
| 68 |
+
},
|
| 69 |
+
"precision": "bf16-mixed",
|
| 70 |
+
"torch": "2.12.0+cu130",
|
| 71 |
+
"train_rows": 18019,
|
| 72 |
+
"val_rows": 1291
|
| 73 |
+
},
|
| 74 |
+
"final_metrics": {
|
| 75 |
+
"collapse/effective_rank": 10.542142224670206,
|
| 76 |
+
"collapse/effective_rank_ratio": 0.04118024306511799,
|
| 77 |
+
"collapse/embedding_count": 192.0,
|
| 78 |
+
"collapse/embedding_norm_mean": 13.19682605659637,
|
| 79 |
+
"collapse/latent_dim": 256.0,
|
| 80 |
+
"collapse/nearest_neighbor_entropy": 4.847709015203599,
|
| 81 |
+
"collapse/pairwise_cosine_mean": 0.004205391777077934,
|
| 82 |
+
"collapse/per_dim_variance_max": 2.3262929385455555,
|
| 83 |
+
"collapse/per_dim_variance_median": 0.6143968449321122,
|
| 84 |
+
"collapse/per_dim_variance_min": 0.06425978866539729,
|
| 85 |
+
"loss/action_use_margin": 0.005474980920553207,
|
| 86 |
+
"loss/action_use_margin_weighted": 0.0013687452301383018,
|
| 87 |
+
"loss/prediction_mse": 0.006972212344408035,
|
| 88 |
+
"loss/retrieval": 0.20804595947265625,
|
| 89 |
+
"loss/retrieval_weighted": 0.010402298532426357,
|
| 90 |
+
"loss/sigreg": 0.91015625,
|
| 91 |
+
"loss/sigreg_weighted": 0.08203125,
|
| 92 |
+
"loss/total": 0.10077450424432755,
|
| 93 |
+
"train/examples": 3833852.0,
|
| 94 |
+
"train/examples_per_second": 1064.5055633910936,
|
| 95 |
+
"train/gradient_norm": 1.3148689270019531,
|
| 96 |
+
"val/loss/action_use_margin": 0.05166880839637348,
|
| 97 |
+
"val/loss/action_use_margin_weighted": 0.01291720209909337,
|
| 98 |
+
"val/loss/prediction_mse": 0.18427681780996777,
|
| 99 |
+
"val/loss/retrieval": 0.9156766051337832,
|
| 100 |
+
"val/loss/retrieval_weighted": 0.04578383116140252,
|
| 101 |
+
"val/loss/sigreg": 1.887503743171692,
|
| 102 |
+
"val/loss/sigreg_weighted": 0.16987534293106624,
|
| 103 |
+
"val/loss/total": 0.4128531955537342
|
| 104 |
+
},
|
| 105 |
+
"run_id": "codelewm_scaled_action_use_margin_retrieval_gpu_a10g",
|
| 106 |
+
"schema_version": "codelewm.training_run.v1",
|
| 107 |
+
"seed": 240119,
|
| 108 |
+
"step_count": 60000
|
| 109 |
+
},
|
| 110 |
+
"parent_artifacts": [
|
| 111 |
+
"dataset-5695087296ce4a97"
|
| 112 |
+
],
|
| 113 |
+
"schema_version": "codelewm.artifact_manifest.v1",
|
| 114 |
+
"source_git_sha": "7895d185e165a917af0956a313d8948c04b33638"
|
| 115 |
+
}
|
checkpoints/codelewm-action-use-retrieval-20260520-7895d18/metrics.jsonl
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"metrics": {"collapse/effective_rank": 10.542142224670206, "collapse/effective_rank_ratio": 0.04118024306511799, "collapse/embedding_count": 192.0, "collapse/embedding_norm_mean": 13.19682605659637, "collapse/latent_dim": 256.0, "collapse/nearest_neighbor_entropy": 4.847709015203599, "collapse/pairwise_cosine_mean": 0.004205391777077934, "collapse/per_dim_variance_max": 2.3262929385455555, "collapse/per_dim_variance_median": 0.6143968449321122, "collapse/per_dim_variance_min": 0.06425978866539729, "loss/action_use_margin": 0.005474980920553207, "loss/action_use_margin_weighted": 0.0013687452301383018, "loss/prediction_mse": 0.006972212344408035, "loss/retrieval": 0.20804595947265625, "loss/retrieval_weighted": 0.010402298532426357, "loss/sigreg": 0.91015625, "loss/sigreg_weighted": 0.08203125, "loss/total": 0.10077450424432755, "train/examples": 3833852.0, "train/examples_per_second": 1064.5055633910936, "train/gradient_norm": 1.3148689270019531, "val/loss/action_use_margin": 0.05166880839637348, "val/loss/action_use_margin_weighted": 0.01291720209909337, "val/loss/prediction_mse": 0.18427681780996777, "val/loss/retrieval": 0.9156766051337832, "val/loss/retrieval_weighted": 0.04578383116140252, "val/loss/sigreg": 1.887503743171692, "val/loss/sigreg_weighted": 0.16987534293106624, "val/loss/total": 0.4128531955537342}, "run_id": "codelewm_scaled_action_use_margin_retrieval_gpu_a10g", "schema_version": "codelewm.training_metrics.v1", "step": 60000}
|
checkpoints/codelewm-action-use-retrieval-20260520-7895d18/reports/metrics_report.json
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"final_metrics": {
|
| 3 |
+
"collapse/effective_rank": 10.542142224670206,
|
| 4 |
+
"collapse/effective_rank_ratio": 0.04118024306511799,
|
| 5 |
+
"collapse/embedding_count": 192.0,
|
| 6 |
+
"collapse/embedding_norm_mean": 13.19682605659637,
|
| 7 |
+
"collapse/latent_dim": 256.0,
|
| 8 |
+
"collapse/nearest_neighbor_entropy": 4.847709015203599,
|
| 9 |
+
"collapse/pairwise_cosine_mean": 0.004205391777077934,
|
| 10 |
+
"collapse/per_dim_variance_max": 2.3262929385455555,
|
| 11 |
+
"collapse/per_dim_variance_median": 0.6143968449321122,
|
| 12 |
+
"collapse/per_dim_variance_min": 0.06425978866539729,
|
| 13 |
+
"loss/action_use_margin": 0.005474980920553207,
|
| 14 |
+
"loss/action_use_margin_weighted": 0.0013687452301383018,
|
| 15 |
+
"loss/prediction_mse": 0.006972212344408035,
|
| 16 |
+
"loss/retrieval": 0.20804595947265625,
|
| 17 |
+
"loss/retrieval_weighted": 0.010402298532426357,
|
| 18 |
+
"loss/sigreg": 0.91015625,
|
| 19 |
+
"loss/sigreg_weighted": 0.08203125,
|
| 20 |
+
"loss/total": 0.10077450424432755,
|
| 21 |
+
"train/examples": 3833852.0,
|
| 22 |
+
"train/examples_per_second": 1064.5055633910936,
|
| 23 |
+
"train/gradient_norm": 1.3148689270019531,
|
| 24 |
+
"val/loss/action_use_margin": 0.05166880839637348,
|
| 25 |
+
"val/loss/action_use_margin_weighted": 0.01291720209909337,
|
| 26 |
+
"val/loss/prediction_mse": 0.18427681780996777,
|
| 27 |
+
"val/loss/retrieval": 0.9156766051337832,
|
| 28 |
+
"val/loss/retrieval_weighted": 0.04578383116140252,
|
| 29 |
+
"val/loss/sigreg": 1.887503743171692,
|
| 30 |
+
"val/loss/sigreg_weighted": 0.16987534293106624,
|
| 31 |
+
"val/loss/total": 0.4128531955537342
|
| 32 |
+
},
|
| 33 |
+
"run_id": "codelewm_scaled_action_use_margin_retrieval_gpu_a10g",
|
| 34 |
+
"schema_version": "codelewm.training_metrics.v1",
|
| 35 |
+
"step_count": 60000
|
| 36 |
+
}
|
checkpoints/codelewm-action-use-retrieval-20260520-7895d18/reports/torch_training_report.json
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"collapse_report": {
|
| 3 |
+
"effective_rank": 10.542142224670206,
|
| 4 |
+
"effective_rank_ratio": 0.04118024306511799,
|
| 5 |
+
"embedding_count": 192,
|
| 6 |
+
"embedding_norm_mean": 13.19682605659637,
|
| 7 |
+
"latent_dim": 256,
|
| 8 |
+
"nearest_neighbor_entropy": 4.847709015203599,
|
| 9 |
+
"pairwise_cosine_mean": 0.004205391777077934,
|
| 10 |
+
"per_dim_variance_max": 2.3262929385455555,
|
| 11 |
+
"per_dim_variance_median": 0.6143968449321122,
|
| 12 |
+
"per_dim_variance_min": 0.06425978866539729,
|
| 13 |
+
"schema_version": "codelewm.eval.collapse_report.v1"
|
| 14 |
+
},
|
| 15 |
+
"dataset": {
|
| 16 |
+
"action_view": "text",
|
| 17 |
+
"train_rows": 18019,
|
| 18 |
+
"val_rows": 1291
|
| 19 |
+
},
|
| 20 |
+
"metrics": {
|
| 21 |
+
"collapse/effective_rank": 10.542142224670206,
|
| 22 |
+
"collapse/effective_rank_ratio": 0.04118024306511799,
|
| 23 |
+
"collapse/embedding_count": 192.0,
|
| 24 |
+
"collapse/embedding_norm_mean": 13.19682605659637,
|
| 25 |
+
"collapse/latent_dim": 256.0,
|
| 26 |
+
"collapse/nearest_neighbor_entropy": 4.847709015203599,
|
| 27 |
+
"collapse/pairwise_cosine_mean": 0.004205391777077934,
|
| 28 |
+
"collapse/per_dim_variance_max": 2.3262929385455555,
|
| 29 |
+
"collapse/per_dim_variance_median": 0.6143968449321122,
|
| 30 |
+
"collapse/per_dim_variance_min": 0.06425978866539729,
|
| 31 |
+
"loss/action_use_margin": 0.005474980920553207,
|
| 32 |
+
"loss/action_use_margin_weighted": 0.0013687452301383018,
|
| 33 |
+
"loss/prediction_mse": 0.006972212344408035,
|
| 34 |
+
"loss/retrieval": 0.20804595947265625,
|
| 35 |
+
"loss/retrieval_weighted": 0.010402298532426357,
|
| 36 |
+
"loss/sigreg": 0.91015625,
|
| 37 |
+
"loss/sigreg_weighted": 0.08203125,
|
| 38 |
+
"loss/total": 0.10077450424432755,
|
| 39 |
+
"train/examples": 3833852.0,
|
| 40 |
+
"train/examples_per_second": 1064.5055633910936,
|
| 41 |
+
"train/gradient_norm": 1.3148689270019531,
|
| 42 |
+
"val/loss/action_use_margin": 0.05166880839637348,
|
| 43 |
+
"val/loss/action_use_margin_weighted": 0.01291720209909337,
|
| 44 |
+
"val/loss/prediction_mse": 0.18427681780996777,
|
| 45 |
+
"val/loss/retrieval": 0.9156766051337832,
|
| 46 |
+
"val/loss/retrieval_weighted": 0.04578383116140252,
|
| 47 |
+
"val/loss/sigreg": 1.887503743171692,
|
| 48 |
+
"val/loss/sigreg_weighted": 0.16987534293106624,
|
| 49 |
+
"val/loss/total": 0.4128531955537342
|
| 50 |
+
},
|
| 51 |
+
"objective": {
|
| 52 |
+
"action_use_margin": 0.02,
|
| 53 |
+
"action_use_margin_weight": 0.25,
|
| 54 |
+
"enable_action_use_margin": true,
|
| 55 |
+
"enable_retrieval_loss": true,
|
| 56 |
+
"retrieval_temperature": 0.1,
|
| 57 |
+
"retrieval_weight": 0.05,
|
| 58 |
+
"sigreg_knots": 17,
|
| 59 |
+
"sigreg_num_proj": 1024,
|
| 60 |
+
"sigreg_weight": 0.09
|
| 61 |
+
},
|
| 62 |
+
"run_id": "codelewm_scaled_action_use_margin_retrieval_gpu_a10g",
|
| 63 |
+
"runtime": {
|
| 64 |
+
"device": "cuda",
|
| 65 |
+
"dtype": "torch.bfloat16",
|
| 66 |
+
"precision": "bf16-mixed",
|
| 67 |
+
"torch": "2.12.0+cu130"
|
| 68 |
+
},
|
| 69 |
+
"schema_version": "codelewm.torch_training_report.v1",
|
| 70 |
+
"step_count": 60000
|
| 71 |
+
}
|
checkpoints/codelewm-action-use-retrieval-20260520-7895d18/training_manifest.json
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"artifact_manifest_id": "training_run-924cd056375f11ea",
|
| 3 |
+
"artifact_manifest_path": "manifest.json",
|
| 4 |
+
"checkpoint_files": [
|
| 5 |
+
{
|
| 6 |
+
"bytes": 351495901,
|
| 7 |
+
"path": "checkpoints/checkpoint.pt",
|
| 8 |
+
"sha256": "0cb4daf1500495579f5c59cc9fd8aa39f5f70e88f55c0c121320d023b43ddeda"
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"bytes": 511,
|
| 12 |
+
"path": "checkpoints/checkpoint.pt.manifest.json",
|
| 13 |
+
"sha256": "9b8cb47facfa9f3c721fa845df0e374aa16692a63f4a93a06b806a95eb4ffeb9"
|
| 14 |
+
}
|
| 15 |
+
],
|
| 16 |
+
"config_path": "config.json",
|
| 17 |
+
"config_sha256": "a8c40e6dad64a96410b9855ce61146a85aa336325980c7a1080943b227e1ffc2",
|
| 18 |
+
"dataset_manifest_path": ".artifacts/hf/codelewm-action-use-retrieval-20260520-7895d18/pack/manifest.json",
|
| 19 |
+
"final_metrics": {
|
| 20 |
+
"collapse/effective_rank": 10.542142224670206,
|
| 21 |
+
"collapse/effective_rank_ratio": 0.04118024306511799,
|
| 22 |
+
"collapse/embedding_count": 192.0,
|
| 23 |
+
"collapse/embedding_norm_mean": 13.19682605659637,
|
| 24 |
+
"collapse/latent_dim": 256.0,
|
| 25 |
+
"collapse/nearest_neighbor_entropy": 4.847709015203599,
|
| 26 |
+
"collapse/pairwise_cosine_mean": 0.004205391777077934,
|
| 27 |
+
"collapse/per_dim_variance_max": 2.3262929385455555,
|
| 28 |
+
"collapse/per_dim_variance_median": 0.6143968449321122,
|
| 29 |
+
"collapse/per_dim_variance_min": 0.06425978866539729,
|
| 30 |
+
"loss/action_use_margin": 0.005474980920553207,
|
| 31 |
+
"loss/action_use_margin_weighted": 0.0013687452301383018,
|
| 32 |
+
"loss/prediction_mse": 0.006972212344408035,
|
| 33 |
+
"loss/retrieval": 0.20804595947265625,
|
| 34 |
+
"loss/retrieval_weighted": 0.010402298532426357,
|
| 35 |
+
"loss/sigreg": 0.91015625,
|
| 36 |
+
"loss/sigreg_weighted": 0.08203125,
|
| 37 |
+
"loss/total": 0.10077450424432755,
|
| 38 |
+
"train/examples": 3833852.0,
|
| 39 |
+
"train/examples_per_second": 1064.5055633910936,
|
| 40 |
+
"train/gradient_norm": 1.3148689270019531,
|
| 41 |
+
"val/loss/action_use_margin": 0.05166880839637348,
|
| 42 |
+
"val/loss/action_use_margin_weighted": 0.01291720209909337,
|
| 43 |
+
"val/loss/prediction_mse": 0.18427681780996777,
|
| 44 |
+
"val/loss/retrieval": 0.9156766051337832,
|
| 45 |
+
"val/loss/retrieval_weighted": 0.04578383116140252,
|
| 46 |
+
"val/loss/sigreg": 1.887503743171692,
|
| 47 |
+
"val/loss/sigreg_weighted": 0.16987534293106624,
|
| 48 |
+
"val/loss/total": 0.4128531955537342
|
| 49 |
+
},
|
| 50 |
+
"metadata": {
|
| 51 |
+
"executor": {
|
| 52 |
+
"checkpoint_schema_version": "codelewm.checkpoint.v1",
|
| 53 |
+
"device": "cuda",
|
| 54 |
+
"executor": "torch",
|
| 55 |
+
"objective": {
|
| 56 |
+
"action_use_margin": 0.02,
|
| 57 |
+
"action_use_margin_weight": 0.25,
|
| 58 |
+
"enable_action_use_margin": true,
|
| 59 |
+
"enable_retrieval_loss": true,
|
| 60 |
+
"retrieval_temperature": 0.1,
|
| 61 |
+
"retrieval_weight": 0.05,
|
| 62 |
+
"sigreg_knots": 17,
|
| 63 |
+
"sigreg_num_proj": 1024,
|
| 64 |
+
"sigreg_weight": 0.09
|
| 65 |
+
},
|
| 66 |
+
"precision": "bf16-mixed",
|
| 67 |
+
"torch": "2.12.0+cu130",
|
| 68 |
+
"train_rows": 18019,
|
| 69 |
+
"val_rows": 1291
|
| 70 |
+
}
|
| 71 |
+
},
|
| 72 |
+
"metrics_path": "metrics.jsonl",
|
| 73 |
+
"metrics_report_path": "reports/metrics_report.json",
|
| 74 |
+
"parent_artifacts": [
|
| 75 |
+
"dataset-5695087296ce4a97"
|
| 76 |
+
],
|
| 77 |
+
"report_files": [
|
| 78 |
+
{
|
| 79 |
+
"bytes": 2667,
|
| 80 |
+
"path": "reports/torch_training_report.json",
|
| 81 |
+
"sha256": "7f2a603f05fd92497c5aaa854d9ba73476c285030176ed068afb62976bb0ec4b"
|
| 82 |
+
}
|
| 83 |
+
],
|
| 84 |
+
"run_id": "codelewm_scaled_action_use_margin_retrieval_gpu_a10g",
|
| 85 |
+
"schema_version": "codelewm.training_run.v1",
|
| 86 |
+
"seed": 240119,
|
| 87 |
+
"step_count": 60000
|
| 88 |
+
}
|