Charlie81 committed on Aug 28, 2025

Commit

1400f6b

1 Parent(s): d05c72b

delete checkpoints

Browse files

Files changed (22) hide show

checkpoints/checkpoint-60/config.json +0 -39
checkpoints/checkpoint-60/generation_config.json +0 -6
checkpoints/checkpoint-60/model-00001-of-00003.safetensors +0 -3
checkpoints/checkpoint-60/model-00002-of-00003.safetensors +0 -3
checkpoints/checkpoint-60/model-00003-of-00003.safetensors +0 -3
checkpoints/checkpoint-60/model.safetensors.index.json +0 -0
checkpoints/checkpoint-60/optimizer.pt +0 -3
checkpoints/checkpoint-60/rng_state.pth +0 -3
checkpoints/checkpoint-60/scheduler.pt +0 -3
checkpoints/checkpoint-60/trainer_state.json +0 -76
checkpoints/checkpoint-60/training_args.bin +0 -3
checkpoints/checkpoint-80/config.json +0 -39
checkpoints/checkpoint-80/generation_config.json +0 -6
checkpoints/checkpoint-80/model-00001-of-00003.safetensors +0 -3
checkpoints/checkpoint-80/model-00002-of-00003.safetensors +0 -3
checkpoints/checkpoint-80/model-00003-of-00003.safetensors +0 -3
checkpoints/checkpoint-80/model.safetensors.index.json +0 -0
checkpoints/checkpoint-80/optimizer.pt +0 -3
checkpoints/checkpoint-80/rng_state.pth +0 -3
checkpoints/checkpoint-80/scheduler.pt +0 -3
checkpoints/checkpoint-80/trainer_state.json +0 -90
checkpoints/checkpoint-80/training_args.bin +0 -3

checkpoints/checkpoint-60/config.json DELETED Viewed

@@ -1,39 +0,0 @@
-{
-  "architectures": [
-    "MyOlmoeForCausalLM"
-  ],
-  "attention_bias": false,
-  "attention_dropout": 0.0,
-  "clip_qkv": null,
-  "eos_token_id": 50279,
-  "hidden_act": "silu",
-  "hidden_size": 2048,
-  "initializer_range": 0.02,
-  "intermediate_size": 1024,
-  "max_position_embeddings": 4096,
-  "max_small_expert_count": 64,
-  "model_type": "olmoe",
-  "norm_topk_prob": false,
-  "num_attention_heads": 16,
-  "num_experts": 64,
-  "num_experts_per_tok": 2,
-  "num_hidden_layers": 16,
-  "num_key_value_heads": 16,
-  "num_small_experts": 64,
-  "output_router_logits": false,
-  "pad_token_id": 1,
-  "rms_norm_eps": 1e-05,
-  "rope_scaling": null,
-  "rope_theta": 10000.0,
-  "router_aux_loss_coef": 0.01,
-  "small_expert_count": 64,
-  "small_expert_intermediate_ratio": 64,
-  "small_expert_intermediate_size": 0,
-  "small_expert_sparsity_coef": 0.1,
-  "small_expert_strategy": "constant",
-  "tie_word_embeddings": false,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.55.2",
-  "use_cache": true,
-  "vocab_size": 50304
-}

checkpoints/checkpoint-60/generation_config.json DELETED Viewed

@@ -1,6 +0,0 @@
-{
-  "_from_model_config": true,
-  "eos_token_id": 50279,
-  "pad_token_id": 1,
-  "transformers_version": "4.55.2"
-}

checkpoints/checkpoint-60/model-00001-of-00003.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9a1b9651bad1a045a178e22cf198d5070c94d4374f6331086e38801fe8d88ca3
-size 4997482624

checkpoints/checkpoint-60/model-00002-of-00003.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:025d40cedfb2f13f4718def7d040eca03796140d639f599d9eca5bddda51839a
-size 4997867120

checkpoints/checkpoint-60/model-00003-of-00003.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:928fe194d9b0d49311de80482038d636aded9ebe83b373ca36e2fff712895265
-size 3856242664

checkpoints/checkpoint-60/model.safetensors.index.json DELETED Viewed

The diff for this file is too large to render. See raw diff

checkpoints/checkpoint-60/optimizer.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0363c4418f9eb060e1b46f43324bf9f6da0ab67754604a2fe1c3ae1d08a99c32
-size 25858571

checkpoints/checkpoint-60/rng_state.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:098b29492211804ab324a36f37466821d948280bb74fce4ba895c03f13ecd878
-size 14645

checkpoints/checkpoint-60/scheduler.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6e1be529198179cd559ddcb4c59a9f665944a456be4a70f4f5dcf79350fe0534
-size 1465

checkpoints/checkpoint-60/trainer_state.json DELETED Viewed

@@ -1,76 +0,0 @@
-{
-  "best_global_step": null,
-  "best_metric": null,
-  "best_model_checkpoint": null,
-  "epoch": 0.0029433948380213027,
-  "eval_steps": 500,
-  "global_step": 60,
-  "is_hyper_param_search": false,
-  "is_local_process_zero": true,
-  "is_world_process_zero": true,
-  "log_history": [
-    {
-      "epoch": 0.0004905658063368838,
-      "grad_norm": 32.25,
-      "learning_rate": 1.471550032701112e-07,
-      "loss": 104.4204,
-      "step": 10
-    },
-    {
-      "epoch": 0.0009811316126737675,
-      "grad_norm": 33.25,
-      "learning_rate": 3.106605624591236e-07,
-      "loss": 105.3427,
-      "step": 20
-    },
-    {
-      "epoch": 0.0014716974190106514,
-      "grad_norm": 37.25,
-      "learning_rate": 4.7416612164813603e-07,
-      "loss": 105.0909,
-      "step": 30
-    },
-    {
-      "epoch": 0.001962263225347535,
-      "grad_norm": 27.75,
-      "learning_rate": 6.376716808371485e-07,
-      "loss": 105.2529,
-      "step": 40
-    },
-    {
-      "epoch": 0.002452829031684419,
-      "grad_norm": 27.5,
-      "learning_rate": 8.011772400261609e-07,
-      "loss": 105.3971,
-      "step": 50
-    },
-    {
-      "epoch": 0.0029433948380213027,
-      "grad_norm": 30.75,
-      "learning_rate": 9.646827992151733e-07,
-      "loss": 105.0396,
-      "step": 60
-    }
-  ],
-  "logging_steps": 10,
-  "max_steps": 61155,
-  "num_input_tokens_seen": 0,
-  "num_train_epochs": 3,
-  "save_steps": 20,
-  "stateful_callbacks": {
-    "TrainerControl": {
-      "args": {
-        "should_epoch_stop": false,
-        "should_evaluate": false,
-        "should_log": false,
-        "should_save": true,
-        "should_training_stop": false
-      },
-      "attributes": {}
-    }
-  },
-  "total_flos": 1.6096442745618432e+17,
-  "train_batch_size": 2,
-  "trial_name": null,
-  "trial_params": null
-}

checkpoints/checkpoint-60/training_args.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ffa490bde32401dd6d70c4f1b1cff8f5df114f94b79824d3a47b2ae8c00b822d
-size 5713

checkpoints/checkpoint-80/config.json DELETED Viewed

@@ -1,39 +0,0 @@
-{
-  "architectures": [
-    "MyOlmoeForCausalLM"
-  ],
-  "attention_bias": false,
-  "attention_dropout": 0.0,
-  "clip_qkv": null,
-  "eos_token_id": 50279,
-  "hidden_act": "silu",
-  "hidden_size": 2048,
-  "initializer_range": 0.02,
-  "intermediate_size": 1024,
-  "max_position_embeddings": 4096,
-  "max_small_expert_count": 64,
-  "model_type": "olmoe",
-  "norm_topk_prob": false,
-  "num_attention_heads": 16,
-  "num_experts": 64,
-  "num_experts_per_tok": 2,
-  "num_hidden_layers": 16,
-  "num_key_value_heads": 16,
-  "num_small_experts": 64,
-  "output_router_logits": false,
-  "pad_token_id": 1,
-  "rms_norm_eps": 1e-05,
-  "rope_scaling": null,
-  "rope_theta": 10000.0,
-  "router_aux_loss_coef": 0.01,
-  "small_expert_count": 64,
-  "small_expert_intermediate_ratio": 64,
-  "small_expert_intermediate_size": 0,
-  "small_expert_sparsity_coef": 0.1,
-  "small_expert_strategy": "constant",
-  "tie_word_embeddings": false,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.55.2",
-  "use_cache": true,
-  "vocab_size": 50304
-}

checkpoints/checkpoint-80/generation_config.json DELETED Viewed

@@ -1,6 +0,0 @@
-{
-  "_from_model_config": true,
-  "eos_token_id": 50279,
-  "pad_token_id": 1,
-  "transformers_version": "4.55.2"
-}

checkpoints/checkpoint-80/model-00001-of-00003.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9a1b9651bad1a045a178e22cf198d5070c94d4374f6331086e38801fe8d88ca3
-size 4997482624

checkpoints/checkpoint-80/model-00002-of-00003.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:43fb39d4048f4c7f63e0b0989fc374720af15131f2b290760d93b0eca2f0ca3e
-size 4997867120

checkpoints/checkpoint-80/model-00003-of-00003.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5a9fcd7b16ae46a33e32a0eab4ea6b1a02770b62e98f7ffaba025c588e1336e6
-size 3856242664

checkpoints/checkpoint-80/model.safetensors.index.json DELETED Viewed

The diff for this file is too large to render. See raw diff

checkpoints/checkpoint-80/optimizer.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:358cc79253a54a16e91c5c08071aba70ca7494d5c48e423c55d77d2ba49212bf
-size 25858571

checkpoints/checkpoint-80/rng_state.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:098b29492211804ab324a36f37466821d948280bb74fce4ba895c03f13ecd878
-size 14645

checkpoints/checkpoint-80/scheduler.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d484e81fa22cdbcf66c2585a42950fa1db3e6db36dd8936a72f76c2844202008
-size 1465

checkpoints/checkpoint-80/trainer_state.json DELETED Viewed

@@ -1,90 +0,0 @@
-{
-  "best_global_step": null,
-  "best_metric": null,
-  "best_model_checkpoint": null,
-  "epoch": 0.00392452645069507,
-  "eval_steps": 500,
-  "global_step": 80,
-  "is_hyper_param_search": false,
-  "is_local_process_zero": true,
-  "is_world_process_zero": true,
-  "log_history": [
-    {
-      "epoch": 0.0004905658063368838,
-      "grad_norm": 32.25,
-      "learning_rate": 1.471550032701112e-07,
-      "loss": 104.4204,
-      "step": 10
-    },
-    {
-      "epoch": 0.0009811316126737675,
-      "grad_norm": 33.25,
-      "learning_rate": 3.106605624591236e-07,
-      "loss": 105.3427,
-      "step": 20
-    },
-    {
-      "epoch": 0.0014716974190106514,
-      "grad_norm": 37.25,
-      "learning_rate": 4.7416612164813603e-07,
-      "loss": 105.0909,
-      "step": 30
-    },
-    {
-      "epoch": 0.001962263225347535,
-      "grad_norm": 27.75,
-      "learning_rate": 6.376716808371485e-07,
-      "loss": 105.2529,
-      "step": 40
-    },
-    {
-      "epoch": 0.002452829031684419,
-      "grad_norm": 27.5,
-      "learning_rate": 8.011772400261609e-07,
-      "loss": 105.3971,
-      "step": 50
-    },
-    {
-      "epoch": 0.0029433948380213027,
-      "grad_norm": 30.75,
-      "learning_rate": 9.646827992151733e-07,
-      "loss": 105.0396,
-      "step": 60
-    },
-    {
-      "epoch": 0.003433960644358187,
-      "grad_norm": 27.75,
-      "learning_rate": 1.1281883584041859e-06,
-      "loss": 104.2232,
-      "step": 70
-    },
-    {
-      "epoch": 0.00392452645069507,
-      "grad_norm": 29.625,
-      "learning_rate": 1.2916939175931983e-06,
-      "loss": 104.437,
-      "step": 80
-    }
-  ],
-  "logging_steps": 10,
-  "max_steps": 61155,
-  "num_input_tokens_seen": 0,
-  "num_train_epochs": 3,
-  "save_steps": 20,
-  "stateful_callbacks": {
-    "TrainerControl": {
-      "args": {
-        "should_epoch_stop": false,
-        "should_evaluate": false,
-        "should_log": false,
-        "should_save": true,
-        "should_training_stop": false
-      },
-      "attributes": {}
-    }
-  },
-  "total_flos": 2.1461923660824576e+17,
-  "train_batch_size": 2,
-  "trial_name": null,
-  "trial_params": null
-}

checkpoints/checkpoint-80/training_args.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ffa490bde32401dd6d70c4f1b1cff8f5df114f94b79824d3a47b2ae8c00b822d
-size 5713