FastPlus125m Geodesic Backup checkpoint-105 at global step 105

Browse files

Files changed (11) hide show

checkpoint-105/config.json +19 -0
checkpoint-105/generation_config.json +6 -0
checkpoint-105/model.safetensors +3 -0
checkpoint-105/optimizer.pt +3 -0
checkpoint-105/rng_state.pth +3 -0
checkpoint-105/scaler.pt +3 -0
checkpoint-105/scheduler.pt +3 -0
checkpoint-105/tokenizer.json +0 -0
checkpoint-105/tokenizer_config.json +15 -0
checkpoint-105/trainer_state.json +114 -0
checkpoint-105/training_args.bin +3 -0

checkpoint-105/config.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+  "architectures": [
+    "FastPlus125mForCausalLM"
+  ],
+  "dtype": "float32",
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 2048,
+  "kd_alpha": 0.4,
+  "kd_temperature": 2.5,
+  "max_position_embeddings": 512,
+  "model_type": "fastplus_125m",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 16,
+  "tie_word_embeddings": true,
+  "transformers_version": "5.12.0",
+  "use_cache": false,
+  "vocab_size": 1792
+}

checkpoint-105/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "output_attentions": false,
+  "output_hidden_states": false,
+  "transformers_version": "5.12.0"
+}

checkpoint-105/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4d70ae1d4d421134a39bb7bb2497c9e0e29a6696d5439cde6980193f470bba78
+size 475022672

checkpoint-105/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:940f8cc2971db7ee16e49a5cdcc7e1d9bfbca930991a9314512f9823f9f4c4c5
+size 949694539

checkpoint-105/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f4a9f217e852f439efa6bd32fde98d6867f11aa6ea13ddc021ba10af6a0b0934
+size 14645

checkpoint-105/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d0604fb6aed7728e4c26776057bda0c591a130bb89b1efd24f5a809be15d4fc7
+size 1383

checkpoint-105/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:715d93f94f69cc2a9664de6febc48ab1cd682b7056ab9016d52301f7c2c872b2
+size 7737

checkpoint-105/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-105/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "backend": "tokenizers",
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "extra_special_tokens": [
+    "<|user|>",
+    "<|assistant|>"
+  ],
+  "is_local": false,
+  "local_files_only": false,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "tokenizer_class": "TokenizersBackend",
+  "unk_token": "<unk>"
+}

checkpoint-105/trainer_state.json ADDED Viewed

	@@ -0,0 +1,114 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 15.0,
+  "eval_steps": 500,
+  "global_step": 105,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.4285714285714286,
+      "grad_norm": 35220.515625,
+      "learning_rate": 0.0,
+      "lorentz_dist_loss": 1.0369,
+      "loss": 12.631961822509766,
+      "step": 10
+    },
+    {
+      "epoch": 2.857142857142857,
+      "grad_norm": 40192.5859375,
+      "learning_rate": 3.06e-06,
+      "lorentz_dist_loss": 1.0303,
+      "loss": 12.552436828613281,
+      "step": 20
+    },
+    {
+      "epoch": 4.285714285714286,
+      "grad_norm": 27514.802734375,
+      "learning_rate": 6.46e-06,
+      "lorentz_dist_loss": 1.0012,
+      "loss": 12.204200744628906,
+      "step": 30
+    },
+    {
+      "epoch": 5.714285714285714,
+      "grad_norm": 43463.203125,
+      "learning_rate": 9.86e-06,
+      "lorentz_dist_loss": 1.0322,
+      "loss": 12.575341033935548,
+      "step": 40
+    },
+    {
+      "epoch": 7.142857142857143,
+      "grad_norm": 46014.42578125,
+      "learning_rate": 1.326e-05,
+      "lorentz_dist_loss": 0.9906,
+      "loss": 12.076918792724609,
+      "step": 50
+    },
+    {
+      "epoch": 8.571428571428571,
+      "grad_norm": 21427.990234375,
+      "learning_rate": 1.6660000000000003e-05,
+      "lorentz_dist_loss": 0.9057,
+      "loss": 11.058214569091797,
+      "step": 60
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 12262.89453125,
+      "learning_rate": 2.006e-05,
+      "lorentz_dist_loss": 0.8058,
+      "loss": 9.858463287353516,
+      "step": 70
+    },
+    {
+      "epoch": 11.428571428571429,
+      "grad_norm": 12813.310546875,
+      "learning_rate": 2.3460000000000002e-05,
+      "lorentz_dist_loss": 0.7448,
+      "loss": 9.127013397216796,
+      "step": 80
+    },
+    {
+      "epoch": 12.857142857142858,
+      "grad_norm": 7791.78955078125,
+      "learning_rate": 2.6860000000000004e-05,
+      "lorentz_dist_loss": 0.7176,
+      "loss": 8.800634765625,
+      "step": 90
+    },
+    {
+      "epoch": 14.285714285714286,
+      "grad_norm": 7794.1259765625,
+      "learning_rate": 3.026e-05,
+      "lorentz_dist_loss": 0.7014,
+      "loss": 8.605958557128906,
+      "step": 100
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 105,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 15,
+  "save_steps": 200,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 896596143621120.0,
+  "train_batch_size": 12,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-105/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:81294502804efcb3a9510bb4aef4a5dfafe7bd4697bb802b14ff6b17f0d6bc98
+size 5201