Training in progress, step 100, checkpoint

Browse files

Files changed (7) hide show

checkpoint-100/README.md +2 -1
checkpoint-100/adapter_config.json +2 -1
checkpoint-100/adapter_model.safetensors +1 -1
checkpoint-100/optimizer.pt +1 -1
checkpoint-100/rng_state.pth +1 -1
checkpoint-100/trainer_state.json +34 -24
checkpoint-100/training_args.bin +2 -2

checkpoint-100/README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 library_name: peft
-base_model: exontidev/SISUS_SIKERS
 ---
 # Model Card for Model ID
@@ -201,5 +201,6 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
 ### Framework versions
 - PEFT 0.8.2
 - PEFT 0.7.1

 ---
 library_name: peft
+base_model: IlyaGusev/rugpt_large_turbo_instructed
 ---
 # Model Card for Model ID
 ### Framework versions
+- PEFT 0.9.0
 - PEFT 0.8.2
 - PEFT 0.7.1

checkpoint-100/adapter_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "exontidev/SISUS_SIKERS",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
@@ -22,5 +22,6 @@
     "c_attn"
   ],
   "task_type": "CAUSAL_LM",
   "use_rslora": false
 }

 {
   "alpha_pattern": {},
   "auto_mapping": null,
+  "base_model_name_or_path": "IlyaGusev/rugpt_large_turbo_instructed",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
     "c_attn"
   ],
   "task_type": "CAUSAL_LM",
+  "use_dora": false,
   "use_rslora": false
 }

checkpoint-100/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83297727419ff3fdfcb22d67abebaec4169a2026b2157194b8c18c8d0d3fc7b0
 size 9443384

 version https://git-lfs.github.com/spec/v1
+oid sha256:723d245e9ca9bf2cf103f4c7cbcc64b245ae77c89fe73a88a7d0665207cbc743
 size 9443384

checkpoint-100/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c6bc1c2471ae085ae777895956c9c2d09bbe11bfa7f5423fadb03417a66d6b5e
 size 18914450

 version https://git-lfs.github.com/spec/v1
+oid sha256:24e0b23d4bf5faab5873c91c421fc565e50df58dc82d2f05d6243453c1a2f3ee
 size 18914450

checkpoint-100/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:095b555d6adde095e244513c780c9707a33481faa3b1e2781321113c02b6edbd
 size 14168

 version https://git-lfs.github.com/spec/v1
+oid sha256:11c0f1c9884efbd52a2ccba350f60152761f753ca8e4d0fe74b04f5dbf78a9a4
 size 14168

checkpoint-100/trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.24613491269902316,
   "eval_steps": 500,
   "global_step": 100,
   "is_hyper_param_search": false,
@@ -9,72 +9,82 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.02,
       "learning_rate": 2.9999999999999997e-05,
-      "loss": 3.623,
       "step": 10
     },
     {
-      "epoch": 0.05,
       "learning_rate": 5.9999999999999995e-05,
-      "loss": 3.605,
       "step": 20
     },
     {
-      "epoch": 0.07,
       "learning_rate": 8.999999999999999e-05,
-      "loss": 3.5168,
       "step": 30
     },
     {
-      "epoch": 0.1,
       "learning_rate": 0.00011999999999999999,
-      "loss": 3.3511,
       "step": 40
     },
     {
-      "epoch": 0.12,
       "learning_rate": 0.00015,
-      "loss": 3.1952,
       "step": 50
     },
     {
-      "epoch": 0.15,
       "learning_rate": 0.00017999999999999998,
-      "loss": 3.0575,
       "step": 60
     },
     {
-      "epoch": 0.17,
       "learning_rate": 0.00020999999999999998,
-      "loss": 2.8575,
       "step": 70
     },
     {
-      "epoch": 0.2,
       "learning_rate": 0.00023999999999999998,
-      "loss": 2.7029,
       "step": 80
     },
     {
-      "epoch": 0.22,
       "learning_rate": 0.00027,
-      "loss": 2.6392,
       "step": 90
     },
     {
-      "epoch": 0.25,
       "learning_rate": 0.0003,
-      "loss": 2.5679,
       "step": 100
     }
   ],
   "logging_steps": 10,
-  "max_steps": 200,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 1,
   "save_steps": 100,
-  "total_flos": 1.3643358381416448e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 4.177545691906005,
   "eval_steps": 500,
   "global_step": 100,
   "is_hyper_param_search": false,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.42,
+      "grad_norm": 0.18151910603046417,
       "learning_rate": 2.9999999999999997e-05,
+      "loss": 3.8655,
       "step": 10
     },
     {
+      "epoch": 0.84,
+      "grad_norm": 0.23640306293964386,
       "learning_rate": 5.9999999999999995e-05,
+      "loss": 3.8223,
       "step": 20
     },
     {
+      "epoch": 1.25,
+      "grad_norm": 0.3272729814052582,
       "learning_rate": 8.999999999999999e-05,
+      "loss": 3.7256,
       "step": 30
     },
     {
+      "epoch": 1.67,
+      "grad_norm": 0.3942272365093231,
       "learning_rate": 0.00011999999999999999,
+      "loss": 3.4944,
       "step": 40
     },
     {
+      "epoch": 2.09,
+      "grad_norm": 0.5312587022781372,
       "learning_rate": 0.00015,
+      "loss": 3.0931,
       "step": 50
     },
     {
+      "epoch": 2.51,
+      "grad_norm": 0.6111555695533752,
       "learning_rate": 0.00017999999999999998,
+      "loss": 2.5215,
       "step": 60
     },
     {
+      "epoch": 2.92,
+      "grad_norm": 0.4672750234603882,
       "learning_rate": 0.00020999999999999998,
+      "loss": 1.8905,
       "step": 70
     },
     {
+      "epoch": 3.34,
+      "grad_norm": 0.16217181086540222,
       "learning_rate": 0.00023999999999999998,
+      "loss": 1.5992,
       "step": 80
     },
     {
+      "epoch": 3.76,
+      "grad_norm": 0.11098425090312958,
       "learning_rate": 0.00027,
+      "loss": 1.4717,
       "step": 90
     },
     {
+      "epoch": 4.18,
+      "grad_norm": 0.08228754252195358,
       "learning_rate": 0.0003,
+      "loss": 1.4336,
       "step": 100
     }
   ],
   "logging_steps": 10,
+  "max_steps": 300,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 14,
   "save_steps": 100,
+  "total_flos": 1.863006384782131e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

checkpoint-100/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d7fe1522993e9d0465d19bb981aba89a2d40513d15ab46cef263ce1ebaa4eb5
-size 4768

 version https://git-lfs.github.com/spec/v1
+oid sha256:e8b7fad922cb586b3f4c9d21e8952021ac611c19938a5281fabb27589d026b64
+size 4960