Training in progress, step 100, checkpoint

Browse files

Files changed (7) hide show

checkpoint-100/README.md +3 -1
checkpoint-100/adapter_config.json +3 -2
checkpoint-100/adapter_model.safetensors +1 -1
checkpoint-100/optimizer.pt +1 -1
checkpoint-100/rng_state.pth +1 -1
checkpoint-100/trainer_state.json +33 -33
checkpoint-100/training_args.bin +2 -2

checkpoint-100/README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 library_name: peft
-base_model: IlyaGusev/rugpt_large_turbo_instructed
 ---
 # Model Card for Model ID
@@ -201,5 +201,7 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
 ### Framework versions
 - PEFT 0.9.0
 - PEFT 0.7.1

 ---
 library_name: peft
+base_model: exontidev/SISUS_SIKERS
 ---
 # Model Card for Model ID
 ### Framework versions
+- PEFT 0.10.0
 - PEFT 0.9.0
+- PEFT 0.8.2
 - PEFT 0.7.1

checkpoint-100/adapter_config.json CHANGED Viewed

@@ -1,11 +1,12 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "IlyaGusev/rugpt_large_turbo_instructed",
   "bias": "none",
-  "fan_in_fan_out": false,
   "inference_mode": true,
   "init_lora_weights": true,
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},

 {
   "alpha_pattern": {},
   "auto_mapping": null,
+  "base_model_name_or_path": "exontidev/SISUS_SIKERS",
   "bias": "none",
+  "fan_in_fan_out": true,
   "inference_mode": true,
   "init_lora_weights": true,
+  "layer_replication": null,
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},

checkpoint-100/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c1a1118bbcbdab8294afd7683e4ef0e3a54d94d1b86e8087686f7e2822a75195
 size 9443384

 version https://git-lfs.github.com/spec/v1
+oid sha256:b2b5285810c20bd09e835779ffe6024527b320ac033491cd8183484c601bf233
 size 9443384

checkpoint-100/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5445f987e06033ca8180dcacac7106c163198139c090c3fb6a5c5a123d3e3751
 size 18914450

 version https://git-lfs.github.com/spec/v1
+oid sha256:99d0b39339284219fa33e62f42f124120b19e564a33b374195bdae3896bb1592
 size 18914450

checkpoint-100/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:11c0f1c9884efbd52a2ccba350f60152761f753ca8e4d0fe74b04f5dbf78a9a4
 size 14168

 version https://git-lfs.github.com/spec/v1
+oid sha256:3bab3f8144c2aa46aa41f9ab21aa5ec24e807b1a93fb2c1875d977349cf29cc5
 size 14168

checkpoint-100/trainer_state.json CHANGED Viewed

@@ -9,82 +9,82 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.42,
-      "grad_norm": 0.18415939807891846,
       "learning_rate": 2.9999999999999997e-05,
-      "loss": 3.8654,
       "step": 10
     },
     {
-      "epoch": 0.84,
-      "grad_norm": 0.23708771169185638,
       "learning_rate": 5.9999999999999995e-05,
-      "loss": 3.8218,
       "step": 20
     },
     {
-      "epoch": 1.25,
-      "grad_norm": 0.3239809572696686,
       "learning_rate": 8.999999999999999e-05,
-      "loss": 3.725,
       "step": 30
     },
     {
-      "epoch": 1.67,
-      "grad_norm": 0.38910844922065735,
       "learning_rate": 0.00011999999999999999,
-      "loss": 3.4953,
       "step": 40
     },
     {
-      "epoch": 2.09,
-      "grad_norm": 0.520912230014801,
       "learning_rate": 0.00015,
-      "loss": 3.0984,
       "step": 50
     },
     {
-      "epoch": 2.51,
-      "grad_norm": 0.6063631772994995,
       "learning_rate": 0.00017999999999999998,
-      "loss": 2.5316,
       "step": 60
     },
     {
-      "epoch": 2.92,
-      "grad_norm": 0.4615532457828522,
       "learning_rate": 0.00020999999999999998,
-      "loss": 1.8987,
       "step": 70
     },
     {
-      "epoch": 3.34,
-      "grad_norm": 0.16907210648059845,
       "learning_rate": 0.00023999999999999998,
-      "loss": 1.6041,
       "step": 80
     },
     {
-      "epoch": 3.76,
-      "grad_norm": 0.11257671564817429,
       "learning_rate": 0.00027,
-      "loss": 1.4732,
       "step": 90
     },
     {
-      "epoch": 4.18,
-      "grad_norm": 0.08190377801656723,
       "learning_rate": 0.0003,
-      "loss": 1.4341,
       "step": 100
     }
   ],
   "logging_steps": 10,
-  "max_steps": 175,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 8,
   "save_steps": 100,
-  "total_flos": 1.863006384782131e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.4177545691906005,
+      "grad_norm": 0.28227752447128296,
       "learning_rate": 2.9999999999999997e-05,
+      "loss": 4.1508,
       "step": 10
     },
     {
+      "epoch": 0.835509138381201,
+      "grad_norm": 0.31433430314064026,
       "learning_rate": 5.9999999999999995e-05,
+      "loss": 4.1593,
       "step": 20
     },
     {
+      "epoch": 1.2532637075718016,
+      "grad_norm": 0.3350953161716461,
       "learning_rate": 8.999999999999999e-05,
+      "loss": 4.0414,
       "step": 30
     },
     {
+      "epoch": 1.671018276762402,
+      "grad_norm": 0.2885706126689911,
       "learning_rate": 0.00011999999999999999,
+      "loss": 3.8411,
       "step": 40
     },
     {
+      "epoch": 2.0887728459530024,
+      "grad_norm": 0.23711609840393066,
       "learning_rate": 0.00015,
+      "loss": 3.6434,
       "step": 50
     },
     {
+      "epoch": 2.506527415143603,
+      "grad_norm": 0.21583135426044464,
       "learning_rate": 0.00017999999999999998,
+      "loss": 3.4636,
       "step": 60
     },
     {
+      "epoch": 2.9242819843342036,
+      "grad_norm": 0.18754692375659943,
       "learning_rate": 0.00020999999999999998,
+      "loss": 3.3154,
       "step": 70
     },
     {
+      "epoch": 3.342036553524804,
+      "grad_norm": 0.15951760113239288,
       "learning_rate": 0.00023999999999999998,
+      "loss": 3.2195,
       "step": 80
     },
     {
+      "epoch": 3.759791122715405,
+      "grad_norm": 0.14639759063720703,
       "learning_rate": 0.00027,
+      "loss": 3.122,
       "step": 90
     },
     {
+      "epoch": 4.177545691906005,
+      "grad_norm": 0.1860765665769577,
       "learning_rate": 0.0003,
+      "loss": 3.0677,
       "step": 100
     }
   ],
   "logging_steps": 10,
+  "max_steps": 300,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 14,
   "save_steps": 100,
+  "total_flos": 1.6201284405755904e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

checkpoint-100/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:79f806349f0bb61a00c2d540daba3c569079d7140776f830f475a964ef346330
-size 4960

 version https://git-lfs.github.com/spec/v1
+oid sha256:42329f5345a3c120af37c6fdbce453b0541524f81257e209baeb9a0b15e22c94
+size 5024