bitsoko committed on
Commit
1431fe3
·
verified ·
1 Parent(s): 42b90f1

Training in progress, step 150, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:592ab26a2b97723a39c6ebf711264535154b01ae69104de25f0ab4d81a566e09
3
  size 119597408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e23a239d6297d3c02bb38cbb0f1271275b709dcbdeb8dd00039dace5557b950
3
  size 119597408
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:821fd80c38d03c753fc6bc106fdaa21ba03f6ef3eb8f74d7ae2964902e6bfb63
3
  size 60386772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f23fb542af297c715b6dd28412917ef1b6b037abfdd12e08871b61d04ea5ea3
3
  size 60386772
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:842666bae3e6d50174909a214b88b57822ee9f4eb536e3ad5a5e1b5cac198a06
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:399c6e0a1a129bcf47d55d6652cc4710de81e426cb3bb5b9430151acfb862766
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3644cf9f8e3ac9f4f534109bec24dea135af8ce9e36c33ebe8e1053def47661f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:857bbe62f2d165935d1ce9ae82dd80a72b9db823aa28c4ae95a8b905e80accaa
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.006135345726731701,
5
  "eval_steps": 20,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -82,6 +82,36 @@
82
  "eval_samples_per_second": 5.087,
83
  "eval_steps_per_second": 0.661,
84
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  }
86
  ],
87
  "logging_steps": 20,
@@ -89,7 +119,7 @@
89
  "num_input_tokens_seen": 0,
90
  "num_train_epochs": 3,
91
  "save_steps": 50,
92
- "total_flos": 6169094793400320.0,
93
  "train_batch_size": 2,
94
  "trial_name": null,
95
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.009203018590097553,
5
  "eval_steps": 20,
6
+ "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
82
  "eval_samples_per_second": 5.087,
83
  "eval_steps_per_second": 0.661,
84
  "step": 100
85
+ },
86
+ {
87
+ "epoch": 0.007362414872078042,
88
+ "grad_norm": 0.8181987404823303,
89
+ "learning_rate": 0.000199529575390657,
90
+ "loss": 1.7025,
91
+ "step": 120
92
+ },
93
+ {
94
+ "epoch": 0.007362414872078042,
95
+ "eval_loss": 1.6928819417953491,
96
+ "eval_runtime": 19.5713,
97
+ "eval_samples_per_second": 5.11,
98
+ "eval_steps_per_second": 0.664,
99
+ "step": 120
100
+ },
101
+ {
102
+ "epoch": 0.008589484017424381,
103
+ "grad_norm": 0.6988233327865601,
104
+ "learning_rate": 0.00019944776241511905,
105
+ "loss": 1.7335,
106
+ "step": 140
107
+ },
108
+ {
109
+ "epoch": 0.008589484017424381,
110
+ "eval_loss": 1.6641780138015747,
111
+ "eval_runtime": 19.6822,
112
+ "eval_samples_per_second": 5.081,
113
+ "eval_steps_per_second": 0.66,
114
+ "step": 140
115
  }
116
  ],
117
  "logging_steps": 20,
 
119
  "num_input_tokens_seen": 0,
120
  "num_train_epochs": 3,
121
  "save_steps": 50,
122
+ "total_flos": 9140460390236160.0,
123
  "train_batch_size": 2,
124
  "trial_name": null,
125
  "trial_params": null