bitsoko commited on
Commit
251d6d8
·
verified ·
1 Parent(s): 640dac3

Training in progress, step 150, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd75d74313cede354e3941f73f3842af68de408eca240a13bca2aa0a207de9c0
3
  size 1912664024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:419ef2c365b2df61990765df67db820969667778e123c6c39e29b83cc59610e1
3
  size 1912664024
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:121c173b4e8064cbea232239a5ec74e0a24731ce43d30eab0717384e9d6f67c2
3
  size 958697364
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ceafd3de144036993fa29e2a17c0194470e240a1f774ed6130da298efb1038af
3
  size 958697364
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:842666bae3e6d50174909a214b88b57822ee9f4eb536e3ad5a5e1b5cac198a06
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:399c6e0a1a129bcf47d55d6652cc4710de81e426cb3bb5b9430151acfb862766
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3644cf9f8e3ac9f4f534109bec24dea135af8ce9e36c33ebe8e1053def47661f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:857bbe62f2d165935d1ce9ae82dd80a72b9db823aa28c4ae95a8b905e80accaa
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.006135345726731701,
5
  "eval_steps": 20,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -82,6 +82,36 @@
82
  "eval_samples_per_second": 3.802,
83
  "eval_steps_per_second": 0.494,
84
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  }
86
  ],
87
  "logging_steps": 20,
@@ -89,7 +119,7 @@
89
  "num_input_tokens_seen": 0,
90
  "num_train_epochs": 3,
91
  "save_steps": 50,
92
- "total_flos": 6919962687590400.0,
93
  "train_batch_size": 2,
94
  "trial_name": null,
95
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.009203018590097553,
5
  "eval_steps": 20,
6
+ "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
82
  "eval_samples_per_second": 3.802,
83
  "eval_steps_per_second": 0.494,
84
  "step": 100
85
+ },
86
+ {
87
+ "epoch": 0.007362414872078042,
88
+ "grad_norm": 0.11603377014398575,
89
+ "learning_rate": 0.000199529575390657,
90
+ "loss": 1.7827,
91
+ "step": 120
92
+ },
93
+ {
94
+ "epoch": 0.007362414872078042,
95
+ "eval_loss": 1.773740291595459,
96
+ "eval_runtime": 26.4277,
97
+ "eval_samples_per_second": 3.784,
98
+ "eval_steps_per_second": 0.492,
99
+ "step": 120
100
+ },
101
+ {
102
+ "epoch": 0.008589484017424381,
103
+ "grad_norm": 0.10753527283668518,
104
+ "learning_rate": 0.00019944776241511905,
105
+ "loss": 1.8118,
106
+ "step": 140
107
+ },
108
+ {
109
+ "epoch": 0.008589484017424381,
110
+ "eval_loss": 1.7376396656036377,
111
+ "eval_runtime": 26.2097,
112
+ "eval_samples_per_second": 3.815,
113
+ "eval_steps_per_second": 0.496,
114
+ "step": 140
115
  }
116
  ],
117
  "logging_steps": 20,
 
119
  "num_input_tokens_seen": 0,
120
  "num_train_epochs": 3,
121
  "save_steps": 50,
122
+ "total_flos": 1.02462852169728e+16,
123
  "train_batch_size": 2,
124
  "trial_name": null,
125
  "trial_params": null