baby-dev commited on
Commit
449ff6a
·
verified ·
1 Parent(s): f99f6fc

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e979dbd3cbd940f959a4ba68cfe39ed192ba6aaf26b940558b355981237dddf9
3
  size 69527352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b367de47e5966fa8aad849c95b23eaff4a33c70ff5298d5b3d88e91984573b55
3
  size 69527352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39a2e574caffeb2fdfdb41ce93130d1ac0d397bbcadbeb7dd4e2d64b60937f1b
3
  size 35778900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0b40cd55d966e26ccaed9c05a73c18c5bccd3f041050c4308e1b1f5ab6c377f
3
  size 35778900
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5aff15d3484f18cecf83b769183ac0cc241615e7ba3d43c9e65497b2930f18c5
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:522b123c0533634a3c8a15c4e1445a6191f73e80bcc350dc28da1bb5df0f111c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bcf348532606e290f3cddebc7f00005cce6f05bb1cced2bad1d4a15482755657
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:884dbcf1f76cdf64d0ca16f2c81847fd9a35ed97a8bdfbbdeee456fb385a47c3
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 367,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -64,6 +64,49 @@
64
  "eval_samples_per_second": 72.046,
65
  "eval_steps_per_second": 18.186,
66
  "step": 367
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  }
68
  ],
69
  "logging_steps": 50,
@@ -78,12 +121,12 @@
78
  "should_evaluate": false,
79
  "should_log": false,
80
  "should_save": true,
81
- "should_training_stop": false
82
  },
83
  "attributes": {}
84
  }
85
  },
86
- "total_flos": 1.189318078660608e+16,
87
  "train_batch_size": 4,
88
  "trial_name": null,
89
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.6348773841961854,
5
  "eval_steps": 500,
6
+ "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
64
  "eval_samples_per_second": 72.046,
65
  "eval_steps_per_second": 18.186,
66
  "step": 367
67
+ },
68
+ {
69
+ "epoch": 1.0899182561307903,
70
+ "grad_norm": 0.2055899053812027,
71
+ "learning_rate": 0.0004,
72
+ "loss": 0.2806,
73
+ "step": 400
74
+ },
75
+ {
76
+ "epoch": 1.226158038147139,
77
+ "grad_norm": 0.28835996985435486,
78
+ "learning_rate": 0.0004,
79
+ "loss": 0.2058,
80
+ "step": 450
81
+ },
82
+ {
83
+ "epoch": 1.3623978201634879,
84
+ "grad_norm": 0.30920594930648804,
85
+ "learning_rate": 0.0004,
86
+ "loss": 0.1817,
87
+ "step": 500
88
+ },
89
+ {
90
+ "epoch": 1.4986376021798364,
91
+ "grad_norm": 0.18480294942855835,
92
+ "learning_rate": 0.0004,
93
+ "loss": 0.1557,
94
+ "step": 550
95
+ },
96
+ {
97
+ "epoch": 1.6348773841961854,
98
+ "grad_norm": 0.6040850877761841,
99
+ "learning_rate": 0.0004,
100
+ "loss": 0.1337,
101
+ "step": 600
102
+ },
103
+ {
104
+ "epoch": 1.6348773841961854,
105
+ "eval_loss": 0.14192721247673035,
106
+ "eval_runtime": 4.3794,
107
+ "eval_samples_per_second": 70.557,
108
+ "eval_steps_per_second": 17.811,
109
+ "step": 600
110
  }
111
  ],
112
  "logging_steps": 50,
 
121
  "should_evaluate": false,
122
  "should_log": false,
123
  "should_save": true,
124
+ "should_training_stop": true
125
  },
126
  "attributes": {}
127
  }
128
  },
129
+ "total_flos": 1.945276023865344e+16,
130
  "train_batch_size": 4,
131
  "trial_name": null,
132
  "trial_params": null