| step,tag,value | |
| 5,train/loss,0.9807000160217285 | |
| 5,train/grad_norm,0.6335662603378296 | |
| 5,train/learning_rate,1.5999999959603883e-05 | |
| 5,train/epoch,1.7999999523162842 | |
| 5,train/epoch,1.7999999523162842 | |
| 9,train/epoch,3.0 | |
| 5,eval/loss,1.1169885396957397 | |
| 5,eval/runtime,4.340700149536133 | |
| 5,eval/samples_per_second,2.303999900817871 | |
| 5,eval/steps_per_second,0.6909999847412109 | |
| 9,train/train_runtime,27.42020034790039 | |
| 9,train/train_samples_per_second,1.093999981880188 | |
| 9,train/train_steps_per_second,0.328000009059906 | |
| 9,train/total_flos,132491738349568.0 | |
| 9,train/train_loss,0.9444599747657776 | |