baby-dev committed on
Commit
9df8f60
·
verified ·
1 Parent(s): b3e6c35

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41edb5256aa421ac63d1b2ba4f20a4d47257d2b4a65a5d4b1232ae71729f5362
3
  size 48552
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af136286df6ff5905af165343cf57f5a831518e937bb6fc0471b78269e350229
3
  size 48552
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3acf8603b1c1cef0dd78862911a72d620d40796a965c4e19dd553291458f210
3
  size 107046
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be91a147d44e75d2fcfd702628b092ecfc4ce8e1dd0ac3f0bd83e38b571d6119
3
  size 107046
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3aa86072d894b06ed15a781ebf27303cfbb0a54143c21949f75c3251dd2ec55c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b97e51ea9f5d34aebc6d066d7c2fb7a20dbec78f27d117049f04bf79c02536c3
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7962e06291987b3327e85ffe1a0f48aec5a6651b8b5139bec8a1fd5526407429
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a277d82c7576fb11769cdddd31bacd8b5bc730226a0e626981ae85a7a41efa44
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9979035639412998,
5
  "eval_steps": 500,
6
- "global_step": 119,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -50,6 +50,56 @@
50
  "eval_samples_per_second": 242.468,
51
  "eval_steps_per_second": 62.417,
52
  "step": 119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  }
54
  ],
55
  "logging_steps": 20,
@@ -69,7 +119,7 @@
69
  "attributes": {}
70
  }
71
  },
72
- "total_flos": 134180413440.0,
73
  "train_batch_size": 4,
74
  "trial_name": null,
75
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.9958071278825997,
5
  "eval_steps": 500,
6
+ "global_step": 238,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
50
  "eval_samples_per_second": 242.468,
51
  "eval_steps_per_second": 62.417,
52
  "step": 119
53
+ },
54
+ {
55
+ "epoch": 1.0062893081761006,
56
+ "grad_norm": 0.11942638456821442,
57
+ "learning_rate": 0.00012713675213675213,
58
+ "loss": 12.3702,
59
+ "step": 120
60
+ },
61
+ {
62
+ "epoch": 1.1740041928721174,
63
+ "grad_norm": 0.059040140360593796,
64
+ "learning_rate": 0.00010576923076923077,
65
+ "loss": 11.8371,
66
+ "step": 140
67
+ },
68
+ {
69
+ "epoch": 1.3417190775681342,
70
+ "grad_norm": 0.06853944063186646,
71
+ "learning_rate": 8.440170940170941e-05,
72
+ "loss": 12.0029,
73
+ "step": 160
74
+ },
75
+ {
76
+ "epoch": 1.509433962264151,
77
+ "grad_norm": 0.030212825164198875,
78
+ "learning_rate": 6.303418803418804e-05,
79
+ "loss": 11.9184,
80
+ "step": 180
81
+ },
82
+ {
83
+ "epoch": 1.6771488469601676,
84
+ "grad_norm": 0.034588687121868134,
85
+ "learning_rate": 4.1666666666666665e-05,
86
+ "loss": 11.9407,
87
+ "step": 200
88
+ },
89
+ {
90
+ "epoch": 1.8448637316561844,
91
+ "grad_norm": 0.0361829474568367,
92
+ "learning_rate": 2.02991452991453e-05,
93
+ "loss": 11.8561,
94
+ "step": 220
95
+ },
96
+ {
97
+ "epoch": 1.9958071278825997,
98
+ "eval_loss": 11.917513847351074,
99
+ "eval_runtime": 0.4181,
100
+ "eval_samples_per_second": 241.592,
101
+ "eval_steps_per_second": 62.192,
102
+ "step": 238
103
  }
104
  ],
105
  "logging_steps": 20,
 
119
  "attributes": {}
120
  }
121
  },
122
+ "total_flos": 268360826880.0,
123
  "train_batch_size": 4,
124
  "trial_name": null,
125
  "trial_params": null