fpadovani commited on
Commit
b99d756
·
verified ·
1 Parent(s): 0a69a84

Training in progress, step 2000, checkpoint

Browse files
checkpoint-2000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b09c0d86b97391a0c54f0f7d4571c5e574a4f5de5638911198cb59ac56770d7f
3
  size 435544704
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4165babdf23f54f2113563ca6b7eb12562f41af373f3c054d91efa36212ebc98
3
  size 435544704
checkpoint-2000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8df9a0a893c5ceb068475b1abb9e0539a8232ccb7b079ef46cfde81c5864ee6
3
  size 871183627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ad22bf60c98aa4495600b3c60d436eecd2e53dc5f2c2e63ee1ea2701fc225c9
3
  size 871183627
checkpoint-2000/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9dc205f33bb4203ce5c294ea6b70ba722e90bd45d65655c6d1c39f73f94aabfa
3
  size 14709
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b0bb4c0c1a2fa8d5b77f62a4401eba5933f3439a753e45a0f06dc8b3b8f9508
3
  size 14709
checkpoint-2000/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4aa03f6e0cd07cf67ce1fbe3101d545f5771ef9148b9debf02b11cf6948da5c
3
+ size 1383
checkpoint-2000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2984b1d47ad6a58b0aa719b640f1cf7dfa19a592c642a7043ebe8d417af93d8a
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdd062b4faf86828cbe9f1fcb29c400fb01075bbdb38048d321a810cbeef8c3a
3
  size 1465
checkpoint-2000/tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
checkpoint-2000/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_global_step": 1968,
3
- "best_metric": 3.473982334136963,
4
  "best_model_checkpoint": null,
5
- "epoch": 4.065040650406504,
6
  "eval_steps": 500,
7
  "global_step": 2000,
8
  "is_hyper_param_search": false,
@@ -10,75 +10,60 @@
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "epoch": 0.0020325203252032522,
14
- "grad_norm": 28.81853675842285,
15
  "learning_rate": 0.0,
16
- "loss": 10.3353,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 1.0,
21
- "grad_norm": 1.4544554948806763,
22
- "learning_rate": 9.82e-05,
23
- "loss": 4.8617,
24
- "step": 492
25
  },
26
  {
27
  "epoch": 1.0,
28
- "eval_loss": 3.9908180236816406,
29
- "eval_runtime": 66.4665,
30
- "eval_samples_per_second": 505.006,
31
- "eval_steps_per_second": 1.986,
32
- "step": 492
33
  },
34
  {
35
  "epoch": 2.0,
36
- "grad_norm": 1.4267009496688843,
37
- "learning_rate": 7.529411764705883e-05,
38
- "loss": 3.5249,
39
- "step": 984
40
  },
41
  {
42
  "epoch": 2.0,
43
- "eval_loss": 3.653867483139038,
44
- "eval_runtime": 66.5662,
45
- "eval_samples_per_second": 504.25,
46
- "eval_steps_per_second": 1.983,
47
- "step": 984
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 1.5028904676437378,
52
- "learning_rate": 5.0127877237851665e-05,
53
- "loss": 3.257,
54
- "step": 1476
55
  },
56
  {
57
  "epoch": 3.0,
58
- "eval_loss": 3.5320048332214355,
59
- "eval_runtime": 66.4925,
60
- "eval_samples_per_second": 504.809,
61
- "eval_steps_per_second": 1.985,
62
- "step": 1476
63
- },
64
- {
65
- "epoch": 4.0,
66
- "grad_norm": 1.6736714839935303,
67
- "learning_rate": 2.4961636828644502e-05,
68
- "loss": 3.1208,
69
- "step": 1968
70
- },
71
- {
72
- "epoch": 4.0,
73
- "eval_loss": 3.473982334136963,
74
- "eval_runtime": 66.5329,
75
- "eval_samples_per_second": 504.503,
76
- "eval_steps_per_second": 1.984,
77
- "step": 1968
78
  }
79
  ],
80
- "logging_steps": 4000,
81
- "max_steps": 2460,
82
  "num_input_tokens_seen": 0,
83
  "num_train_epochs": 5,
84
  "save_steps": 500,
@@ -94,7 +79,7 @@
94
  "attributes": {}
95
  }
96
  },
97
- "total_flos": 3.3395212025856e+16,
98
  "train_batch_size": 256,
99
  "trial_name": null,
100
  "trial_params": null
 
1
  {
2
+ "best_global_step": 1506,
3
+ "best_metric": 3.306654214859009,
4
  "best_model_checkpoint": null,
5
+ "epoch": 3.9840637450199203,
6
  "eval_steps": 500,
7
  "global_step": 2000,
8
  "is_hyper_param_search": false,
 
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "epoch": 0.00199203187250996,
14
+ "grad_norm": 30.39765739440918,
15
  "learning_rate": 0.0,
16
+ "loss": 10.3969,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 1.0,
21
+ "grad_norm": 1.4528049230575562,
22
+ "learning_rate": 9.995012468827932e-05,
23
+ "loss": 4.8126,
24
+ "step": 502
25
  },
26
  {
27
  "epoch": 1.0,
28
+ "eval_loss": 3.7866852283477783,
29
+ "eval_runtime": 20.1378,
30
+ "eval_samples_per_second": 1574.851,
31
+ "eval_steps_per_second": 6.158,
32
+ "step": 502
33
  },
34
  {
35
  "epoch": 2.0,
36
+ "grad_norm": 1.2457406520843506,
37
+ "learning_rate": 7.491271820448879e-05,
38
+ "loss": 3.5387,
39
+ "step": 1004
40
  },
41
  {
42
  "epoch": 2.0,
43
+ "eval_loss": 3.4403388500213623,
44
+ "eval_runtime": 20.1359,
45
+ "eval_samples_per_second": 1574.997,
46
+ "eval_steps_per_second": 6.158,
47
+ "step": 1004
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 1.3165825605392456,
52
+ "learning_rate": 4.987531172069826e-05,
53
+ "loss": 3.2924,
54
+ "step": 1506
55
  },
56
  {
57
  "epoch": 3.0,
58
+ "eval_loss": 3.306654214859009,
59
+ "eval_runtime": 20.2048,
60
+ "eval_samples_per_second": 1569.627,
61
+ "eval_steps_per_second": 6.137,
62
+ "step": 1506
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  }
64
  ],
65
+ "logging_steps": 500,
66
+ "max_steps": 2510,
67
  "num_input_tokens_seen": 0,
68
  "num_train_epochs": 5,
69
  "save_steps": 500,
 
79
  "attributes": {}
80
  }
81
  },
82
+ "total_flos": 3.3411477454848e+16,
83
  "train_batch_size": 256,
84
  "trial_name": null,
85
  "trial_params": null
checkpoint-2000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3290c0b51af746ef3fa5659adc5fa025f21c3647c818c43be3ade238b5ea6a1e
3
  size 5969
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:475acc627049b54c7e350da84d703383fb40cabc30b49e78872ae734cfff2130
3
  size 5969