NairaRahim commited on
Commit
de375ac
·
verified ·
1 Parent(s): 05284c0

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c77ed00cbcfdc7b5605f93bc71df50ba9902c68af1f7757caa4da6d160b7e60a
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa9f769a1a8a8b6a773996d7c24b349e4672595d1b4ff0b77824a8db6f68871c
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:989ac37794f1d3e800bb1bbd3247450b69a96a60b5a4ca851c554f4e40868485
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f7703588a46eff1240aab8e93209d8f65d0aae932e6049646bf5f7ac4a0b063
3
  size 2454133690
last-checkpoint/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 35.55657958984375,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-1305",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
@@ -10,101 +10,101 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.07662835249042145,
13
- "grad_norm": 9.95783519744873,
14
  "learning_rate": 4.9952586206896554e-05,
15
- "loss": 58.2962,
16
  "step": 100
17
  },
18
  {
19
  "epoch": 0.1532567049808429,
20
- "grad_norm": 4.181793212890625,
21
  "learning_rate": 4.990469348659004e-05,
22
- "loss": 38.6823,
23
  "step": 200
24
  },
25
  {
26
  "epoch": 0.22988505747126436,
27
- "grad_norm": 2.342090368270874,
28
  "learning_rate": 4.985680076628353e-05,
29
- "loss": 35.8141,
30
  "step": 300
31
  },
32
  {
33
  "epoch": 0.3065134099616858,
34
- "grad_norm": 3.5410687923431396,
35
  "learning_rate": 4.9808908045977015e-05,
36
- "loss": 35.0102,
37
  "step": 400
38
  },
39
  {
40
  "epoch": 0.3831417624521073,
41
- "grad_norm": 1.9479660987854004,
42
  "learning_rate": 4.97610153256705e-05,
43
- "loss": 35.9766,
44
  "step": 500
45
  },
46
  {
47
  "epoch": 0.45977011494252873,
48
- "grad_norm": 4.819860458374023,
49
  "learning_rate": 4.971312260536399e-05,
50
- "loss": 34.6098,
51
  "step": 600
52
  },
53
  {
54
  "epoch": 0.5363984674329502,
55
- "grad_norm": 5.347679138183594,
56
  "learning_rate": 4.9665229885057475e-05,
57
- "loss": 34.7696,
58
  "step": 700
59
  },
60
  {
61
  "epoch": 0.6130268199233716,
62
- "grad_norm": 3.895015239715576,
63
  "learning_rate": 4.961733716475096e-05,
64
- "loss": 34.5659,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 0.6896551724137931,
69
- "grad_norm": 3.2303035259246826,
70
  "learning_rate": 4.956944444444445e-05,
71
- "loss": 33.4366,
72
  "step": 900
73
  },
74
  {
75
  "epoch": 0.7662835249042146,
76
- "grad_norm": 4.221305847167969,
77
  "learning_rate": 4.952155172413793e-05,
78
- "loss": 34.3158,
79
  "step": 1000
80
  },
81
  {
82
  "epoch": 0.842911877394636,
83
- "grad_norm": 5.473431587219238,
84
  "learning_rate": 4.9473659003831416e-05,
85
- "loss": 33.9442,
86
  "step": 1100
87
  },
88
  {
89
  "epoch": 0.9195402298850575,
90
- "grad_norm": 3.3115882873535156,
91
  "learning_rate": 4.94257662835249e-05,
92
- "loss": 34.2257,
93
  "step": 1200
94
  },
95
  {
96
  "epoch": 0.9961685823754789,
97
- "grad_norm": 2.4284849166870117,
98
  "learning_rate": 4.937787356321839e-05,
99
- "loss": 33.5458,
100
  "step": 1300
101
  },
102
  {
103
  "epoch": 1.0,
104
- "eval_loss": 35.55657958984375,
105
- "eval_runtime": 49.2878,
106
- "eval_samples_per_second": 26.477,
107
- "eval_steps_per_second": 3.327,
108
  "step": 1305
109
  }
110
  ],
@@ -116,7 +116,7 @@
116
  "stateful_callbacks": {
117
  "EarlyStoppingCallback": {
118
  "args": {
119
- "early_stopping_patience": 3,
120
  "early_stopping_threshold": 0.0
121
  },
122
  "attributes": {
 
1
  {
2
+ "best_metric": 35.558197021484375,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-1305",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.07662835249042145,
13
+ "grad_norm": 9.545656204223633,
14
  "learning_rate": 4.9952586206896554e-05,
15
+ "loss": 58.0015,
16
  "step": 100
17
  },
18
  {
19
  "epoch": 0.1532567049808429,
20
+ "grad_norm": 3.9482674598693848,
21
  "learning_rate": 4.990469348659004e-05,
22
+ "loss": 38.502,
23
  "step": 200
24
  },
25
  {
26
  "epoch": 0.22988505747126436,
27
+ "grad_norm": 2.5423216819763184,
28
  "learning_rate": 4.985680076628353e-05,
29
+ "loss": 35.7891,
30
  "step": 300
31
  },
32
  {
33
  "epoch": 0.3065134099616858,
34
+ "grad_norm": 3.6723568439483643,
35
  "learning_rate": 4.9808908045977015e-05,
36
+ "loss": 34.9999,
37
  "step": 400
38
  },
39
  {
40
  "epoch": 0.3831417624521073,
41
+ "grad_norm": 2.0953221321105957,
42
  "learning_rate": 4.97610153256705e-05,
43
+ "loss": 35.9283,
44
  "step": 500
45
  },
46
  {
47
  "epoch": 0.45977011494252873,
48
+ "grad_norm": 4.932604789733887,
49
  "learning_rate": 4.971312260536399e-05,
50
+ "loss": 34.5531,
51
  "step": 600
52
  },
53
  {
54
  "epoch": 0.5363984674329502,
55
+ "grad_norm": 5.419522762298584,
56
  "learning_rate": 4.9665229885057475e-05,
57
+ "loss": 34.7408,
58
  "step": 700
59
  },
60
  {
61
  "epoch": 0.6130268199233716,
62
+ "grad_norm": 3.9690020084381104,
63
  "learning_rate": 4.961733716475096e-05,
64
+ "loss": 34.5521,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 0.6896551724137931,
69
+ "grad_norm": 3.3197548389434814,
70
  "learning_rate": 4.956944444444445e-05,
71
+ "loss": 33.4281,
72
  "step": 900
73
  },
74
  {
75
  "epoch": 0.7662835249042146,
76
+ "grad_norm": 4.233493328094482,
77
  "learning_rate": 4.952155172413793e-05,
78
+ "loss": 34.3137,
79
  "step": 1000
80
  },
81
  {
82
  "epoch": 0.842911877394636,
83
+ "grad_norm": 5.390758037567139,
84
  "learning_rate": 4.9473659003831416e-05,
85
+ "loss": 33.9454,
86
  "step": 1100
87
  },
88
  {
89
  "epoch": 0.9195402298850575,
90
+ "grad_norm": 3.419612407684326,
91
  "learning_rate": 4.94257662835249e-05,
92
+ "loss": 34.2298,
93
  "step": 1200
94
  },
95
  {
96
  "epoch": 0.9961685823754789,
97
+ "grad_norm": 2.3791182041168213,
98
  "learning_rate": 4.937787356321839e-05,
99
+ "loss": 33.5481,
100
  "step": 1300
101
  },
102
  {
103
  "epoch": 1.0,
104
+ "eval_loss": 35.558197021484375,
105
+ "eval_runtime": 49.3359,
106
+ "eval_samples_per_second": 26.451,
107
+ "eval_steps_per_second": 3.324,
108
  "step": 1305
109
  }
110
  ],
 
116
  "stateful_callbacks": {
117
  "EarlyStoppingCallback": {
118
  "args": {
119
+ "early_stopping_patience": 10,
120
  "early_stopping_threshold": 0.0
121
  },
122
  "attributes": {