NairaRahim committed on
Commit
307027a
·
verified ·
1 Parent(s): 91b1473

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed0a9e27912c4dc60639c8833f6bee50855f3e2302e82bda9e9c0868757378c3
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c77ed00cbcfdc7b5605f93bc71df50ba9902c68af1f7757caa4da6d160b7e60a
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d872ac2969817b48da2f57c818767e386ef21d15a258e7f4b85128f1d9326c6d
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:989ac37794f1d3e800bb1bbd3247450b69a96a60b5a4ca851c554f4e40868485
3
  size 2454133690
last-checkpoint/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 35.5689697265625,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-1305",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
@@ -10,101 +10,101 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.07662835249042145,
13
- "grad_norm": 10.31527328491211,
14
  "learning_rate": 4.9952586206896554e-05,
15
- "loss": 58.4258,
16
  "step": 100
17
  },
18
  {
19
  "epoch": 0.1532567049808429,
20
- "grad_norm": 4.617059230804443,
21
  "learning_rate": 4.990469348659004e-05,
22
- "loss": 38.9853,
23
  "step": 200
24
  },
25
  {
26
  "epoch": 0.22988505747126436,
27
- "grad_norm": 2.52376127243042,
28
  "learning_rate": 4.985680076628353e-05,
29
- "loss": 35.8655,
30
  "step": 300
31
  },
32
  {
33
  "epoch": 0.3065134099616858,
34
- "grad_norm": 3.7238430976867676,
35
  "learning_rate": 4.9808908045977015e-05,
36
- "loss": 34.9943,
37
  "step": 400
38
  },
39
  {
40
  "epoch": 0.3831417624521073,
41
- "grad_norm": 2.0289058685302734,
42
  "learning_rate": 4.97610153256705e-05,
43
- "loss": 35.9013,
44
  "step": 500
45
  },
46
  {
47
  "epoch": 0.45977011494252873,
48
- "grad_norm": 5.092604160308838,
49
  "learning_rate": 4.971312260536399e-05,
50
- "loss": 34.5219,
51
  "step": 600
52
  },
53
  {
54
  "epoch": 0.5363984674329502,
55
- "grad_norm": 5.38631010055542,
56
  "learning_rate": 4.9665229885057475e-05,
57
- "loss": 34.7252,
58
  "step": 700
59
  },
60
  {
61
  "epoch": 0.6130268199233716,
62
- "grad_norm": 3.966066360473633,
63
  "learning_rate": 4.961733716475096e-05,
64
- "loss": 34.5442,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 0.6896551724137931,
69
- "grad_norm": 3.288087844848633,
70
  "learning_rate": 4.956944444444445e-05,
71
- "loss": 33.4222,
72
  "step": 900
73
  },
74
  {
75
  "epoch": 0.7662835249042146,
76
- "grad_norm": 4.245945453643799,
77
  "learning_rate": 4.952155172413793e-05,
78
- "loss": 34.3038,
79
  "step": 1000
80
  },
81
  {
82
  "epoch": 0.842911877394636,
83
- "grad_norm": 5.456043720245361,
84
  "learning_rate": 4.9473659003831416e-05,
85
- "loss": 33.9338,
86
  "step": 1100
87
  },
88
  {
89
  "epoch": 0.9195402298850575,
90
- "grad_norm": 3.3580803871154785,
91
  "learning_rate": 4.94257662835249e-05,
92
- "loss": 34.2207,
93
  "step": 1200
94
  },
95
  {
96
  "epoch": 0.9961685823754789,
97
- "grad_norm": 2.4025654792785645,
98
  "learning_rate": 4.937787356321839e-05,
99
- "loss": 33.5373,
100
  "step": 1300
101
  },
102
  {
103
  "epoch": 1.0,
104
- "eval_loss": 35.5689697265625,
105
- "eval_runtime": 49.3779,
106
- "eval_samples_per_second": 26.429,
107
- "eval_steps_per_second": 3.321,
108
  "step": 1305
109
  }
110
  ],
 
1
  {
2
+ "best_metric": 35.55657958984375,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-1305",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.07662835249042145,
13
+ "grad_norm": 9.95783519744873,
14
  "learning_rate": 4.9952586206896554e-05,
15
+ "loss": 58.2962,
16
  "step": 100
17
  },
18
  {
19
  "epoch": 0.1532567049808429,
20
+ "grad_norm": 4.181793212890625,
21
  "learning_rate": 4.990469348659004e-05,
22
+ "loss": 38.6823,
23
  "step": 200
24
  },
25
  {
26
  "epoch": 0.22988505747126436,
27
+ "grad_norm": 2.342090368270874,
28
  "learning_rate": 4.985680076628353e-05,
29
+ "loss": 35.8141,
30
  "step": 300
31
  },
32
  {
33
  "epoch": 0.3065134099616858,
34
+ "grad_norm": 3.5410687923431396,
35
  "learning_rate": 4.9808908045977015e-05,
36
+ "loss": 35.0102,
37
  "step": 400
38
  },
39
  {
40
  "epoch": 0.3831417624521073,
41
+ "grad_norm": 1.9479660987854004,
42
  "learning_rate": 4.97610153256705e-05,
43
+ "loss": 35.9766,
44
  "step": 500
45
  },
46
  {
47
  "epoch": 0.45977011494252873,
48
+ "grad_norm": 4.819860458374023,
49
  "learning_rate": 4.971312260536399e-05,
50
+ "loss": 34.6098,
51
  "step": 600
52
  },
53
  {
54
  "epoch": 0.5363984674329502,
55
+ "grad_norm": 5.347679138183594,
56
  "learning_rate": 4.9665229885057475e-05,
57
+ "loss": 34.7696,
58
  "step": 700
59
  },
60
  {
61
  "epoch": 0.6130268199233716,
62
+ "grad_norm": 3.895015239715576,
63
  "learning_rate": 4.961733716475096e-05,
64
+ "loss": 34.5659,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 0.6896551724137931,
69
+ "grad_norm": 3.2303035259246826,
70
  "learning_rate": 4.956944444444445e-05,
71
+ "loss": 33.4366,
72
  "step": 900
73
  },
74
  {
75
  "epoch": 0.7662835249042146,
76
+ "grad_norm": 4.221305847167969,
77
  "learning_rate": 4.952155172413793e-05,
78
+ "loss": 34.3158,
79
  "step": 1000
80
  },
81
  {
82
  "epoch": 0.842911877394636,
83
+ "grad_norm": 5.473431587219238,
84
  "learning_rate": 4.9473659003831416e-05,
85
+ "loss": 33.9442,
86
  "step": 1100
87
  },
88
  {
89
  "epoch": 0.9195402298850575,
90
+ "grad_norm": 3.3115882873535156,
91
  "learning_rate": 4.94257662835249e-05,
92
+ "loss": 34.2257,
93
  "step": 1200
94
  },
95
  {
96
  "epoch": 0.9961685823754789,
97
+ "grad_norm": 2.4284849166870117,
98
  "learning_rate": 4.937787356321839e-05,
99
+ "loss": 33.5458,
100
  "step": 1300
101
  },
102
  {
103
  "epoch": 1.0,
104
+ "eval_loss": 35.55657958984375,
105
+ "eval_runtime": 49.2878,
106
+ "eval_samples_per_second": 26.477,
107
+ "eval_steps_per_second": 3.327,
108
  "step": 1305
109
  }
110
  ],