EN3S committed on
Commit
faf72b8
·
verified ·
1 Parent(s): 7734ed9

Training in progress, epoch 1

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fea7a822343518cc5588fbbd4f4740020da2381f0d54605937eb5bf8dca113bf
3
  size 437958648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ddac4d3a3d0da455d0e501fd182e68910ab7b0a78868896b9607f09a4a31cd3
3
  size 437958648
run-0/checkpoint-117/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_global_step": 78,
3
  "best_metric": 0.6931407942238267,
4
- "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_14/run-0/checkpoint-78",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
7
  "global_step": 117,
@@ -11,106 +11,106 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.2564102564102564,
14
- "grad_norm": 1.662626028060913,
15
  "learning_rate": 9.487179487179487e-05,
16
  "loss": 0.696,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.5128205128205128,
21
- "grad_norm": 2.0300467014312744,
22
  "learning_rate": 8.974358974358975e-05,
23
  "loss": 0.6793,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.7692307692307693,
28
- "grad_norm": 4.490738868713379,
29
  "learning_rate": 8.461538461538461e-05,
30
  "loss": 0.6499,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 1.0,
35
- "eval_accuracy": 0.631768953068592,
36
- "eval_loss": 0.6309793591499329,
37
- "eval_runtime": 0.6606,
38
- "eval_samples_per_second": 419.298,
39
- "eval_steps_per_second": 7.569,
40
  "step": 39
41
  },
42
  {
43
  "epoch": 1.0256410256410255,
44
- "grad_norm": 3.481339454650879,
45
  "learning_rate": 7.948717948717948e-05,
46
- "loss": 0.6793,
47
  "step": 40
48
  },
49
  {
50
  "epoch": 1.282051282051282,
51
- "grad_norm": 4.932971477508545,
52
  "learning_rate": 7.435897435897436e-05,
53
- "loss": 0.5602,
54
  "step": 50
55
  },
56
  {
57
  "epoch": 1.5384615384615383,
58
- "grad_norm": 7.099682807922363,
59
  "learning_rate": 6.923076923076924e-05,
60
- "loss": 0.5998,
61
  "step": 60
62
  },
63
  {
64
  "epoch": 1.7948717948717947,
65
- "grad_norm": 3.696152925491333,
66
  "learning_rate": 6.410256410256412e-05,
67
- "loss": 0.5403,
68
  "step": 70
69
  },
70
  {
71
  "epoch": 2.0,
72
- "eval_accuracy": 0.6931407942238267,
73
- "eval_loss": 0.6287456154823303,
74
- "eval_runtime": 0.6619,
75
- "eval_samples_per_second": 418.511,
76
- "eval_steps_per_second": 7.554,
77
  "step": 78
78
  },
79
  {
80
  "epoch": 2.051282051282051,
81
- "grad_norm": 3.8591785430908203,
82
  "learning_rate": 5.897435897435898e-05,
83
- "loss": 0.4872,
84
  "step": 80
85
  },
86
  {
87
  "epoch": 2.3076923076923075,
88
- "grad_norm": 4.27885627746582,
89
  "learning_rate": 5.384615384615385e-05,
90
- "loss": 0.3396,
91
  "step": 90
92
  },
93
  {
94
  "epoch": 2.564102564102564,
95
- "grad_norm": 4.205892562866211,
96
  "learning_rate": 4.871794871794872e-05,
97
- "loss": 0.3372,
98
  "step": 100
99
  },
100
  {
101
  "epoch": 2.8205128205128203,
102
- "grad_norm": 5.800762176513672,
103
  "learning_rate": 4.358974358974359e-05,
104
- "loss": 0.3422,
105
  "step": 110
106
  },
107
  {
108
  "epoch": 3.0,
109
- "eval_accuracy": 0.6787003610108303,
110
- "eval_loss": 0.7227063179016113,
111
- "eval_runtime": 0.6696,
112
- "eval_samples_per_second": 413.706,
113
- "eval_steps_per_second": 7.468,
114
  "step": 117
115
  }
116
  ],
 
1
  {
2
+ "best_global_step": 117,
3
  "best_metric": 0.6931407942238267,
4
+ "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_14/run-0/checkpoint-117",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
7
  "global_step": 117,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.2564102564102564,
14
+ "grad_norm": 1.662625789642334,
15
  "learning_rate": 9.487179487179487e-05,
16
  "loss": 0.696,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.5128205128205128,
21
+ "grad_norm": 2.0300142765045166,
22
  "learning_rate": 8.974358974358975e-05,
23
  "loss": 0.6793,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.7692307692307693,
28
+ "grad_norm": 4.4956440925598145,
29
  "learning_rate": 8.461538461538461e-05,
30
  "loss": 0.6499,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 1.0,
35
+ "eval_accuracy": 0.6173285198555957,
36
+ "eval_loss": 0.6330550909042358,
37
+ "eval_runtime": 0.6611,
38
+ "eval_samples_per_second": 418.98,
39
+ "eval_steps_per_second": 7.563,
40
  "step": 39
41
  },
42
  {
43
  "epoch": 1.0256410256410255,
44
+ "grad_norm": 3.44403338432312,
45
  "learning_rate": 7.948717948717948e-05,
46
+ "loss": 0.6782,
47
  "step": 40
48
  },
49
  {
50
  "epoch": 1.282051282051282,
51
+ "grad_norm": 5.425619602203369,
52
  "learning_rate": 7.435897435897436e-05,
53
+ "loss": 0.5831,
54
  "step": 50
55
  },
56
  {
57
  "epoch": 1.5384615384615383,
58
+ "grad_norm": 11.772316932678223,
59
  "learning_rate": 6.923076923076924e-05,
60
+ "loss": 0.5307,
61
  "step": 60
62
  },
63
  {
64
  "epoch": 1.7948717948717947,
65
+ "grad_norm": 3.624584674835205,
66
  "learning_rate": 6.410256410256412e-05,
67
+ "loss": 0.5396,
68
  "step": 70
69
  },
70
  {
71
  "epoch": 2.0,
72
+ "eval_accuracy": 0.6859205776173285,
73
+ "eval_loss": 0.6415449380874634,
74
+ "eval_runtime": 0.6613,
75
+ "eval_samples_per_second": 418.848,
76
+ "eval_steps_per_second": 7.56,
77
  "step": 78
78
  },
79
  {
80
  "epoch": 2.051282051282051,
81
+ "grad_norm": 3.940481662750244,
82
  "learning_rate": 5.897435897435898e-05,
83
+ "loss": 0.455,
84
  "step": 80
85
  },
86
  {
87
  "epoch": 2.3076923076923075,
88
+ "grad_norm": 5.196528434753418,
89
  "learning_rate": 5.384615384615385e-05,
90
+ "loss": 0.3532,
91
  "step": 90
92
  },
93
  {
94
  "epoch": 2.564102564102564,
95
+ "grad_norm": 4.290834903717041,
96
  "learning_rate": 4.871794871794872e-05,
97
+ "loss": 0.3482,
98
  "step": 100
99
  },
100
  {
101
  "epoch": 2.8205128205128203,
102
+ "grad_norm": 4.902711391448975,
103
  "learning_rate": 4.358974358974359e-05,
104
+ "loss": 0.3223,
105
  "step": 110
106
  },
107
  {
108
  "epoch": 3.0,
109
+ "eval_accuracy": 0.6931407942238267,
110
+ "eval_loss": 0.7661525011062622,
111
+ "eval_runtime": 0.6709,
112
+ "eval_samples_per_second": 412.863,
113
+ "eval_steps_per_second": 7.452,
114
  "step": 117
115
  }
116
  ],
run-0/checkpoint-39/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86da292e44eb636b62e95367ef4936d89b7b6baee6fe9119a8b62c9e977f0c9d
3
  size 437958648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ddac4d3a3d0da455d0e501fd182e68910ab7b0a78868896b9607f09a4a31cd3
3
  size 437958648
run-0/checkpoint-39/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e214d4bf994f70027fe98b044d22190bb61c8b1144a91ffbfb465e093a5c2438
3
  size 876038394
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00aaa233a5ab792d448bf134427c353ee79cad1e66b0f69aaec190e7f1ef98af
3
  size 876038394
run-0/checkpoint-39/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 39,
3
- "best_metric": 0.631768953068592,
4
  "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_14/run-0/checkpoint-39",
5
  "epoch": 1.0,
6
  "eval_steps": 500,
@@ -11,32 +11,32 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.2564102564102564,
14
- "grad_norm": 1.662626028060913,
15
  "learning_rate": 9.487179487179487e-05,
16
  "loss": 0.696,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.5128205128205128,
21
- "grad_norm": 2.0300467014312744,
22
  "learning_rate": 8.974358974358975e-05,
23
  "loss": 0.6793,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.7692307692307693,
28
- "grad_norm": 4.490738868713379,
29
  "learning_rate": 8.461538461538461e-05,
30
  "loss": 0.6499,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 1.0,
35
- "eval_accuracy": 0.631768953068592,
36
- "eval_loss": 0.6309793591499329,
37
- "eval_runtime": 0.6606,
38
- "eval_samples_per_second": 419.298,
39
- "eval_steps_per_second": 7.569,
40
  "step": 39
41
  }
42
  ],
 
1
  {
2
  "best_global_step": 39,
3
+ "best_metric": 0.6173285198555957,
4
  "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_14/run-0/checkpoint-39",
5
  "epoch": 1.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.2564102564102564,
14
+ "grad_norm": 1.662625789642334,
15
  "learning_rate": 9.487179487179487e-05,
16
  "loss": 0.696,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.5128205128205128,
21
+ "grad_norm": 2.0300142765045166,
22
  "learning_rate": 8.974358974358975e-05,
23
  "loss": 0.6793,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.7692307692307693,
28
+ "grad_norm": 4.4956440925598145,
29
  "learning_rate": 8.461538461538461e-05,
30
  "loss": 0.6499,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 1.0,
35
+ "eval_accuracy": 0.6173285198555957,
36
+ "eval_loss": 0.6330550909042358,
37
+ "eval_runtime": 0.6611,
38
+ "eval_samples_per_second": 418.98,
39
+ "eval_steps_per_second": 7.563,
40
  "step": 39
41
  }
42
  ],
run-0/checkpoint-39/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9df7fcee919248151322838e3ee2bda70121eb4c651bc0be349c4f5f62deeaf
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dedec8cb35bb4edec4b57e62ce9549f5bcd5baa05a7b09482e7f936fba95bd7
3
  size 5432
run-0/checkpoint-78/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee1e01f564ab20f9119e0efa77357cde7af5a4cb8d40e9d5be30ce84b313f8cb
3
  size 876038394
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a6bd0bd4447c86fa907715bb20e5259062d2eb35ad8c4c9ae594e0c25acd0c2
3
  size 876038394
run-0/checkpoint-78/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 78,
3
- "best_metric": 0.6931407942238267,
4
  "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_14/run-0/checkpoint-78",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
@@ -11,69 +11,69 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.2564102564102564,
14
- "grad_norm": 1.662626028060913,
15
  "learning_rate": 9.487179487179487e-05,
16
  "loss": 0.696,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.5128205128205128,
21
- "grad_norm": 2.0300467014312744,
22
  "learning_rate": 8.974358974358975e-05,
23
  "loss": 0.6793,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.7692307692307693,
28
- "grad_norm": 4.490738868713379,
29
  "learning_rate": 8.461538461538461e-05,
30
  "loss": 0.6499,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 1.0,
35
- "eval_accuracy": 0.631768953068592,
36
- "eval_loss": 0.6309793591499329,
37
- "eval_runtime": 0.6606,
38
- "eval_samples_per_second": 419.298,
39
- "eval_steps_per_second": 7.569,
40
  "step": 39
41
  },
42
  {
43
  "epoch": 1.0256410256410255,
44
- "grad_norm": 3.481339454650879,
45
  "learning_rate": 7.948717948717948e-05,
46
- "loss": 0.6793,
47
  "step": 40
48
  },
49
  {
50
  "epoch": 1.282051282051282,
51
- "grad_norm": 4.932971477508545,
52
  "learning_rate": 7.435897435897436e-05,
53
- "loss": 0.5602,
54
  "step": 50
55
  },
56
  {
57
  "epoch": 1.5384615384615383,
58
- "grad_norm": 7.099682807922363,
59
  "learning_rate": 6.923076923076924e-05,
60
- "loss": 0.5998,
61
  "step": 60
62
  },
63
  {
64
  "epoch": 1.7948717948717947,
65
- "grad_norm": 3.696152925491333,
66
  "learning_rate": 6.410256410256412e-05,
67
- "loss": 0.5403,
68
  "step": 70
69
  },
70
  {
71
  "epoch": 2.0,
72
- "eval_accuracy": 0.6931407942238267,
73
- "eval_loss": 0.6287456154823303,
74
- "eval_runtime": 0.6619,
75
- "eval_samples_per_second": 418.511,
76
- "eval_steps_per_second": 7.554,
77
  "step": 78
78
  }
79
  ],
 
1
  {
2
  "best_global_step": 78,
3
+ "best_metric": 0.6859205776173285,
4
  "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_14/run-0/checkpoint-78",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.2564102564102564,
14
+ "grad_norm": 1.662625789642334,
15
  "learning_rate": 9.487179487179487e-05,
16
  "loss": 0.696,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.5128205128205128,
21
+ "grad_norm": 2.0300142765045166,
22
  "learning_rate": 8.974358974358975e-05,
23
  "loss": 0.6793,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.7692307692307693,
28
+ "grad_norm": 4.4956440925598145,
29
  "learning_rate": 8.461538461538461e-05,
30
  "loss": 0.6499,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 1.0,
35
+ "eval_accuracy": 0.6173285198555957,
36
+ "eval_loss": 0.6330550909042358,
37
+ "eval_runtime": 0.6611,
38
+ "eval_samples_per_second": 418.98,
39
+ "eval_steps_per_second": 7.563,
40
  "step": 39
41
  },
42
  {
43
  "epoch": 1.0256410256410255,
44
+ "grad_norm": 3.44403338432312,
45
  "learning_rate": 7.948717948717948e-05,
46
+ "loss": 0.6782,
47
  "step": 40
48
  },
49
  {
50
  "epoch": 1.282051282051282,
51
+ "grad_norm": 5.425619602203369,
52
  "learning_rate": 7.435897435897436e-05,
53
+ "loss": 0.5831,
54
  "step": 50
55
  },
56
  {
57
  "epoch": 1.5384615384615383,
58
+ "grad_norm": 11.772316932678223,
59
  "learning_rate": 6.923076923076924e-05,
60
+ "loss": 0.5307,
61
  "step": 60
62
  },
63
  {
64
  "epoch": 1.7948717948717947,
65
+ "grad_norm": 3.624584674835205,
66
  "learning_rate": 6.410256410256412e-05,
67
+ "loss": 0.5396,
68
  "step": 70
69
  },
70
  {
71
  "epoch": 2.0,
72
+ "eval_accuracy": 0.6859205776173285,
73
+ "eval_loss": 0.6415449380874634,
74
+ "eval_runtime": 0.6613,
75
+ "eval_samples_per_second": 418.848,
76
+ "eval_steps_per_second": 7.56,
77
  "step": 78
78
  }
79
  ],
run-0/checkpoint-78/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9df7fcee919248151322838e3ee2bda70121eb4c651bc0be349c4f5f62deeaf
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dedec8cb35bb4edec4b57e62ce9549f5bcd5baa05a7b09482e7f936fba95bd7
3
  size 5432