JLB-JLB committed on
Commit
929507e
·
1 Parent(s): f139ed4

Training in progress, step 30

Browse files
all_results.json CHANGED
@@ -5,9 +5,9 @@
5
  "eval_runtime": 3.2987,
6
  "eval_samples_per_second": 40.319,
7
  "eval_steps_per_second": 5.154,
8
- "total_flos": 3.205097416476426e+17,
9
- "train_loss": 0.15786233500522726,
10
- "train_runtime": 149.2116,
11
- "train_samples_per_second": 27.719,
12
- "train_steps_per_second": 0.456
13
  }
 
5
  "eval_runtime": 3.2987,
6
  "eval_samples_per_second": 40.319,
7
  "eval_steps_per_second": 5.154,
8
+ "total_flos": 3.254692734332928e+17,
9
+ "train_loss": 0.01579295479071637,
10
+ "train_runtime": 122.9749,
11
+ "train_samples_per_second": 33.633,
12
+ "train_steps_per_second": 1.073
13
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbb471982be87396397be4b0e1109a341772023833225fac2262d691333b47e4
3
  size 343272234
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03e7a48dec3e798626006da1124a5327352069797d658fa2fa6ca28b109a6675
3
  size 343272234
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
- "total_flos": 3.205097416476426e+17,
4
- "train_loss": 0.15786233500522726,
5
- "train_runtime": 149.2116,
6
- "train_samples_per_second": 27.719,
7
- "train_steps_per_second": 0.456
8
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "total_flos": 3.254692734332928e+17,
4
+ "train_loss": 0.01579295479071637,
5
+ "train_runtime": 122.9749,
6
+ "train_samples_per_second": 33.633,
7
+ "train_steps_per_second": 1.073
8
  }
trainer_state.json CHANGED
@@ -1,82 +1,142 @@
1
  {
2
- "best_metric": 0.046887390315532684,
3
- "best_model_checkpoint": "/content/drive/MyDrive/Model_folder/checkpoint-60",
4
  "epoch": 4.0,
5
  "eval_steps": 30,
6
- "global_step": 68,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.59,
13
- "learning_rate": 0.00017058823529411766,
14
- "loss": 0.5948,
15
  "step": 10
16
  },
17
  {
18
- "epoch": 1.18,
19
- "learning_rate": 0.0001411764705882353,
20
- "loss": 0.2063,
21
  "step": 20
22
  },
23
  {
24
- "epoch": 1.76,
25
- "learning_rate": 0.00011176470588235294,
26
- "loss": 0.1126,
27
  "step": 30
28
  },
29
  {
30
- "epoch": 1.76,
31
- "eval_loss": 0.09689675271511078,
32
- "eval_matthews_correlation": 0.9667292567110256,
33
- "eval_runtime": 2.7626,
34
- "eval_samples_per_second": 48.143,
35
- "eval_steps_per_second": 6.154,
36
  "step": 30
37
  },
38
  {
39
- "epoch": 2.35,
40
- "learning_rate": 8.23529411764706e-05,
41
- "loss": 0.0605,
42
  "step": 40
43
  },
44
  {
45
- "epoch": 2.94,
46
- "learning_rate": 5.294117647058824e-05,
47
- "loss": 0.0453,
48
  "step": 50
49
  },
50
  {
51
- "epoch": 3.53,
52
- "learning_rate": 2.3529411764705884e-05,
53
- "loss": 0.031,
54
  "step": 60
55
  },
56
  {
57
- "epoch": 3.53,
58
- "eval_loss": 0.046887390315532684,
59
- "eval_matthews_correlation": 0.9888040854737966,
60
- "eval_runtime": 2.3092,
61
- "eval_samples_per_second": 57.596,
62
- "eval_steps_per_second": 7.362,
63
  "step": 60
64
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  {
66
  "epoch": 4.0,
67
- "step": 68,
68
- "total_flos": 3.205097416476426e+17,
69
- "train_loss": 0.15786233500522726,
70
- "train_runtime": 149.2116,
71
- "train_samples_per_second": 27.719,
72
- "train_steps_per_second": 0.456
73
  }
74
  ],
75
  "logging_steps": 10,
76
- "max_steps": 68,
77
  "num_train_epochs": 4,
78
  "save_steps": 30,
79
- "total_flos": 3.205097416476426e+17,
80
  "trial_name": null,
81
  "trial_params": null
82
  }
 
1
  {
2
+ "best_metric": 0.03015263006091118,
3
+ "best_model_checkpoint": "/content/drive/MyDrive/Model_folder/checkpoint-30",
4
  "epoch": 4.0,
5
  "eval_steps": 30,
6
+ "global_step": 132,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.3,
13
+ "learning_rate": 0.00018484848484848484,
14
+ "loss": 0.1596,
15
  "step": 10
16
  },
17
  {
18
+ "epoch": 0.61,
19
+ "learning_rate": 0.00016969696969696972,
20
+ "loss": 0.0748,
21
  "step": 20
22
  },
23
  {
24
+ "epoch": 0.91,
25
+ "learning_rate": 0.00015454545454545454,
26
+ "loss": 0.0958,
27
  "step": 30
28
  },
29
  {
30
+ "epoch": 0.91,
31
+ "eval_loss": 0.03015263006091118,
32
+ "eval_matthews_correlation": 0.9888040854737966,
33
+ "eval_runtime": 2.9483,
34
+ "eval_samples_per_second": 45.11,
35
+ "eval_steps_per_second": 5.766,
36
  "step": 30
37
  },
38
  {
39
+ "epoch": 1.21,
40
+ "learning_rate": 0.0001393939393939394,
41
+ "loss": 0.0802,
42
  "step": 40
43
  },
44
  {
45
+ "epoch": 1.52,
46
+ "learning_rate": 0.00012424242424242425,
47
+ "loss": 0.0318,
48
  "step": 50
49
  },
50
  {
51
+ "epoch": 1.82,
52
+ "learning_rate": 0.00010909090909090909,
53
+ "loss": 0.0289,
54
  "step": 60
55
  },
56
  {
57
+ "epoch": 1.82,
58
+ "eval_loss": 0.045789625495672226,
59
+ "eval_matthews_correlation": 0.9888050243347044,
60
+ "eval_runtime": 1.9847,
61
+ "eval_samples_per_second": 67.012,
62
+ "eval_steps_per_second": 8.565,
63
  "step": 60
64
  },
65
+ {
66
+ "epoch": 2.12,
67
+ "learning_rate": 9.393939393939395e-05,
68
+ "loss": 0.0211,
69
+ "step": 70
70
+ },
71
+ {
72
+ "epoch": 2.42,
73
+ "learning_rate": 7.878787878787879e-05,
74
+ "loss": 0.0097,
75
+ "step": 80
76
+ },
77
+ {
78
+ "epoch": 2.73,
79
+ "learning_rate": 6.363636363636364e-05,
80
+ "loss": 0.0085,
81
+ "step": 90
82
+ },
83
+ {
84
+ "epoch": 2.73,
85
+ "eval_loss": 0.05030398443341255,
86
+ "eval_matthews_correlation": 0.9888040854737966,
87
+ "eval_runtime": 2.13,
88
+ "eval_samples_per_second": 62.441,
89
+ "eval_steps_per_second": 7.981,
90
+ "step": 90
91
+ },
92
+ {
93
+ "epoch": 3.03,
94
+ "learning_rate": 4.848484848484849e-05,
95
+ "loss": 0.0072,
96
+ "step": 100
97
+ },
98
+ {
99
+ "epoch": 3.33,
100
+ "learning_rate": 3.3333333333333335e-05,
101
+ "loss": 0.0071,
102
+ "step": 110
103
+ },
104
+ {
105
+ "epoch": 3.64,
106
+ "learning_rate": 1.8181818181818182e-05,
107
+ "loss": 0.0065,
108
+ "step": 120
109
+ },
110
+ {
111
+ "epoch": 3.64,
112
+ "eval_loss": 0.05042246729135513,
113
+ "eval_matthews_correlation": 0.9888040854737966,
114
+ "eval_runtime": 2.8921,
115
+ "eval_samples_per_second": 45.988,
116
+ "eval_steps_per_second": 5.878,
117
+ "step": 120
118
+ },
119
+ {
120
+ "epoch": 3.94,
121
+ "learning_rate": 3.0303030303030305e-06,
122
+ "loss": 0.0062,
123
+ "step": 130
124
+ },
125
  {
126
  "epoch": 4.0,
127
+ "step": 132,
128
+ "total_flos": 3.254692734332928e+17,
129
+ "train_loss": 0.01579295479071637,
130
+ "train_runtime": 122.9749,
131
+ "train_samples_per_second": 33.633,
132
+ "train_steps_per_second": 1.073
133
  }
134
  ],
135
  "logging_steps": 10,
136
+ "max_steps": 132,
137
  "num_train_epochs": 4,
138
  "save_steps": 30,
139
+ "total_flos": 3.254692734332928e+17,
140
  "trial_name": null,
141
  "trial_params": null
142
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f0efe6e2503d74d756fbdf7f4557af95d22ee8da89e50aebca9c9557104d523
3
  size 4536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c4abf93990c82b841efb6a8f8aab603e55e9c850cfab05d41e1fd822a546a6d
3
  size 4536