k4black commited on
Commit
10f796a
·
1 Parent(s): 0377890

Training in progress, step 1800

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f8db643355ba94f446bdd2108b3ad40b89c4a17909a32d21478755e0770925c
3
  size 2843228158
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf5eedaffdfa30482e714f831e7789ec7adabd5bf0475bdef3326ee344156f16
3
  size 2843228158
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26be059b9e3d553123dfe572fb1912a3e2fd6ab2703ff4d1787a0e42d6a5bd4c
3
  size 1421588461
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f65bf00ecada6d6f6b70b7db5964c0b3eb0b617f54e353d7314aa97df0133d3e
3
  size 1421588461
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1786c28a3ff60b9e5cc75072494414aab01774a7e1faabac2400987838a11166
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5d43dde0c6d7da51ae34de8e125f82f70689f5e9858a0ddef8cfa60a79fee44
3
  size 14503
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f8f20db91521a631ff32f0f157e2cda744d77bd4375968aaad18f626c72d1c8
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7be3d111334799f516f34ca6adfa4185e3a4579faed68581da6d86666f61107e
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65de131eded300332b8d2247dfce2d1bcf678299231de832fceba967a9c4d776
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f14c36be893ba57470539a7a5d2a8d5493e0e3009136b7c4dd8af472cd2b198
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.1649205985062833,
3
- "best_model_checkpoint": "/home2/s5431786/nlp-final-project/results/roberta-large-e-snli-classification-nli-default/checkpoint-200",
4
- "epoch": 0.011649580615097856,
5
- "global_step": 200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -22,11 +22,139 @@
22
  "eval_samples_per_second": 811.913,
23
  "eval_steps_per_second": 25.408,
24
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  }
26
  ],
27
  "max_steps": 17168,
28
  "num_train_epochs": 1,
29
- "total_flos": 569120757375168.0,
30
  "trial_name": null,
31
  "trial_params": null
32
  }
 
1
  {
2
+ "best_metric": 0.9006977145472175,
3
+ "best_model_checkpoint": "/home2/s5431786/nlp-final-project/results/roberta-large-e-snli-classification-nli-default/checkpoint-1800",
4
+ "epoch": 0.1048462255358807,
5
+ "global_step": 1800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
22
  "eval_samples_per_second": 811.913,
23
  "eval_steps_per_second": 25.408,
24
  "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.02,
28
+ "learning_rate": 4.621653084982538e-06,
29
+ "loss": 0.9455,
30
+ "step": 400
31
+ },
32
+ {
33
+ "epoch": 0.02,
34
+ "eval_accuracy": 0.8268644584434058,
35
+ "eval_f1": 0.8266178743634524,
36
+ "eval_loss": 0.47128933668136597,
37
+ "eval_runtime": 12.689,
38
+ "eval_samples_per_second": 775.634,
39
+ "eval_steps_per_second": 24.273,
40
+ "step": 400
41
+ },
42
+ {
43
+ "epoch": 0.03,
44
+ "learning_rate": 6.949941792782305e-06,
45
+ "loss": 0.4696,
46
+ "step": 600
47
+ },
48
+ {
49
+ "epoch": 0.03,
50
+ "eval_accuracy": 0.8723836618573461,
51
+ "eval_f1": 0.8706168205019141,
52
+ "eval_loss": 0.3847072422504425,
53
+ "eval_runtime": 12.9905,
54
+ "eval_samples_per_second": 757.631,
55
+ "eval_steps_per_second": 23.71,
56
+ "step": 600
57
+ },
58
+ {
59
+ "epoch": 0.05,
60
+ "learning_rate": 9.278230500582073e-06,
61
+ "loss": 0.4354,
62
+ "step": 800
63
+ },
64
+ {
65
+ "epoch": 0.05,
66
+ "eval_accuracy": 0.8812233285917497,
67
+ "eval_f1": 0.881475018184628,
68
+ "eval_loss": 0.3468632698059082,
69
+ "eval_runtime": 12.7292,
70
+ "eval_samples_per_second": 773.183,
71
+ "eval_steps_per_second": 24.196,
72
+ "step": 800
73
+ },
74
+ {
75
+ "epoch": 0.06,
76
+ "learning_rate": 9.915384143724325e-06,
77
+ "loss": 0.4001,
78
+ "step": 1000
79
+ },
80
+ {
81
+ "epoch": 0.06,
82
+ "eval_accuracy": 0.8652712863239179,
83
+ "eval_f1": 0.8617608879520094,
84
+ "eval_loss": 0.3995899260044098,
85
+ "eval_runtime": 12.6196,
86
+ "eval_samples_per_second": 779.895,
87
+ "eval_steps_per_second": 24.406,
88
+ "step": 1000
89
+ },
90
+ {
91
+ "epoch": 0.07,
92
+ "learning_rate": 9.792752467962475e-06,
93
+ "loss": 0.3818,
94
+ "step": 1200
95
+ },
96
+ {
97
+ "epoch": 0.07,
98
+ "eval_accuracy": 0.8905710221499695,
99
+ "eval_f1": 0.8904945421102081,
100
+ "eval_loss": 0.34042540192604065,
101
+ "eval_runtime": 12.5333,
102
+ "eval_samples_per_second": 785.265,
103
+ "eval_steps_per_second": 24.574,
104
+ "step": 1200
105
+ },
106
+ {
107
+ "epoch": 0.08,
108
+ "learning_rate": 9.670120792200626e-06,
109
+ "loss": 0.3577,
110
+ "step": 1400
111
+ },
112
+ {
113
+ "epoch": 0.08,
114
+ "eval_accuracy": 0.8973785815891079,
115
+ "eval_f1": 0.8974465861723097,
116
+ "eval_loss": 0.30269765853881836,
117
+ "eval_runtime": 12.932,
118
+ "eval_samples_per_second": 761.059,
119
+ "eval_steps_per_second": 23.817,
120
+ "step": 1400
121
+ },
122
+ {
123
+ "epoch": 0.09,
124
+ "learning_rate": 9.547489116438778e-06,
125
+ "loss": 0.3751,
126
+ "step": 1600
127
+ },
128
+ {
129
+ "epoch": 0.09,
130
+ "eval_accuracy": 0.8871164397480187,
131
+ "eval_f1": 0.8849187888163478,
132
+ "eval_loss": 0.33452147245407104,
133
+ "eval_runtime": 12.0676,
134
+ "eval_samples_per_second": 815.571,
135
+ "eval_steps_per_second": 25.523,
136
+ "step": 1600
137
+ },
138
+ {
139
+ "epoch": 0.1,
140
+ "learning_rate": 9.424857440676928e-06,
141
+ "loss": 0.3471,
142
+ "step": 1800
143
+ },
144
+ {
145
+ "epoch": 0.1,
146
+ "eval_accuracy": 0.9014427961796383,
147
+ "eval_f1": 0.9006977145472175,
148
+ "eval_loss": 0.27975374460220337,
149
+ "eval_runtime": 12.2016,
150
+ "eval_samples_per_second": 806.617,
151
+ "eval_steps_per_second": 25.243,
152
+ "step": 1800
153
  }
154
  ],
155
  "max_steps": 17168,
156
  "num_train_epochs": 1,
157
+ "total_flos": 5081780648726976.0,
158
  "trial_name": null,
159
  "trial_params": null
160
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26be059b9e3d553123dfe572fb1912a3e2fd6ab2703ff4d1787a0e42d6a5bd4c
3
  size 1421588461
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f65bf00ecada6d6f6b70b7db5964c0b3eb0b617f54e353d7314aa97df0133d3e
3
  size 1421588461