JulienRPA committed on
Commit
cd2245f
·
1 Parent(s): 2c70dc3

End of training

Browse files
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_accuracy": 0.731570151719994,
4
- "eval_loss": 0.9372276663780212,
5
- "eval_runtime": 264.4068,
6
- "eval_samples": 49327,
7
- "eval_samples_per_second": 186.557,
8
- "eval_steps_per_second": 2.916,
9
- "perplexity": 2.5528941244824197,
10
- "train_loss": 2.219597038788229,
11
- "train_runtime": 7937.3216,
12
- "train_samples": 147965,
13
- "train_samples_per_second": 55.925,
14
- "train_steps_per_second": 0.874
15
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 0.9527164483214212,
4
+ "eval_loss": 0.30882975459098816,
5
+ "eval_runtime": 165.0761,
6
+ "eval_samples": 50092,
7
+ "eval_samples_per_second": 303.448,
8
+ "eval_steps_per_second": 4.743,
9
+ "perplexity": 1.3618305053756028,
10
+ "train_loss": 0.9907198370619974,
11
+ "train_runtime": 6301.0396,
12
+ "train_samples": 150247,
13
+ "train_samples_per_second": 71.534,
14
+ "train_steps_per_second": 1.118
15
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_accuracy": 0.731570151719994,
4
- "eval_loss": 0.9372276663780212,
5
- "eval_runtime": 264.4068,
6
- "eval_samples": 49327,
7
- "eval_samples_per_second": 186.557,
8
- "eval_steps_per_second": 2.916,
9
- "perplexity": 2.5528941244824197
10
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 0.9527164483214212,
4
+ "eval_loss": 0.30882975459098816,
5
+ "eval_runtime": 165.0761,
6
+ "eval_samples": 50092,
7
+ "eval_samples_per_second": 303.448,
8
+ "eval_steps_per_second": 4.743,
9
+ "perplexity": 1.3618305053756028
10
  }
runs/May30_07-15-46_3105702ed930/events.out.tfevents.1685437630.3105702ed930.2539.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7516315525768811ca4dc24e6d897092ca611322964ac13a7ee60d629b93f242
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
- "train_loss": 2.219597038788229,
4
- "train_runtime": 7937.3216,
5
- "train_samples": 147965,
6
- "train_samples_per_second": 55.925,
7
- "train_steps_per_second": 0.874
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "train_loss": 0.9907198370619974,
4
+ "train_runtime": 6301.0396,
5
+ "train_samples": 150247,
6
+ "train_samples_per_second": 71.534,
7
+ "train_steps_per_second": 1.118
8
  }
trainer_state.json CHANGED
@@ -2,102 +2,129 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 3.0,
5
- "global_step": 6936,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.22,
12
- "learning_rate": 4.639561707035756e-05,
13
- "loss": 6.8529,
14
- "step": 500
15
  },
16
  {
17
  "epoch": 0.43,
18
- "learning_rate": 4.2791234140715114e-05,
19
- "loss": 3.542,
 
 
 
20
  "step": 1000
21
  },
22
  {
23
- "epoch": 0.65,
24
- "learning_rate": 3.9186851211072664e-05,
25
- "loss": 2.9635,
26
- "step": 1500
27
  },
28
  {
29
- "epoch": 0.87,
30
- "learning_rate": 3.558246828143022e-05,
31
- "loss": 2.5274,
 
 
 
32
  "step": 2000
33
  },
34
  {
35
- "epoch": 1.08,
36
- "learning_rate": 3.1978085351787776e-05,
37
- "loss": 2.2054,
38
- "step": 2500
39
  },
40
  {
41
- "epoch": 1.3,
42
- "learning_rate": 2.8373702422145332e-05,
43
- "loss": 1.9297,
 
 
 
44
  "step": 3000
45
  },
46
  {
47
- "epoch": 1.51,
48
- "learning_rate": 2.4769319492502884e-05,
49
- "loss": 1.7394,
50
- "step": 3500
51
  },
52
  {
53
- "epoch": 1.73,
54
- "learning_rate": 2.116493656286044e-05,
55
- "loss": 1.5874,
 
 
 
56
  "step": 4000
57
  },
58
  {
59
- "epoch": 1.95,
60
- "learning_rate": 1.7560553633217993e-05,
61
- "loss": 1.4676,
62
- "step": 4500
63
  },
64
  {
65
- "epoch": 2.16,
66
- "learning_rate": 1.395617070357555e-05,
67
- "loss": 1.3606,
 
 
 
68
  "step": 5000
69
  },
70
  {
71
- "epoch": 2.38,
72
- "learning_rate": 1.0351787773933102e-05,
73
- "loss": 1.2739,
74
- "step": 5500
75
  },
76
  {
77
- "epoch": 2.6,
78
- "learning_rate": 6.747404844290659e-06,
79
- "loss": 1.2049,
 
 
 
80
  "step": 6000
81
  },
82
  {
83
- "epoch": 2.81,
84
- "learning_rate": 3.143021914648212e-06,
85
- "loss": 1.1527,
86
- "step": 6500
 
 
 
 
 
 
 
 
 
87
  },
88
  {
89
  "epoch": 3.0,
90
- "step": 6936,
91
- "total_flos": 1.9168865726431344e+16,
92
- "train_loss": 2.219597038788229,
93
- "train_runtime": 7937.3216,
94
- "train_samples_per_second": 55.925,
95
- "train_steps_per_second": 0.874
96
  }
97
  ],
98
- "max_steps": 6936,
99
  "num_train_epochs": 3,
100
- "total_flos": 1.9168865726431344e+16,
101
  "trial_name": null,
102
  "trial_params": null
103
  }
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 3.0,
5
+ "global_step": 7044,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.43,
12
+ "learning_rate": 4.2901760363429874e-05,
13
+ "loss": 3.9512,
14
+ "step": 1000
15
  },
16
  {
17
  "epoch": 0.43,
18
+ "eval_accuracy": 0.6988671808767591,
19
+ "eval_loss": 1.3428400754928589,
20
+ "eval_runtime": 171.0039,
21
+ "eval_samples_per_second": 292.929,
22
+ "eval_steps_per_second": 4.579,
23
  "step": 1000
24
  },
25
  {
26
+ "epoch": 0.85,
27
+ "learning_rate": 3.580352072685974e-05,
28
+ "loss": 1.0044,
29
+ "step": 2000
30
  },
31
  {
32
+ "epoch": 0.85,
33
+ "eval_accuracy": 0.9013550464459579,
34
+ "eval_loss": 0.5994584560394287,
35
+ "eval_runtime": 170.4181,
36
+ "eval_samples_per_second": 293.936,
37
+ "eval_steps_per_second": 4.595,
38
  "step": 2000
39
  },
40
  {
41
+ "epoch": 1.28,
42
+ "learning_rate": 2.8705281090289608e-05,
43
+ "loss": 0.5483,
44
+ "step": 3000
45
  },
46
  {
47
+ "epoch": 1.28,
48
+ "eval_accuracy": 0.9309371495565297,
49
+ "eval_loss": 0.446216881275177,
50
+ "eval_runtime": 171.3996,
51
+ "eval_samples_per_second": 292.253,
52
+ "eval_steps_per_second": 4.568,
53
  "step": 3000
54
  },
55
  {
56
+ "epoch": 1.7,
57
+ "learning_rate": 2.160704145371948e-05,
58
+ "loss": 0.436,
59
+ "step": 4000
60
  },
61
  {
62
+ "epoch": 1.7,
63
+ "eval_accuracy": 0.9376707432277173,
64
+ "eval_loss": 0.40857475996017456,
65
+ "eval_runtime": 171.9437,
66
+ "eval_samples_per_second": 291.328,
67
+ "eval_steps_per_second": 4.554,
68
  "step": 4000
69
  },
70
  {
71
+ "epoch": 2.13,
72
+ "learning_rate": 1.4508801817149347e-05,
73
+ "loss": 0.3764,
74
+ "step": 5000
75
  },
76
  {
77
+ "epoch": 2.13,
78
+ "eval_accuracy": 0.9462951194712761,
79
+ "eval_loss": 0.3566935658454895,
80
+ "eval_runtime": 171.3127,
81
+ "eval_samples_per_second": 292.401,
82
+ "eval_steps_per_second": 4.571,
83
  "step": 5000
84
  },
85
  {
86
+ "epoch": 2.56,
87
+ "learning_rate": 7.410562180579217e-06,
88
+ "loss": 0.337,
89
+ "step": 6000
90
  },
91
  {
92
+ "epoch": 2.56,
93
+ "eval_accuracy": 0.9496153650211422,
94
+ "eval_loss": 0.3342791795730591,
95
+ "eval_runtime": 171.713,
96
+ "eval_samples_per_second": 291.719,
97
+ "eval_steps_per_second": 4.56,
98
  "step": 6000
99
  },
100
  {
101
+ "epoch": 2.98,
102
+ "learning_rate": 3.1232254400908575e-07,
103
+ "loss": 0.3115,
104
+ "step": 7000
105
+ },
106
+ {
107
+ "epoch": 2.98,
108
+ "eval_accuracy": 0.95252108208063,
109
+ "eval_loss": 0.30891212821006775,
110
+ "eval_runtime": 171.2387,
111
+ "eval_samples_per_second": 292.527,
112
+ "eval_steps_per_second": 4.573,
113
+ "step": 7000
114
  },
115
  {
116
  "epoch": 3.0,
117
+ "step": 7044,
118
+ "total_flos": 1.2593673024207894e+16,
119
+ "train_loss": 0.9907198370619974,
120
+ "train_runtime": 6301.0396,
121
+ "train_samples_per_second": 71.534,
122
+ "train_steps_per_second": 1.118
123
  }
124
  ],
125
+ "max_steps": 7044,
126
  "num_train_epochs": 3,
127
+ "total_flos": 1.2593673024207894e+16,
128
  "trial_name": null,
129
  "trial_params": null
130
  }