MHGanainy committed on
Commit
a17285e
·
verified ·
1 Parent(s): 4bab0d1

MHGanainy/16-clusters-imbalanced-12

Browse files
Files changed (5) hide show
  1. README.md +2 -0
  2. all_results.json +13 -0
  3. eval_results.json +8 -0
  4. train_results.json +8 -0
  5. trainer_state.json +168 -0
README.md CHANGED
@@ -15,6 +15,8 @@ should probably proofread and complete it, then remove this comment. -->
15
  # 16-clusters-imbalanced-12
16
 
17
  This model is a fine-tuned version of [openai-community/gpt2-xl](https://huggingface.co/openai-community/gpt2-xl) on an unknown dataset.
 
 
18
 
19
  ## Model description
20
 
 
15
  # 16-clusters-imbalanced-12
16
 
17
  This model is a fine-tuned version of [openai-community/gpt2-xl](https://huggingface.co/openai-community/gpt2-xl) on an unknown dataset.
18
+ It achieves the following results on the evaluation set:
19
+ - Loss: 1.6388
20
 
21
  ## Model description
22
 
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_loss": 1.6387864351272583,
4
+ "eval_runtime": 22.9548,
5
+ "eval_samples_per_second": 13.766,
6
+ "eval_steps_per_second": 1.743,
7
+ "perplexity": 5.148917174177862,
8
+ "total_flos": 3.3575032356864e+16,
9
+ "train_loss": 1.8945055998765028,
10
+ "train_runtime": 572.07,
11
+ "train_samples_per_second": 6.459,
12
+ "train_steps_per_second": 3.23
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_loss": 1.6387864351272583,
4
+ "eval_runtime": 22.9548,
5
+ "eval_samples_per_second": 13.766,
6
+ "eval_steps_per_second": 1.743,
7
+ "perplexity": 5.148917174177862
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "total_flos": 3.3575032356864e+16,
4
+ "train_loss": 1.8945055998765028,
5
+ "train_runtime": 572.07,
6
+ "train_samples_per_second": 6.459,
7
+ "train_steps_per_second": 3.23
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1848,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.05411255411255411,
13
+ "grad_norm": 0.18986542522907257,
14
+ "learning_rate": 6.666666666666667e-06,
15
+ "loss": 2.3446,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.10822510822510822,
20
+ "grad_norm": 0.1643804907798767,
21
+ "learning_rate": 1.3333333333333333e-05,
22
+ "loss": 2.332,
23
+ "step": 200
24
+ },
25
+ {
26
+ "epoch": 0.16233766233766234,
27
+ "grad_norm": 0.34427669644355774,
28
+ "learning_rate": 2e-05,
29
+ "loss": 2.2662,
30
+ "step": 300
31
+ },
32
+ {
33
+ "epoch": 0.21645021645021645,
34
+ "grad_norm": 0.4920704960823059,
35
+ "learning_rate": 1.979477194660769e-05,
36
+ "loss": 2.0925,
37
+ "step": 400
38
+ },
39
+ {
40
+ "epoch": 0.27056277056277056,
41
+ "grad_norm": 0.6333794593811035,
42
+ "learning_rate": 1.91875114972106e-05,
43
+ "loss": 2.0147,
44
+ "step": 500
45
+ },
46
+ {
47
+ "epoch": 0.3246753246753247,
48
+ "grad_norm": 0.7752760648727417,
49
+ "learning_rate": 1.820314402779511e-05,
50
+ "loss": 1.952,
51
+ "step": 600
52
+ },
53
+ {
54
+ "epoch": 0.3787878787878788,
55
+ "grad_norm": 0.8382052779197693,
56
+ "learning_rate": 1.6882073502275394e-05,
57
+ "loss": 1.8939,
58
+ "step": 700
59
+ },
60
+ {
61
+ "epoch": 0.4329004329004329,
62
+ "grad_norm": 0.8314985036849976,
63
+ "learning_rate": 1.527852406712072e-05,
64
+ "loss": 1.8505,
65
+ "step": 800
66
+ },
67
+ {
68
+ "epoch": 0.487012987012987,
69
+ "grad_norm": 1.1022111177444458,
70
+ "learning_rate": 1.3458314388150115e-05,
71
+ "loss": 1.8473,
72
+ "step": 900
73
+ },
74
+ {
75
+ "epoch": 0.5411255411255411,
76
+ "grad_norm": 0.9701792001724243,
77
+ "learning_rate": 1.149615608319978e-05,
78
+ "loss": 1.7982,
79
+ "step": 1000
80
+ },
81
+ {
82
+ "epoch": 0.5952380952380952,
83
+ "grad_norm": 1.1665358543395996,
84
+ "learning_rate": 9.472587138144215e-06,
85
+ "loss": 1.751,
86
+ "step": 1100
87
+ },
88
+ {
89
+ "epoch": 0.6493506493506493,
90
+ "grad_norm": 1.110221266746521,
91
+ "learning_rate": 7.470666176083193e-06,
92
+ "loss": 1.7613,
93
+ "step": 1200
94
+ },
95
+ {
96
+ "epoch": 0.7034632034632035,
97
+ "grad_norm": 1.02971351146698,
98
+ "learning_rate": 5.572563265434528e-06,
99
+ "loss": 1.7219,
100
+ "step": 1300
101
+ },
102
+ {
103
+ "epoch": 0.7575757575757576,
104
+ "grad_norm": 0.9258909225463867,
105
+ "learning_rate": 3.8561871992963585e-06,
106
+ "loss": 1.7393,
107
+ "step": 1400
108
+ },
109
+ {
110
+ "epoch": 0.8116883116883117,
111
+ "grad_norm": 1.0694141387939453,
112
+ "learning_rate": 2.3919876814572197e-06,
113
+ "loss": 1.6905,
114
+ "step": 1500
115
+ },
116
+ {
117
+ "epoch": 0.8658008658008658,
118
+ "grad_norm": 0.9484215378761292,
119
+ "learning_rate": 1.240063675281934e-06,
120
+ "loss": 1.7174,
121
+ "step": 1600
122
+ },
123
+ {
124
+ "epoch": 0.9199134199134199,
125
+ "grad_norm": 1.0407582521438599,
126
+ "learning_rate": 4.4769660505914136e-07,
127
+ "loss": 1.7107,
128
+ "step": 1700
129
+ },
130
+ {
131
+ "epoch": 0.974025974025974,
132
+ "grad_norm": 1.056659460067749,
133
+ "learning_rate": 4.740966106764222e-08,
134
+ "loss": 1.6829,
135
+ "step": 1800
136
+ },
137
+ {
138
+ "epoch": 1.0,
139
+ "step": 1848,
140
+ "total_flos": 3.3575032356864e+16,
141
+ "train_loss": 1.8945055998765028,
142
+ "train_runtime": 572.07,
143
+ "train_samples_per_second": 6.459,
144
+ "train_steps_per_second": 3.23
145
+ }
146
+ ],
147
+ "logging_steps": 100,
148
+ "max_steps": 1848,
149
+ "num_input_tokens_seen": 0,
150
+ "num_train_epochs": 1,
151
+ "save_steps": 500,
152
+ "stateful_callbacks": {
153
+ "TrainerControl": {
154
+ "args": {
155
+ "should_epoch_stop": false,
156
+ "should_evaluate": false,
157
+ "should_log": false,
158
+ "should_save": true,
159
+ "should_training_stop": true
160
+ },
161
+ "attributes": {}
162
+ }
163
+ },
164
+ "total_flos": 3.3575032356864e+16,
165
+ "train_batch_size": 2,
166
+ "trial_name": null,
167
+ "trial_params": null
168
+ }