antoine-444 committed on
Commit
7110591
·
verified ·
1 Parent(s): 9f2d443

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. scheduler.pt +1 -1
  4. trainer_state.json +100 -58
  5. training_args.bin +1 -1
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d5259f63701323f722761a70bc120c9eac3ca5b4f21ffa24b7e0b55f0a2771e
3
  size 1192135096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8240242a3d0c2ae4d658a46f65180d5bfcc3377148abaab6229f38648f372be5
3
  size 1192135096
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec5cc8aa219267408c45a4d037896a55c856faab11e2505858c17e4c6415d0f3
3
  size 2384459962
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae1869aea5467ffc88a428bc881a5e8da420e5b315f456962050117e347f9441
3
  size 2384459962
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de16677c82ff272c2a0bd0cd189a45a0e7a858ab45925adad4f6dc891d27b809
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9197244c5ee8fd84c23cb5387dd7cd4b0d34bb7720142963e9ea404ddb17646d
3
  size 1064
trainer_state.json CHANGED
@@ -3,105 +3,147 @@
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
  "epoch": 1.0,
6
- "eval_steps": 100,
7
- "global_step": 625,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "epoch": 0.16,
14
- "grad_norm": 6.5,
15
- "learning_rate": 4.2080000000000004e-05,
16
- "loss": 1.2105,
17
  "step": 100
18
  },
19
  {
20
  "epoch": 0.16,
21
- "eval_loss": 1.458847165107727,
22
- "eval_runtime": 128.1964,
23
- "eval_samples_per_second": 44.744,
24
- "eval_steps_per_second": 5.593,
25
- "step": 100
26
  },
27
  {
28
- "epoch": 0.32,
29
- "grad_norm": 6.34375,
30
- "learning_rate": 3.408e-05,
31
- "loss": 1.1564,
 
32
  "step": 200
33
  },
 
 
 
 
 
 
 
34
  {
35
  "epoch": 0.32,
36
- "eval_loss": 1.4528062343597412,
37
- "eval_runtime": 128.0959,
38
- "eval_samples_per_second": 44.779,
39
- "eval_steps_per_second": 5.597,
40
- "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  },
42
  {
43
  "epoch": 0.48,
44
- "grad_norm": 7.40625,
45
- "learning_rate": 2.6079999999999998e-05,
46
- "loss": 1.1265,
47
- "step": 300
48
  },
49
  {
50
  "epoch": 0.48,
51
- "eval_loss": 1.4363205432891846,
52
- "eval_runtime": 128.0611,
53
- "eval_samples_per_second": 44.791,
54
- "eval_steps_per_second": 5.599,
55
- "step": 300
 
 
 
 
 
 
 
56
  },
57
  {
58
  "epoch": 0.64,
59
- "grad_norm": 6.1875,
60
- "learning_rate": 1.808e-05,
61
- "loss": 1.1266,
62
- "step": 400
63
  },
64
  {
65
  "epoch": 0.64,
66
- "eval_loss": 1.428483009338379,
67
- "eval_runtime": 128.0722,
68
- "eval_samples_per_second": 44.787,
69
  "eval_steps_per_second": 5.598,
70
- "step": 400
 
 
 
 
 
 
 
71
  },
72
  {
73
  "epoch": 0.8,
74
- "grad_norm": 6.03125,
75
- "learning_rate": 1.008e-05,
76
- "loss": 1.1208,
77
- "step": 500
78
  },
79
  {
80
  "epoch": 0.8,
81
- "eval_loss": 1.4252334833145142,
82
- "eval_runtime": 128.0781,
83
- "eval_samples_per_second": 44.785,
84
- "eval_steps_per_second": 5.598,
85
- "step": 500
 
 
 
 
 
 
 
86
  },
87
  {
88
  "epoch": 0.96,
89
- "grad_norm": 5.59375,
90
- "learning_rate": 2.08e-06,
91
- "loss": 1.1074,
92
- "step": 600
93
  },
94
  {
95
  "epoch": 0.96,
96
- "eval_loss": 1.4235466718673706,
97
- "eval_runtime": 128.1144,
98
- "eval_samples_per_second": 44.772,
99
- "eval_steps_per_second": 5.597,
100
- "step": 600
101
  }
102
  ],
103
  "logging_steps": 100,
104
- "max_steps": 625,
105
  "num_input_tokens_seen": 0,
106
  "num_train_epochs": 1,
107
  "save_steps": 500,
@@ -117,8 +159,8 @@
117
  "attributes": {}
118
  }
119
  },
120
- "total_flos": 1.353116024832e+16,
121
- "train_batch_size": 4,
122
  "trial_name": null,
123
  "trial_params": null
124
  }
 
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
  "epoch": 1.0,
6
+ "eval_steps": 200,
7
+ "global_step": 1250,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "epoch": 0.08,
14
+ "grad_norm": 7.40625,
15
+ "learning_rate": 4.604e-05,
16
+ "loss": 1.1267,
17
  "step": 100
18
  },
19
  {
20
  "epoch": 0.16,
21
+ "grad_norm": 5.71875,
22
+ "learning_rate": 4.2040000000000004e-05,
23
+ "loss": 1.1115,
24
+ "step": 200
 
25
  },
26
  {
27
+ "epoch": 0.16,
28
+ "eval_loss": 1.4295576810836792,
29
+ "eval_runtime": 81.4705,
30
+ "eval_samples_per_second": 44.728,
31
+ "eval_steps_per_second": 5.597,
32
  "step": 200
33
  },
34
+ {
35
+ "epoch": 0.24,
36
+ "grad_norm": 5.8125,
37
+ "learning_rate": 3.804e-05,
38
+ "loss": 1.0758,
39
+ "step": 300
40
+ },
41
  {
42
  "epoch": 0.32,
43
+ "grad_norm": 4.875,
44
+ "learning_rate": 3.404e-05,
45
+ "loss": 1.0637,
46
+ "step": 400
47
+ },
48
+ {
49
+ "epoch": 0.32,
50
+ "eval_loss": 1.4099386930465698,
51
+ "eval_runtime": 81.8405,
52
+ "eval_samples_per_second": 44.526,
53
+ "eval_steps_per_second": 5.572,
54
+ "step": 400
55
+ },
56
+ {
57
+ "epoch": 0.4,
58
+ "grad_norm": 5.90625,
59
+ "learning_rate": 3.004e-05,
60
+ "loss": 1.0337,
61
+ "step": 500
62
  },
63
  {
64
  "epoch": 0.48,
65
+ "grad_norm": 5.125,
66
+ "learning_rate": 2.6040000000000005e-05,
67
+ "loss": 1.025,
68
+ "step": 600
69
  },
70
  {
71
  "epoch": 0.48,
72
+ "eval_loss": 1.3943334817886353,
73
+ "eval_runtime": 81.4207,
74
+ "eval_samples_per_second": 44.755,
75
+ "eval_steps_per_second": 5.601,
76
+ "step": 600
77
+ },
78
+ {
79
+ "epoch": 0.56,
80
+ "grad_norm": 5.125,
81
+ "learning_rate": 2.2040000000000002e-05,
82
+ "loss": 1.0328,
83
+ "step": 700
84
  },
85
  {
86
  "epoch": 0.64,
87
+ "grad_norm": 5.84375,
88
+ "learning_rate": 1.804e-05,
89
+ "loss": 1.0097,
90
+ "step": 800
91
  },
92
  {
93
  "epoch": 0.64,
94
+ "eval_loss": 1.3861989974975586,
95
+ "eval_runtime": 81.4518,
96
+ "eval_samples_per_second": 44.738,
97
  "eval_steps_per_second": 5.598,
98
+ "step": 800
99
+ },
100
+ {
101
+ "epoch": 0.72,
102
+ "grad_norm": 5.0625,
103
+ "learning_rate": 1.4040000000000001e-05,
104
+ "loss": 1.0091,
105
+ "step": 900
106
  },
107
  {
108
  "epoch": 0.8,
109
+ "grad_norm": 5.8125,
110
+ "learning_rate": 1.004e-05,
111
+ "loss": 0.9927,
112
+ "step": 1000
113
  },
114
  {
115
  "epoch": 0.8,
116
+ "eval_loss": 1.3814911842346191,
117
+ "eval_runtime": 81.426,
118
+ "eval_samples_per_second": 44.752,
119
+ "eval_steps_per_second": 5.6,
120
+ "step": 1000
121
+ },
122
+ {
123
+ "epoch": 0.88,
124
+ "grad_norm": 6.5625,
125
+ "learning_rate": 6.040000000000001e-06,
126
+ "loss": 1.0129,
127
+ "step": 1100
128
  },
129
  {
130
  "epoch": 0.96,
131
+ "grad_norm": 4.8125,
132
+ "learning_rate": 2.0400000000000004e-06,
133
+ "loss": 0.988,
134
+ "step": 1200
135
  },
136
  {
137
  "epoch": 0.96,
138
+ "eval_loss": 1.3815840482711792,
139
+ "eval_runtime": 81.4118,
140
+ "eval_samples_per_second": 44.76,
141
+ "eval_steps_per_second": 5.601,
142
+ "step": 1200
143
  }
144
  ],
145
  "logging_steps": 100,
146
+ "max_steps": 1250,
147
  "num_input_tokens_seen": 0,
148
  "num_train_epochs": 1,
149
  "save_steps": 500,
 
159
  "attributes": {}
160
  }
161
  },
162
+ "total_flos": 2.706232049664e+16,
163
+ "train_batch_size": 8,
164
  "trial_name": null,
165
  "trial_params": null
166
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a7349d2d8120da787c6450659b502b45dafc69ba556136c839ae39ead81bc46
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5891e3b928ad2808f900a2b53aa256fe6c83df2b1853b1d06afa110235dcafb
3
  size 5304