antoine-444 commited on
Commit
db4d7f1
·
verified ·
1 Parent(s): 7110591

Upload folder using huggingface_hub

Browse files
generation_config.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token_id": 151643,
3
- "eos_token_id": 151643,
4
- "max_new_tokens": 2048,
5
- "transformers_version": "4.51.3"
6
- }
 
 
 
 
 
 
 
optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae1869aea5467ffc88a428bc881a5e8da420e5b315f456962050117e347f9441
3
- size 2384459962
 
 
 
 
rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:69f50a692634404f2eebb2eab9f456865957578d752987bc52d843ac2a774366
3
- size 14244
 
 
 
 
scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9197244c5ee8fd84c23cb5387dd7cd4b0d34bb7720142963e9ea404ddb17646d
3
- size 1064
 
 
 
 
trainer_state.json DELETED
@@ -1,166 +0,0 @@
1
- {
2
- "best_global_step": null,
3
- "best_metric": null,
4
- "best_model_checkpoint": null,
5
- "epoch": 1.0,
6
- "eval_steps": 200,
7
- "global_step": 1250,
8
- "is_hyper_param_search": false,
9
- "is_local_process_zero": true,
10
- "is_world_process_zero": true,
11
- "log_history": [
12
- {
13
- "epoch": 0.08,
14
- "grad_norm": 7.40625,
15
- "learning_rate": 4.604e-05,
16
- "loss": 1.1267,
17
- "step": 100
18
- },
19
- {
20
- "epoch": 0.16,
21
- "grad_norm": 5.71875,
22
- "learning_rate": 4.2040000000000004e-05,
23
- "loss": 1.1115,
24
- "step": 200
25
- },
26
- {
27
- "epoch": 0.16,
28
- "eval_loss": 1.4295576810836792,
29
- "eval_runtime": 81.4705,
30
- "eval_samples_per_second": 44.728,
31
- "eval_steps_per_second": 5.597,
32
- "step": 200
33
- },
34
- {
35
- "epoch": 0.24,
36
- "grad_norm": 5.8125,
37
- "learning_rate": 3.804e-05,
38
- "loss": 1.0758,
39
- "step": 300
40
- },
41
- {
42
- "epoch": 0.32,
43
- "grad_norm": 4.875,
44
- "learning_rate": 3.404e-05,
45
- "loss": 1.0637,
46
- "step": 400
47
- },
48
- {
49
- "epoch": 0.32,
50
- "eval_loss": 1.4099386930465698,
51
- "eval_runtime": 81.8405,
52
- "eval_samples_per_second": 44.526,
53
- "eval_steps_per_second": 5.572,
54
- "step": 400
55
- },
56
- {
57
- "epoch": 0.4,
58
- "grad_norm": 5.90625,
59
- "learning_rate": 3.004e-05,
60
- "loss": 1.0337,
61
- "step": 500
62
- },
63
- {
64
- "epoch": 0.48,
65
- "grad_norm": 5.125,
66
- "learning_rate": 2.6040000000000005e-05,
67
- "loss": 1.025,
68
- "step": 600
69
- },
70
- {
71
- "epoch": 0.48,
72
- "eval_loss": 1.3943334817886353,
73
- "eval_runtime": 81.4207,
74
- "eval_samples_per_second": 44.755,
75
- "eval_steps_per_second": 5.601,
76
- "step": 600
77
- },
78
- {
79
- "epoch": 0.56,
80
- "grad_norm": 5.125,
81
- "learning_rate": 2.2040000000000002e-05,
82
- "loss": 1.0328,
83
- "step": 700
84
- },
85
- {
86
- "epoch": 0.64,
87
- "grad_norm": 5.84375,
88
- "learning_rate": 1.804e-05,
89
- "loss": 1.0097,
90
- "step": 800
91
- },
92
- {
93
- "epoch": 0.64,
94
- "eval_loss": 1.3861989974975586,
95
- "eval_runtime": 81.4518,
96
- "eval_samples_per_second": 44.738,
97
- "eval_steps_per_second": 5.598,
98
- "step": 800
99
- },
100
- {
101
- "epoch": 0.72,
102
- "grad_norm": 5.0625,
103
- "learning_rate": 1.4040000000000001e-05,
104
- "loss": 1.0091,
105
- "step": 900
106
- },
107
- {
108
- "epoch": 0.8,
109
- "grad_norm": 5.8125,
110
- "learning_rate": 1.004e-05,
111
- "loss": 0.9927,
112
- "step": 1000
113
- },
114
- {
115
- "epoch": 0.8,
116
- "eval_loss": 1.3814911842346191,
117
- "eval_runtime": 81.426,
118
- "eval_samples_per_second": 44.752,
119
- "eval_steps_per_second": 5.6,
120
- "step": 1000
121
- },
122
- {
123
- "epoch": 0.88,
124
- "grad_norm": 6.5625,
125
- "learning_rate": 6.040000000000001e-06,
126
- "loss": 1.0129,
127
- "step": 1100
128
- },
129
- {
130
- "epoch": 0.96,
131
- "grad_norm": 4.8125,
132
- "learning_rate": 2.0400000000000004e-06,
133
- "loss": 0.988,
134
- "step": 1200
135
- },
136
- {
137
- "epoch": 0.96,
138
- "eval_loss": 1.3815840482711792,
139
- "eval_runtime": 81.4118,
140
- "eval_samples_per_second": 44.76,
141
- "eval_steps_per_second": 5.601,
142
- "step": 1200
143
- }
144
- ],
145
- "logging_steps": 100,
146
- "max_steps": 1250,
147
- "num_input_tokens_seen": 0,
148
- "num_train_epochs": 1,
149
- "save_steps": 500,
150
- "stateful_callbacks": {
151
- "TrainerControl": {
152
- "args": {
153
- "should_epoch_stop": false,
154
- "should_evaluate": false,
155
- "should_log": false,
156
- "should_save": true,
157
- "should_training_stop": true
158
- },
159
- "attributes": {}
160
- }
161
- },
162
- "total_flos": 2.706232049664e+16,
163
- "train_batch_size": 8,
164
- "trial_name": null,
165
- "trial_params": null
166
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5891e3b928ad2808f900a2b53aa256fe6c83df2b1853b1d06afa110235dcafb
3
- size 5304