Zking136 commited on
Commit
2375ea0
·
verified ·
1 Parent(s): e0b64fc

Upload folder using huggingface_hub

Browse files
Files changed (7) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +3 -0
  3. rng_state.pth +3 -0
  4. scaler.pt +3 -0
  5. scheduler.pt +3 -0
  6. trainer_state.json +141 -0
  7. training_args.bin +3 -0
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c6385609a6b55499eb3faed98ade1024154923f852428953632820268f5501b
3
  size 437961724
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:759e5a842f3048c57bd55eebe211cb92e2b5dacebb3e68541014273e94b2a3a4
3
  size 437961724
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b13b05f5ace592687e86e3e153782e7c21a58ffb7b857f9a3820dad0119d978
3
+ size 876044939
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb46cbd184ddb3ea6c036b3de056d6c9c0bf79965383996adb97a80324eecba0
3
+ size 14645
scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:011cddbd80d56274c6cdf2ef6540a6631fc71a1c4ee2a3bd48f51af7775d9509
3
+ size 1383
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39ae5c533a8277d8e0a3e4f104cbbde54d66196b4b2c290e7fff9f5732b5ce7d
3
+ size 1465
trainer_state.json ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 3726,
3
+ "best_metric": 0.18097351491451263,
4
+ "best_model_checkpoint": "./model\\checkpoint-3726",
5
+ "epoch": 3.0,
6
+ "eval_steps": 500,
7
+ "global_step": 5589,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.2683843263553409,
14
+ "grad_norm": 4.979691505432129,
15
+ "learning_rate": 4.557165861513688e-05,
16
+ "loss": 0.618,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 0.5367686527106817,
21
+ "grad_norm": 10.513946533203125,
22
+ "learning_rate": 4.109858650921453e-05,
23
+ "loss": 0.3634,
24
+ "step": 1000
25
+ },
26
+ {
27
+ "epoch": 0.8051529790660226,
28
+ "grad_norm": 0.9428963661193848,
29
+ "learning_rate": 3.662551440329218e-05,
30
+ "loss": 0.2803,
31
+ "step": 1500
32
+ },
33
+ {
34
+ "epoch": 1.0,
35
+ "eval_accuracy": 0.9363758389261745,
36
+ "eval_f1": 0.936185778659979,
37
+ "eval_loss": 0.21076564490795135,
38
+ "eval_runtime": 37.2544,
39
+ "eval_samples_per_second": 199.977,
40
+ "eval_steps_per_second": 12.509,
41
+ "step": 1863
42
+ },
43
+ {
44
+ "epoch": 1.0735373054213633,
45
+ "grad_norm": 4.273161888122559,
46
+ "learning_rate": 3.215244229736983e-05,
47
+ "loss": 0.2266,
48
+ "step": 2000
49
+ },
50
+ {
51
+ "epoch": 1.3419216317767042,
52
+ "grad_norm": 9.790454864501953,
53
+ "learning_rate": 2.767937019144749e-05,
54
+ "loss": 0.1773,
55
+ "step": 2500
56
+ },
57
+ {
58
+ "epoch": 1.6103059581320451,
59
+ "grad_norm": 0.9836813807487488,
60
+ "learning_rate": 2.3215244229736983e-05,
61
+ "loss": 0.1549,
62
+ "step": 3000
63
+ },
64
+ {
65
+ "epoch": 1.8786902844873858,
66
+ "grad_norm": 5.523079872131348,
67
+ "learning_rate": 1.8751118268026483e-05,
68
+ "loss": 0.1524,
69
+ "step": 3500
70
+ },
71
+ {
72
+ "epoch": 2.0,
73
+ "eval_accuracy": 0.9499328859060403,
74
+ "eval_f1": 0.9499109066150679,
75
+ "eval_loss": 0.18097351491451263,
76
+ "eval_runtime": 37.2921,
77
+ "eval_samples_per_second": 199.774,
78
+ "eval_steps_per_second": 12.496,
79
+ "step": 3726
80
+ },
81
+ {
82
+ "epoch": 2.1470746108427265,
83
+ "grad_norm": 0.0565815269947052,
84
+ "learning_rate": 1.4278046162104134e-05,
85
+ "loss": 0.1086,
86
+ "step": 4000
87
+ },
88
+ {
89
+ "epoch": 2.4154589371980677,
90
+ "grad_norm": 0.1514054834842682,
91
+ "learning_rate": 9.804974056181785e-06,
92
+ "loss": 0.0918,
93
+ "step": 4500
94
+ },
95
+ {
96
+ "epoch": 2.6838432635534084,
97
+ "grad_norm": 0.029303351417183876,
98
+ "learning_rate": 5.331901950259439e-06,
99
+ "loss": 0.084,
100
+ "step": 5000
101
+ },
102
+ {
103
+ "epoch": 2.952227589908749,
104
+ "grad_norm": 0.0346352718770504,
105
+ "learning_rate": 8.588298443370907e-07,
106
+ "loss": 0.0709,
107
+ "step": 5500
108
+ },
109
+ {
110
+ "epoch": 3.0,
111
+ "eval_accuracy": 0.9575838926174497,
112
+ "eval_f1": 0.9576189144243515,
113
+ "eval_loss": 0.19314545392990112,
114
+ "eval_runtime": 37.4152,
115
+ "eval_samples_per_second": 199.117,
116
+ "eval_steps_per_second": 12.455,
117
+ "step": 5589
118
+ }
119
+ ],
120
+ "logging_steps": 500,
121
+ "max_steps": 5589,
122
+ "num_input_tokens_seen": 0,
123
+ "num_train_epochs": 3,
124
+ "save_steps": 500,
125
+ "stateful_callbacks": {
126
+ "TrainerControl": {
127
+ "args": {
128
+ "should_epoch_stop": false,
129
+ "should_evaluate": false,
130
+ "should_log": false,
131
+ "should_save": true,
132
+ "should_training_stop": true
133
+ },
134
+ "attributes": {}
135
+ }
136
+ },
137
+ "total_flos": 2.3521550204630016e+16,
138
+ "train_batch_size": 16,
139
+ "trial_name": null,
140
+ "trial_params": null
141
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bc1e524146d6310e51751575751860a0b7e8cf94fcc2f0fbef92d0e70c0d980
3
+ size 5649