ccore committed on
Commit
6a39c4c
·
verified ·
1 Parent(s): 50e1ca7

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "ccore/RDW",
3
  "_remove_final_layer_norm": false,
4
  "activation_dropout": 0.0,
5
  "activation_function": "relu",
 
1
  {
2
+ "_name_or_path": "ccore/RDW2",
3
  "_remove_final_layer_norm": false,
4
  "activation_dropout": 0.0,
5
  "activation_function": "relu",
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7f177ac17d529e29efb6e2c90b4fac1aac05de2d76cd0a83c56954cf8b22078
3
  size 500979600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c25ff01c23d13ed3137b2274e6ca83d4a20e50cc5caa72cbb5e10a7b23252df9
3
  size 500979600
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed9d1686584d5775da006b6d922cf6c3eacb2065b66a18f07fbae7e722e33ba8
3
  size 1002078330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f0300a6d21bd10dd2251435d23b360d5ea677c5d3bcc755b496012552e39e42
3
  size 1002078330
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6edbde201324905f6602d4ac3a66a2f9df04374cec29badfe0ef7e38dacc7b2
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2aee0a005e9c584d35058f5950f9994ef813f416a0e69d6e9ccf6aa888689f86
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c54e76d8423a6511375b8501c7ed973174257694d1818266bf8544e0951cc13c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a547db3d9bb996ec70227440fa3c775a359414e054f09a8f9234161532ebca1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,119 +1,20 @@
1
  {
2
- "best_metric": 0.29721641540527344,
3
- "best_model_checkpoint": "./opt_trained/checkpoint-2576",
4
- "epoch": 9.0,
5
  "eval_steps": 500,
6
- "global_step": 2898,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_loss": 0.29984787106513977,
14
- "eval_runtime": 145.4869,
15
- "eval_samples_per_second": 19.631,
16
- "eval_steps_per_second": 4.908,
17
  "step": 322
18
- },
19
- {
20
- "epoch": 1.5540856031128405,
21
- "grad_norm": 2.1858065128326416,
22
- "learning_rate": 3.940433505043101e-05,
23
- "loss": 0.2998,
24
- "step": 500
25
- },
26
- {
27
- "epoch": 2.0,
28
- "eval_loss": 0.29886579513549805,
29
- "eval_runtime": 145.5758,
30
- "eval_samples_per_second": 19.619,
31
- "eval_steps_per_second": 4.905,
32
- "step": 644
33
- },
34
- {
35
- "epoch": 3.0,
36
- "eval_loss": 0.29954472184181213,
37
- "eval_runtime": 145.5114,
38
- "eval_samples_per_second": 19.627,
39
- "eval_steps_per_second": 4.907,
40
- "step": 966
41
- },
42
- {
43
- "epoch": 3.1058365758754864,
44
- "grad_norm": 2.7013330459594727,
45
- "learning_rate": 3.765282187493851e-05,
46
- "loss": 0.2968,
47
- "step": 1000
48
- },
49
- {
50
- "epoch": 4.0,
51
- "eval_loss": 0.2978520393371582,
52
- "eval_runtime": 145.3893,
53
- "eval_samples_per_second": 19.644,
54
- "eval_steps_per_second": 4.911,
55
- "step": 1288
56
- },
57
- {
58
- "epoch": 4.659922178988327,
59
- "grad_norm": 2.5350475311279297,
60
- "learning_rate": 3.484979197425745e-05,
61
- "loss": 0.2948,
62
- "step": 1500
63
- },
64
- {
65
- "epoch": 5.0,
66
- "eval_loss": 0.29741692543029785,
67
- "eval_runtime": 145.627,
68
- "eval_samples_per_second": 19.612,
69
- "eval_steps_per_second": 4.903,
70
- "step": 1610
71
- },
72
- {
73
- "epoch": 6.0,
74
- "eval_loss": 0.29865920543670654,
75
- "eval_runtime": 145.5148,
76
- "eval_samples_per_second": 19.627,
77
- "eval_steps_per_second": 4.907,
78
- "step": 1932
79
- },
80
- {
81
- "epoch": 6.211673151750973,
82
- "grad_norm": 2.5450387001037598,
83
- "learning_rate": 3.11703315146825e-05,
84
- "loss": 0.292,
85
- "step": 2000
86
- },
87
- {
88
- "epoch": 7.0,
89
- "eval_loss": 0.29770833253860474,
90
- "eval_runtime": 146.0174,
91
- "eval_samples_per_second": 19.559,
92
- "eval_steps_per_second": 4.89,
93
- "step": 2254
94
- },
95
- {
96
- "epoch": 7.765758754863813,
97
- "grad_norm": 2.2594358921051025,
98
- "learning_rate": 2.6818939512889293e-05,
99
- "loss": 0.2897,
100
- "step": 2500
101
- },
102
- {
103
- "epoch": 8.0,
104
- "eval_loss": 0.29721641540527344,
105
- "eval_runtime": 146.0416,
106
- "eval_samples_per_second": 19.556,
107
- "eval_steps_per_second": 4.889,
108
- "step": 2576
109
- },
110
- {
111
- "epoch": 9.0,
112
- "eval_loss": 0.29853811860084534,
113
- "eval_runtime": 145.9347,
114
- "eval_samples_per_second": 19.57,
115
- "eval_steps_per_second": 4.893,
116
- "step": 2898
117
  }
118
  ],
119
  "logging_steps": 500,
@@ -133,7 +34,7 @@
133
  "attributes": {}
134
  }
135
  },
136
- "total_flos": 1.2085017772032e+17,
137
  "train_batch_size": 10,
138
  "trial_name": null,
139
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.2863014340400696,
3
+ "best_model_checkpoint": "./opt_trained/checkpoint-322",
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 322,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_loss": 0.2863014340400696,
14
+ "eval_runtime": 114.3677,
15
+ "eval_samples_per_second": 24.972,
16
+ "eval_steps_per_second": 6.243,
17
  "step": 322
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  }
19
  ],
20
  "logging_steps": 500,
 
34
  "attributes": {}
35
  }
36
  },
37
+ "total_flos": 1.014952664448e+16,
38
  "train_batch_size": 10,
39
  "trial_name": null,
40
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85dbb59c613e3aa11d2eed3526fd27daf20a8edc39c428468a730da95c5973db
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e5977c21c679dfb75a8cca2ae67dcb35c2eb17a3f181db6ee924c38ad4e6f39
3
  size 5368