Francesco0101 commited on
Commit
a6def81
·
verified ·
1 Parent(s): 3c61e5d

Training in progress, step 9000

Browse files
last-checkpoint/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "Francesco0101/FRABERT-roberta-base-TRAIN",
3
  "architectures": [
4
  "RobertaForSequenceClassification"
5
  ],
 
1
  {
2
+ "_name_or_path": "roberta-base",
3
  "architectures": [
4
  "RobertaForSequenceClassification"
5
  ],
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab1e4e0f32413e03237dfc09ecc5eac8b332d3b35ca54209776824735d1a41d1
3
  size 498615900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f681568b3e5f0dc0b8662337967bea6bcc2a1b337d087b4393a53bd748f1550
3
  size 498615900
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1f9a40d333c3d73f84ff4f045202da4ab16f2c52bd58fa06ab682bd5e838dab
3
  size 997351674
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bb81114b5d0b803046d64d44b543ca971de0505793d404486e4fae1e75bb73f
3
  size 997351674
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b2e69d5ce442e31ea2fc2e1b9498a5d1f83032f2480959767c6fa7f59745dc6
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1aab4d25312c697f24e79aca383328a702077888f66748d1caa0693f08bf5df
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c76c4fad033d3e87048e12b297d55c1a5decfde290b9558738733abe687cd9ed
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:934377a13b65a28f5105294df26e4fe3b57c4d88e0a4908173ecc003fa91b9bb
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.7496885984237682,
3
- "best_model_checkpoint": "training_dir/checkpoint-7000",
4
- "epoch": 1.252740369558409,
5
  "eval_steps": 1000,
6
- "global_step": 8000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -83,82 +83,6 @@
83
  "eval_samples_per_second": 50.569,
84
  "eval_steps_per_second": 6.321,
85
  "step": 4000
86
- },
87
- {
88
- "epoch": 0.7829627309740056,
89
- "grad_norm": 44.1195182800293,
90
- "learning_rate": 9.289772727272728e-06,
91
- "loss": 0.4542,
92
- "step": 5000
93
- },
94
- {
95
- "epoch": 0.7829627309740056,
96
- "eval_accuracy": 0.7517482517482518,
97
- "eval_f1": 0.7410787462847774,
98
- "eval_loss": 0.8051876425743103,
99
- "eval_precision": 0.750223869921944,
100
- "eval_recall": 0.7517482517482518,
101
- "eval_runtime": 49.8353,
102
- "eval_samples_per_second": 45.911,
103
- "eval_steps_per_second": 5.739,
104
- "step": 5000
105
- },
106
- {
107
- "epoch": 0.9395552771688067,
108
- "grad_norm": 120.46759796142578,
109
- "learning_rate": 9.131944444444445e-06,
110
- "loss": 0.4268,
111
- "step": 6000
112
- },
113
- {
114
- "epoch": 0.9395552771688067,
115
- "eval_accuracy": 0.7556818181818182,
116
- "eval_f1": 0.748088378029472,
117
- "eval_loss": 0.9902251958847046,
118
- "eval_precision": 0.757243064686842,
119
- "eval_recall": 0.7556818181818182,
120
- "eval_runtime": 49.224,
121
- "eval_samples_per_second": 46.481,
122
- "eval_steps_per_second": 5.81,
123
- "step": 6000
124
- },
125
- {
126
- "epoch": 1.096147823363608,
127
- "grad_norm": 60.6891975402832,
128
- "learning_rate": 8.974116161616161e-06,
129
- "loss": 0.4014,
130
- "step": 7000
131
- },
132
- {
133
- "epoch": 1.096147823363608,
134
- "eval_accuracy": 0.7574300699300699,
135
- "eval_f1": 0.7496885984237682,
136
- "eval_loss": 0.9940707087516785,
137
- "eval_precision": 0.7543068859180688,
138
- "eval_recall": 0.7574300699300699,
139
- "eval_runtime": 49.3422,
140
- "eval_samples_per_second": 46.37,
141
- "eval_steps_per_second": 5.796,
142
- "step": 7000
143
- },
144
- {
145
- "epoch": 1.252740369558409,
146
- "grad_norm": 43.56020736694336,
147
- "learning_rate": 8.816287878787879e-06,
148
- "loss": 0.4071,
149
- "step": 8000
150
- },
151
- {
152
- "epoch": 1.252740369558409,
153
- "eval_accuracy": 0.7390734265734266,
154
- "eval_f1": 0.7219289922874507,
155
- "eval_loss": 0.9887688755989075,
156
- "eval_precision": 0.738468925570974,
157
- "eval_recall": 0.7390734265734266,
158
- "eval_runtime": 49.3611,
159
- "eval_samples_per_second": 46.352,
160
- "eval_steps_per_second": 5.794,
161
- "step": 8000
162
  }
163
  ],
164
  "logging_steps": 1000,
@@ -178,7 +102,7 @@
178
  "attributes": {}
179
  }
180
  },
181
- "total_flos": 1.6378012691020512e+16,
182
  "train_batch_size": 8,
183
  "trial_name": null,
184
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7341503634182242,
3
+ "best_model_checkpoint": "training_dir/checkpoint-2000",
4
+ "epoch": 0.6263701847792045,
5
  "eval_steps": 1000,
6
+ "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
83
  "eval_samples_per_second": 50.569,
84
  "eval_steps_per_second": 6.321,
85
  "step": 4000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  }
87
  ],
88
  "logging_steps": 1000,
 
102
  "attributes": {}
103
  }
104
  },
105
+ "total_flos": 8197088860811088.0,
106
  "train_batch_size": 8,
107
  "trial_name": null,
108
  "trial_params": null
logs/events.out.tfevents.1720707245.61453e59f6ad.199.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31b7e0a7476de91e519e6f6ff2bda2314dd37fe6d6b7b495b4b9dd689a6b8c86
3
- size 7814
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ee57a123a49bf02dc62a57f3bf6e48fce8abedba3555585cb79a9ee9d6216de
3
+ size 8497
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab1e4e0f32413e03237dfc09ecc5eac8b332d3b35ca54209776824735d1a41d1
3
  size 498615900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e99cddd0ac2d7edc610f7607512228252c93a4caa3507b3463098de6905c1c69
3
  size 498615900