BartLunenborg committed on
Commit
137457a
·
1 Parent(s): 408b5e3

Trained model and test file

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. finetuned/checkpoint-1000/optimizer.pt +0 -3
  2. finetuned/checkpoint-1000/rng_state.pth +0 -3
  3. finetuned/checkpoint-1000/trainer_state.json +0 -47
  4. finetuned/checkpoint-1500/config.json +0 -35
  5. finetuned/checkpoint-1500/rng_state.pth +0 -3
  6. finetuned/checkpoint-1500/scheduler.pt +0 -3
  7. finetuned/checkpoint-1500/trainer_state.json +0 -54
  8. finetuned/checkpoint-1500/training_args.bin +0 -3
  9. finetuned/checkpoint-2000/config.json +0 -35
  10. finetuned/checkpoint-2000/model.safetensors +0 -3
  11. finetuned/checkpoint-2000/trainer_state.json +0 -61
  12. finetuned/checkpoint-2500/model.safetensors +0 -3
  13. finetuned/checkpoint-2500/optimizer.pt +0 -3
  14. finetuned/checkpoint-2500/scheduler.pt +0 -3
  15. finetuned/checkpoint-2500/trainer_state.json +0 -68
  16. finetuned/checkpoint-2500/training_args.bin +0 -3
  17. finetuned/checkpoint-3000/config.json +0 -35
  18. finetuned/checkpoint-3000/model.safetensors +0 -3
  19. finetuned/checkpoint-3000/optimizer.pt +0 -3
  20. finetuned/checkpoint-3000/rng_state.pth +0 -3
  21. finetuned/checkpoint-3000/scheduler.pt +0 -3
  22. finetuned/checkpoint-3000/trainer_state.json +0 -75
  23. finetuned/checkpoint-3000/training_args.bin +0 -3
  24. finetuned/checkpoint-3500/config.json +0 -35
  25. finetuned/checkpoint-3500/model.safetensors +0 -3
  26. finetuned/checkpoint-3500/optimizer.pt +0 -3
  27. finetuned/checkpoint-3500/rng_state.pth +0 -3
  28. finetuned/checkpoint-3500/scheduler.pt +0 -3
  29. finetuned/checkpoint-3500/trainer_state.json +0 -82
  30. finetuned/checkpoint-3500/training_args.bin +0 -3
  31. finetuned/checkpoint-3910/config.json +0 -35
  32. finetuned/checkpoint-3910/model.safetensors +0 -3
  33. finetuned/checkpoint-3910/optimizer.pt +0 -3
  34. finetuned/checkpoint-3910/rng_state.pth +0 -3
  35. finetuned/checkpoint-3910/scheduler.pt +0 -3
  36. finetuned/checkpoint-3910/trainer_state.json +0 -82
  37. finetuned/checkpoint-3910/training_args.bin +0 -3
  38. finetuned/{checkpoint-1000 → checkpoint-4000}/config.json +1 -1
  39. finetuned/{checkpoint-1500 → checkpoint-4000}/model.safetensors +1 -1
  40. finetuned/{checkpoint-1500 → checkpoint-4000}/optimizer.pt +2 -2
  41. finetuned/{checkpoint-2000 → checkpoint-4000}/rng_state.pth +1 -1
  42. finetuned/{checkpoint-1000 → checkpoint-4000}/scheduler.pt +1 -1
  43. finetuned/checkpoint-4000/trainer_state.json +209 -0
  44. finetuned/{checkpoint-2000 → checkpoint-4000}/training_args.bin +2 -2
  45. finetuned/{checkpoint-2500 → checkpoint-4478}/config.json +1 -1
  46. finetuned/{checkpoint-1000 → checkpoint-4478}/model.safetensors +1 -1
  47. finetuned/{checkpoint-2000 → checkpoint-4478}/optimizer.pt +2 -2
  48. finetuned/{checkpoint-2500 → checkpoint-4478}/rng_state.pth +1 -1
  49. finetuned/{checkpoint-2000 → checkpoint-4478}/scheduler.pt +1 -1
  50. finetuned/checkpoint-4478/trainer_state.json +216 -0
finetuned/checkpoint-1000/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:34f7dc356694bc193a20fcd5d3d89cb71c8dfbd059d16ecd1df5d4f2f57789d4
3
- size 876038330
 
 
 
 
finetuned/checkpoint-1000/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc53217f065c0885a0a689c3821c9fcb75774575fb9467a6417cad07882490d5
3
- size 14244
 
 
 
 
finetuned/checkpoint-1000/trainer_state.json DELETED
@@ -1,47 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 1.278772378516624,
5
- "eval_steps": 500,
6
- "global_step": 1000,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.639386189258312,
13
- "grad_norm": 533343.5625,
14
- "learning_rate": 8.721227621483377e-06,
15
- "loss": 0.2807,
16
- "step": 500
17
- },
18
- {
19
- "epoch": 1.278772378516624,
20
- "grad_norm": 293576.46875,
21
- "learning_rate": 7.442455242966753e-06,
22
- "loss": 0.1817,
23
- "step": 1000
24
- }
25
- ],
26
- "logging_steps": 500,
27
- "max_steps": 3910,
28
- "num_input_tokens_seen": 0,
29
- "num_train_epochs": 5,
30
- "save_steps": 500,
31
- "stateful_callbacks": {
32
- "TrainerControl": {
33
- "args": {
34
- "should_epoch_stop": false,
35
- "should_evaluate": false,
36
- "should_log": false,
37
- "should_save": true,
38
- "should_training_stop": false
39
- },
40
- "attributes": {}
41
- }
42
- },
43
- "total_flos": 8411199995512320.0,
44
- "train_batch_size": 32,
45
- "trial_name": null,
46
- "trial_params": null
47
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
finetuned/checkpoint-1500/config.json DELETED
@@ -1,35 +0,0 @@
1
- {
2
- "_name_or_path": "google-bert/bert-base-uncased",
3
- "architectures": [
4
- "BertForSequenceClassification"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "classifier_dropout": null,
8
- "gradient_checkpointing": false,
9
- "hidden_act": "gelu",
10
- "hidden_dropout_prob": 0.1,
11
- "hidden_size": 768,
12
- "id2label": {
13
- "0": "NEGATIVE",
14
- "1": "POSITIVE"
15
- },
16
- "initializer_range": 0.02,
17
- "intermediate_size": 3072,
18
- "label2id": {
19
- "NEGATIVE": 0,
20
- "POSITIVE": 1
21
- },
22
- "layer_norm_eps": 1e-12,
23
- "max_position_embeddings": 512,
24
- "model_type": "bert",
25
- "num_attention_heads": 12,
26
- "num_hidden_layers": 12,
27
- "pad_token_id": 0,
28
- "position_embedding_type": "absolute",
29
- "problem_type": "single_label_classification",
30
- "torch_dtype": "float32",
31
- "transformers_version": "4.47.0",
32
- "type_vocab_size": 2,
33
- "use_cache": true,
34
- "vocab_size": 30522
35
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
finetuned/checkpoint-1500/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3ac55272dbf5131637a60e2090309efebfe823872bbf2fe2c1677fa2e2cd562
3
- size 14244
 
 
 
 
finetuned/checkpoint-1500/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d617fd58fe4ec741cd9bbc14d43f1527b3eadedfbbc9e5741d564d795210d91f
3
- size 1064
 
 
 
 
finetuned/checkpoint-1500/trainer_state.json DELETED
@@ -1,54 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 1.918158567774936,
5
- "eval_steps": 500,
6
- "global_step": 1500,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.639386189258312,
13
- "grad_norm": 533343.5625,
14
- "learning_rate": 8.721227621483377e-06,
15
- "loss": 0.2807,
16
- "step": 500
17
- },
18
- {
19
- "epoch": 1.278772378516624,
20
- "grad_norm": 293576.46875,
21
- "learning_rate": 7.442455242966753e-06,
22
- "loss": 0.1817,
23
- "step": 1000
24
- },
25
- {
26
- "epoch": 1.918158567774936,
27
- "grad_norm": 212895.234375,
28
- "learning_rate": 6.163682864450128e-06,
29
- "loss": 0.1532,
30
- "step": 1500
31
- }
32
- ],
33
- "logging_steps": 500,
34
- "max_steps": 3910,
35
- "num_input_tokens_seen": 0,
36
- "num_train_epochs": 5,
37
- "save_steps": 500,
38
- "stateful_callbacks": {
39
- "TrainerControl": {
40
- "args": {
41
- "should_epoch_stop": false,
42
- "should_evaluate": false,
43
- "should_log": false,
44
- "should_save": true,
45
- "should_training_stop": false
46
- },
47
- "attributes": {}
48
- }
49
- },
50
- "total_flos": 1.261689865991424e+16,
51
- "train_batch_size": 32,
52
- "trial_name": null,
53
- "trial_params": null
54
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
finetuned/checkpoint-1500/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2d467fd004b82564bf87f840a5a063cdb96832e1ad2d05429b1e9464eca34a0
3
- size 5304
 
 
 
 
finetuned/checkpoint-2000/config.json DELETED
@@ -1,35 +0,0 @@
1
- {
2
- "_name_or_path": "google-bert/bert-base-uncased",
3
- "architectures": [
4
- "BertForSequenceClassification"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "classifier_dropout": null,
8
- "gradient_checkpointing": false,
9
- "hidden_act": "gelu",
10
- "hidden_dropout_prob": 0.1,
11
- "hidden_size": 768,
12
- "id2label": {
13
- "0": "NEGATIVE",
14
- "1": "POSITIVE"
15
- },
16
- "initializer_range": 0.02,
17
- "intermediate_size": 3072,
18
- "label2id": {
19
- "NEGATIVE": 0,
20
- "POSITIVE": 1
21
- },
22
- "layer_norm_eps": 1e-12,
23
- "max_position_embeddings": 512,
24
- "model_type": "bert",
25
- "num_attention_heads": 12,
26
- "num_hidden_layers": 12,
27
- "pad_token_id": 0,
28
- "position_embedding_type": "absolute",
29
- "problem_type": "single_label_classification",
30
- "torch_dtype": "float32",
31
- "transformers_version": "4.47.0",
32
- "type_vocab_size": 2,
33
- "use_cache": true,
34
- "vocab_size": 30522
35
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
finetuned/checkpoint-2000/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b11088c111728c93325c62ad747322f07e75d5895962a6ae6804f6214f43547f
3
- size 437958648
 
 
 
 
finetuned/checkpoint-2000/trainer_state.json DELETED
@@ -1,61 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 2.557544757033248,
5
- "eval_steps": 500,
6
- "global_step": 2000,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.639386189258312,
13
- "grad_norm": 533343.5625,
14
- "learning_rate": 8.721227621483377e-06,
15
- "loss": 0.2807,
16
- "step": 500
17
- },
18
- {
19
- "epoch": 1.278772378516624,
20
- "grad_norm": 293576.46875,
21
- "learning_rate": 7.442455242966753e-06,
22
- "loss": 0.1817,
23
- "step": 1000
24
- },
25
- {
26
- "epoch": 1.918158567774936,
27
- "grad_norm": 212895.234375,
28
- "learning_rate": 6.163682864450128e-06,
29
- "loss": 0.1532,
30
- "step": 1500
31
- },
32
- {
33
- "epoch": 2.557544757033248,
34
- "grad_norm": 1228927.0,
35
- "learning_rate": 4.884910485933504e-06,
36
- "loss": 0.1079,
37
- "step": 2000
38
- }
39
- ],
40
- "logging_steps": 500,
41
- "max_steps": 3910,
42
- "num_input_tokens_seen": 0,
43
- "num_train_epochs": 5,
44
- "save_steps": 500,
45
- "stateful_callbacks": {
46
- "TrainerControl": {
47
- "args": {
48
- "should_epoch_stop": false,
49
- "should_evaluate": false,
50
- "should_log": false,
51
- "should_save": true,
52
- "should_training_stop": false
53
- },
54
- "attributes": {}
55
- }
56
- },
57
- "total_flos": 1.682006488040832e+16,
58
- "train_batch_size": 32,
59
- "trial_name": null,
60
- "trial_params": null
61
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
finetuned/checkpoint-2500/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:908d9d02379b868830e83e8c186373c262ce29ef16bdca187427a80212f4bc16
3
- size 437958648
 
 
 
 
finetuned/checkpoint-2500/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:38838ae6c3801dc7aeb827ed841ac89270954c096484a5d7ba985946927c9676
3
- size 876038330
 
 
 
 
finetuned/checkpoint-2500/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:22434b050bd3651c8706a9c9f40e9bc7aabffee18cb6bce61a5af85e533c0f44
3
- size 1064
 
 
 
 
finetuned/checkpoint-2500/trainer_state.json DELETED
@@ -1,68 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 3.1969309462915603,
5
- "eval_steps": 500,
6
- "global_step": 2500,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.639386189258312,
13
- "grad_norm": 533343.5625,
14
- "learning_rate": 8.721227621483377e-06,
15
- "loss": 0.2807,
16
- "step": 500
17
- },
18
- {
19
- "epoch": 1.278772378516624,
20
- "grad_norm": 293576.46875,
21
- "learning_rate": 7.442455242966753e-06,
22
- "loss": 0.1817,
23
- "step": 1000
24
- },
25
- {
26
- "epoch": 1.918158567774936,
27
- "grad_norm": 212895.234375,
28
- "learning_rate": 6.163682864450128e-06,
29
- "loss": 0.1532,
30
- "step": 1500
31
- },
32
- {
33
- "epoch": 2.557544757033248,
34
- "grad_norm": 1228927.0,
35
- "learning_rate": 4.884910485933504e-06,
36
- "loss": 0.1079,
37
- "step": 2000
38
- },
39
- {
40
- "epoch": 3.1969309462915603,
41
- "grad_norm": 178655.1875,
42
- "learning_rate": 3.60613810741688e-06,
43
- "loss": 0.1019,
44
- "step": 2500
45
- }
46
- ],
47
- "logging_steps": 500,
48
- "max_steps": 3910,
49
- "num_input_tokens_seen": 0,
50
- "num_train_epochs": 5,
51
- "save_steps": 500,
52
- "stateful_callbacks": {
53
- "TrainerControl": {
54
- "args": {
55
- "should_epoch_stop": false,
56
- "should_evaluate": false,
57
- "should_log": false,
58
- "should_save": true,
59
- "should_training_stop": false
60
- },
61
- "attributes": {}
62
- }
63
- },
64
- "total_flos": 2.10205794347976e+16,
65
- "train_batch_size": 32,
66
- "trial_name": null,
67
- "trial_params": null
68
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
finetuned/checkpoint-2500/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2d467fd004b82564bf87f840a5a063cdb96832e1ad2d05429b1e9464eca34a0
3
- size 5304
 
 
 
 
finetuned/checkpoint-3000/config.json DELETED
@@ -1,35 +0,0 @@
1
- {
2
- "_name_or_path": "google-bert/bert-base-uncased",
3
- "architectures": [
4
- "BertForSequenceClassification"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "classifier_dropout": null,
8
- "gradient_checkpointing": false,
9
- "hidden_act": "gelu",
10
- "hidden_dropout_prob": 0.1,
11
- "hidden_size": 768,
12
- "id2label": {
13
- "0": "NEGATIVE",
14
- "1": "POSITIVE"
15
- },
16
- "initializer_range": 0.02,
17
- "intermediate_size": 3072,
18
- "label2id": {
19
- "NEGATIVE": 0,
20
- "POSITIVE": 1
21
- },
22
- "layer_norm_eps": 1e-12,
23
- "max_position_embeddings": 512,
24
- "model_type": "bert",
25
- "num_attention_heads": 12,
26
- "num_hidden_layers": 12,
27
- "pad_token_id": 0,
28
- "position_embedding_type": "absolute",
29
- "problem_type": "single_label_classification",
30
- "torch_dtype": "float32",
31
- "transformers_version": "4.47.0",
32
- "type_vocab_size": 2,
33
- "use_cache": true,
34
- "vocab_size": 30522
35
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
finetuned/checkpoint-3000/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bb1795d571ef2773eb13e791714856dd9c5122374e7e99e5b90f356035d645e
3
- size 437958648
 
 
 
 
finetuned/checkpoint-3000/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac4702546e7abcea92809a026382228410e589e48d75248bbe9be1c7c36c2d49
3
- size 876038330
 
 
 
 
finetuned/checkpoint-3000/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:adad5a1d42d3b0e0b4161eac348e2efbeced358e2597bda6624afeb9b3dbe137
3
- size 14244
 
 
 
 
finetuned/checkpoint-3000/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c52cd26d76efa87b43e9faa2f20505bf24be8b072e8dedce581551dd946bd27
3
- size 1064
 
 
 
 
finetuned/checkpoint-3000/trainer_state.json DELETED
@@ -1,75 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 3.836317135549872,
5
- "eval_steps": 500,
6
- "global_step": 3000,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.639386189258312,
13
- "grad_norm": 533343.5625,
14
- "learning_rate": 8.721227621483377e-06,
15
- "loss": 0.2807,
16
- "step": 500
17
- },
18
- {
19
- "epoch": 1.278772378516624,
20
- "grad_norm": 293576.46875,
21
- "learning_rate": 7.442455242966753e-06,
22
- "loss": 0.1817,
23
- "step": 1000
24
- },
25
- {
26
- "epoch": 1.918158567774936,
27
- "grad_norm": 212895.234375,
28
- "learning_rate": 6.163682864450128e-06,
29
- "loss": 0.1532,
30
- "step": 1500
31
- },
32
- {
33
- "epoch": 2.557544757033248,
34
- "grad_norm": 1228927.0,
35
- "learning_rate": 4.884910485933504e-06,
36
- "loss": 0.1079,
37
- "step": 2000
38
- },
39
- {
40
- "epoch": 3.1969309462915603,
41
- "grad_norm": 178655.1875,
42
- "learning_rate": 3.60613810741688e-06,
43
- "loss": 0.1019,
44
- "step": 2500
45
- },
46
- {
47
- "epoch": 3.836317135549872,
48
- "grad_norm": 1843713.875,
49
- "learning_rate": 2.327365728900256e-06,
50
- "loss": 0.0993,
51
- "step": 3000
52
- }
53
- ],
54
- "logging_steps": 500,
55
- "max_steps": 3910,
56
- "num_input_tokens_seen": 0,
57
- "num_train_epochs": 5,
58
- "save_steps": 500,
59
- "stateful_callbacks": {
60
- "TrainerControl": {
61
- "args": {
62
- "should_epoch_stop": false,
63
- "should_evaluate": false,
64
- "should_log": false,
65
- "should_save": true,
66
- "should_training_stop": false
67
- },
68
- "attributes": {}
69
- }
70
- },
71
- "total_flos": 2.52303563205576e+16,
72
- "train_batch_size": 32,
73
- "trial_name": null,
74
- "trial_params": null
75
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
finetuned/checkpoint-3000/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2d467fd004b82564bf87f840a5a063cdb96832e1ad2d05429b1e9464eca34a0
3
- size 5304
 
 
 
 
finetuned/checkpoint-3500/config.json DELETED
@@ -1,35 +0,0 @@
1
- {
2
- "_name_or_path": "google-bert/bert-base-uncased",
3
- "architectures": [
4
- "BertForSequenceClassification"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "classifier_dropout": null,
8
- "gradient_checkpointing": false,
9
- "hidden_act": "gelu",
10
- "hidden_dropout_prob": 0.1,
11
- "hidden_size": 768,
12
- "id2label": {
13
- "0": "NEGATIVE",
14
- "1": "POSITIVE"
15
- },
16
- "initializer_range": 0.02,
17
- "intermediate_size": 3072,
18
- "label2id": {
19
- "NEGATIVE": 0,
20
- "POSITIVE": 1
21
- },
22
- "layer_norm_eps": 1e-12,
23
- "max_position_embeddings": 512,
24
- "model_type": "bert",
25
- "num_attention_heads": 12,
26
- "num_hidden_layers": 12,
27
- "pad_token_id": 0,
28
- "position_embedding_type": "absolute",
29
- "problem_type": "single_label_classification",
30
- "torch_dtype": "float32",
31
- "transformers_version": "4.47.0",
32
- "type_vocab_size": 2,
33
- "use_cache": true,
34
- "vocab_size": 30522
35
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
finetuned/checkpoint-3500/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:71a4030abab180fdb181991d1c90acafe0e1c0abb79bd299c6e3cdd2ff8f50bd
3
- size 437958648
 
 
 
 
finetuned/checkpoint-3500/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:70c863095ecf7d6fe801cf54d255f7d89d2de682dfb5d0e2fb7312733d931a27
3
- size 876038330
 
 
 
 
finetuned/checkpoint-3500/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec2710bfa27485acc2db1bd72a7d4db64a01a5d2aea25e65b26150ff601d5073
3
- size 14244
 
 
 
 
finetuned/checkpoint-3500/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a52805cefea10e59a479e9ca9cc65000858d8213bdab457913dd86ef55fcab4
3
- size 1064
 
 
 
 
finetuned/checkpoint-3500/trainer_state.json DELETED
@@ -1,82 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 4.475703324808184,
5
- "eval_steps": 500,
6
- "global_step": 3500,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.639386189258312,
13
- "grad_norm": 533343.5625,
14
- "learning_rate": 8.721227621483377e-06,
15
- "loss": 0.2807,
16
- "step": 500
17
- },
18
- {
19
- "epoch": 1.278772378516624,
20
- "grad_norm": 293576.46875,
21
- "learning_rate": 7.442455242966753e-06,
22
- "loss": 0.1817,
23
- "step": 1000
24
- },
25
- {
26
- "epoch": 1.918158567774936,
27
- "grad_norm": 212895.234375,
28
- "learning_rate": 6.163682864450128e-06,
29
- "loss": 0.1532,
30
- "step": 1500
31
- },
32
- {
33
- "epoch": 2.557544757033248,
34
- "grad_norm": 1228927.0,
35
- "learning_rate": 4.884910485933504e-06,
36
- "loss": 0.1079,
37
- "step": 2000
38
- },
39
- {
40
- "epoch": 3.1969309462915603,
41
- "grad_norm": 178655.1875,
42
- "learning_rate": 3.60613810741688e-06,
43
- "loss": 0.1019,
44
- "step": 2500
45
- },
46
- {
47
- "epoch": 3.836317135549872,
48
- "grad_norm": 1843713.875,
49
- "learning_rate": 2.327365728900256e-06,
50
- "loss": 0.0993,
51
- "step": 3000
52
- },
53
- {
54
- "epoch": 4.475703324808184,
55
- "grad_norm": 966544.375,
56
- "learning_rate": 1.0485933503836318e-06,
57
- "loss": 0.0954,
58
- "step": 3500
59
- }
60
- ],
61
- "logging_steps": 500,
62
- "max_steps": 3910,
63
- "num_input_tokens_seen": 0,
64
- "num_train_epochs": 5,
65
- "save_steps": 500,
66
- "stateful_callbacks": {
67
- "TrainerControl": {
68
- "args": {
69
- "should_epoch_stop": false,
70
- "should_evaluate": false,
71
- "should_log": false,
72
- "should_save": true,
73
- "should_training_stop": false
74
- },
75
- "attributes": {}
76
- }
77
- },
78
- "total_flos": 2.943138476372688e+16,
79
- "train_batch_size": 32,
80
- "trial_name": null,
81
- "trial_params": null
82
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
finetuned/checkpoint-3500/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2d467fd004b82564bf87f840a5a063cdb96832e1ad2d05429b1e9464eca34a0
3
- size 5304
 
 
 
 
finetuned/checkpoint-3910/config.json DELETED
@@ -1,35 +0,0 @@
1
- {
2
- "_name_or_path": "google-bert/bert-base-uncased",
3
- "architectures": [
4
- "BertForSequenceClassification"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "classifier_dropout": null,
8
- "gradient_checkpointing": false,
9
- "hidden_act": "gelu",
10
- "hidden_dropout_prob": 0.1,
11
- "hidden_size": 768,
12
- "id2label": {
13
- "0": "NEGATIVE",
14
- "1": "POSITIVE"
15
- },
16
- "initializer_range": 0.02,
17
- "intermediate_size": 3072,
18
- "label2id": {
19
- "NEGATIVE": 0,
20
- "POSITIVE": 1
21
- },
22
- "layer_norm_eps": 1e-12,
23
- "max_position_embeddings": 512,
24
- "model_type": "bert",
25
- "num_attention_heads": 12,
26
- "num_hidden_layers": 12,
27
- "pad_token_id": 0,
28
- "position_embedding_type": "absolute",
29
- "problem_type": "single_label_classification",
30
- "torch_dtype": "float32",
31
- "transformers_version": "4.47.0",
32
- "type_vocab_size": 2,
33
- "use_cache": true,
34
- "vocab_size": 30522
35
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
finetuned/checkpoint-3910/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd6371c7202fb80670a014b63253b77c372ba800266c5c52a02cdf5e348c0525
3
- size 437958648
 
 
 
 
finetuned/checkpoint-3910/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc9c2b12f4c0f337b9a1930962b4540a8ae7dcd5e499cfb44df38d5d14bc3de3
3
- size 876038330
 
 
 
 
finetuned/checkpoint-3910/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5473c0291a1c0eb36f990d0d917ac13a6888d99a6c00f1ebd7a3566a18a4bf11
3
- size 14244
 
 
 
 
finetuned/checkpoint-3910/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b89bdce6e0443b324c96f188ef932f1b73ba0bf8c16033128e8999f65da055b3
3
- size 1064
 
 
 
 
finetuned/checkpoint-3910/trainer_state.json DELETED
@@ -1,82 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 5.0,
5
- "eval_steps": 500,
6
- "global_step": 3910,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.639386189258312,
13
- "grad_norm": 533343.5625,
14
- "learning_rate": 8.721227621483377e-06,
15
- "loss": 0.2807,
16
- "step": 500
17
- },
18
- {
19
- "epoch": 1.278772378516624,
20
- "grad_norm": 293576.46875,
21
- "learning_rate": 7.442455242966753e-06,
22
- "loss": 0.1817,
23
- "step": 1000
24
- },
25
- {
26
- "epoch": 1.918158567774936,
27
- "grad_norm": 212895.234375,
28
- "learning_rate": 6.163682864450128e-06,
29
- "loss": 0.1532,
30
- "step": 1500
31
- },
32
- {
33
- "epoch": 2.557544757033248,
34
- "grad_norm": 1228927.0,
35
- "learning_rate": 4.884910485933504e-06,
36
- "loss": 0.1079,
37
- "step": 2000
38
- },
39
- {
40
- "epoch": 3.1969309462915603,
41
- "grad_norm": 178655.1875,
42
- "learning_rate": 3.60613810741688e-06,
43
- "loss": 0.1019,
44
- "step": 2500
45
- },
46
- {
47
- "epoch": 3.836317135549872,
48
- "grad_norm": 1843713.875,
49
- "learning_rate": 2.327365728900256e-06,
50
- "loss": 0.0993,
51
- "step": 3000
52
- },
53
- {
54
- "epoch": 4.475703324808184,
55
- "grad_norm": 966544.375,
56
- "learning_rate": 1.0485933503836318e-06,
57
- "loss": 0.0954,
58
- "step": 3500
59
- }
60
- ],
61
- "logging_steps": 500,
62
- "max_steps": 3910,
63
- "num_input_tokens_seen": 0,
64
- "num_train_epochs": 5,
65
- "save_steps": 500,
66
- "stateful_callbacks": {
67
- "TrainerControl": {
68
- "args": {
69
- "should_epoch_stop": false,
70
- "should_evaluate": false,
71
- "should_log": false,
72
- "should_save": true,
73
- "should_training_stop": true
74
- },
75
- "attributes": {}
76
- }
77
- },
78
- "total_flos": 3.287503158960144e+16,
79
- "train_batch_size": 32,
80
- "trial_name": null,
81
- "trial_params": null
82
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
finetuned/checkpoint-3910/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2d467fd004b82564bf87f840a5a063cdb96832e1ad2d05429b1e9464eca34a0
3
- size 5304
 
 
 
 
finetuned/{checkpoint-1000 → checkpoint-4000}/config.json RENAMED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "google-bert/bert-base-uncased",
3
  "architectures": [
4
  "BertForSequenceClassification"
5
  ],
 
1
  {
2
+ "_name_or_path": "bert-base-uncased",
3
  "architectures": [
4
  "BertForSequenceClassification"
5
  ],
finetuned/{checkpoint-1500 → checkpoint-4000}/model.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97429bde605537275cbae7b22f2c311b0701995fe46479e20e73d33d3ffa3caf
3
  size 437958648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41a96c9dbcd10f3392344b0d993bb252e361f6a10794df174d228d32026a36bb
3
  size 437958648
finetuned/{checkpoint-1500 → checkpoint-4000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0681395055000d4e747cc300f2cbc56670b28d89b76be36481f980cb1de9f08e
3
- size 876038330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1ea34cc74aa14a146de7b5f181aac6fce8d17d22e4d9337f5dd3fbbaaec9295
3
+ size 876038394
finetuned/{checkpoint-2000 → checkpoint-4000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3baefb05144af3284dffe17d206ea2b1aa96ef791488c680bd7f236a35cb6bbc
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdd796aa89fddb7fbfb345fbbc50fed5f53d97172cbddf9c71a21c13cbeabbc7
3
  size 14244
finetuned/{checkpoint-1000 → checkpoint-4000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17a6414b6fb5de33c203279a7297b9438b98c06da76a94c9dd96712c5a1d8e13
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4502fb7dec15afe09ebc246fb1872a7b2a71f04495fc6ff09683c387e02e2b3b
3
  size 1064
finetuned/checkpoint-4000/trainer_state.json ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.786511835640911,
5
+ "eval_steps": 500,
6
+ "global_step": 4000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.11165698972755694,
13
+ "grad_norm": 2.4662914276123047,
14
+ "learning_rate": 9.446181330951318e-06,
15
+ "loss": 0.3851,
16
+ "step": 250
17
+ },
18
+ {
19
+ "epoch": 0.2233139794551139,
20
+ "grad_norm": 2.047555923461914,
21
+ "learning_rate": 8.890129522108086e-06,
22
+ "loss": 0.2527,
23
+ "step": 500
24
+ },
25
+ {
26
+ "epoch": 0.2233139794551139,
27
+ "eval_loss": 0.1920539289712906,
28
+ "eval_runtime": 20.4794,
29
+ "eval_samples_per_second": 308.7,
30
+ "eval_steps_per_second": 38.624,
31
+ "step": 500
32
+ },
33
+ {
34
+ "epoch": 0.33497096918267083,
35
+ "grad_norm": 9.508040428161621,
36
+ "learning_rate": 8.3318445734703e-06,
37
+ "loss": 0.2201,
38
+ "step": 750
39
+ },
40
+ {
41
+ "epoch": 0.4466279589102278,
42
+ "grad_norm": 3.1730806827545166,
43
+ "learning_rate": 7.773559624832514e-06,
44
+ "loss": 0.2306,
45
+ "step": 1000
46
+ },
47
+ {
48
+ "epoch": 0.4466279589102278,
49
+ "eval_loss": 0.19817066192626953,
50
+ "eval_runtime": 20.4532,
51
+ "eval_samples_per_second": 309.096,
52
+ "eval_steps_per_second": 38.674,
53
+ "step": 1000
54
+ },
55
+ {
56
+ "epoch": 0.5582849486377848,
57
+ "grad_norm": 9.710593223571777,
58
+ "learning_rate": 7.21527467619473e-06,
59
+ "loss": 0.2134,
60
+ "step": 1250
61
+ },
62
+ {
63
+ "epoch": 0.6699419383653417,
64
+ "grad_norm": 11.616806983947754,
65
+ "learning_rate": 6.656989727556945e-06,
66
+ "loss": 0.2099,
67
+ "step": 1500
68
+ },
69
+ {
70
+ "epoch": 0.6699419383653417,
71
+ "eval_loss": 0.183230921626091,
72
+ "eval_runtime": 20.4778,
73
+ "eval_samples_per_second": 308.725,
74
+ "eval_steps_per_second": 38.627,
75
+ "step": 1500
76
+ },
77
+ {
78
+ "epoch": 0.7815989280928987,
79
+ "grad_norm": 0.7312414646148682,
80
+ "learning_rate": 6.098704778919161e-06,
81
+ "loss": 0.1859,
82
+ "step": 1750
83
+ },
84
+ {
85
+ "epoch": 0.8932559178204555,
86
+ "grad_norm": 9.158418655395508,
87
+ "learning_rate": 5.540419830281376e-06,
88
+ "loss": 0.209,
89
+ "step": 2000
90
+ },
91
+ {
92
+ "epoch": 0.8932559178204555,
93
+ "eval_loss": 0.20334886014461517,
94
+ "eval_runtime": 20.5003,
95
+ "eval_samples_per_second": 308.386,
96
+ "eval_steps_per_second": 38.585,
97
+ "step": 2000
98
+ },
99
+ {
100
+ "epoch": 1.0049129075480125,
101
+ "grad_norm": 0.6505267024040222,
102
+ "learning_rate": 4.982134881643591e-06,
103
+ "loss": 0.1914,
104
+ "step": 2250
105
+ },
106
+ {
107
+ "epoch": 1.1165698972755695,
108
+ "grad_norm": 0.2128743827342987,
109
+ "learning_rate": 4.423849933005807e-06,
110
+ "loss": 0.1477,
111
+ "step": 2500
112
+ },
113
+ {
114
+ "epoch": 1.1165698972755695,
115
+ "eval_loss": 0.19207331538200378,
116
+ "eval_runtime": 20.4457,
117
+ "eval_samples_per_second": 309.21,
118
+ "eval_steps_per_second": 38.688,
119
+ "step": 2500
120
+ },
121
+ {
122
+ "epoch": 1.2282268870031263,
123
+ "grad_norm": 7.301019668579102,
124
+ "learning_rate": 3.865564984368021e-06,
125
+ "loss": 0.1346,
126
+ "step": 2750
127
+ },
128
+ {
129
+ "epoch": 1.3398838767306833,
130
+ "grad_norm": 1.8853554725646973,
131
+ "learning_rate": 3.307280035730237e-06,
132
+ "loss": 0.1534,
133
+ "step": 3000
134
+ },
135
+ {
136
+ "epoch": 1.3398838767306833,
137
+ "eval_loss": 0.22205530107021332,
138
+ "eval_runtime": 20.4921,
139
+ "eval_samples_per_second": 308.51,
140
+ "eval_steps_per_second": 38.6,
141
+ "step": 3000
142
+ },
143
+ {
144
+ "epoch": 1.4515408664582403,
145
+ "grad_norm": 4.45537805557251,
146
+ "learning_rate": 2.748995087092452e-06,
147
+ "loss": 0.134,
148
+ "step": 3250
149
+ },
150
+ {
151
+ "epoch": 1.563197856185797,
152
+ "grad_norm": 1.3830872774124146,
153
+ "learning_rate": 2.1907101384546676e-06,
154
+ "loss": 0.1423,
155
+ "step": 3500
156
+ },
157
+ {
158
+ "epoch": 1.563197856185797,
159
+ "eval_loss": 0.2144242823123932,
160
+ "eval_runtime": 20.4506,
161
+ "eval_samples_per_second": 309.135,
162
+ "eval_steps_per_second": 38.679,
163
+ "step": 3500
164
+ },
165
+ {
166
+ "epoch": 1.6748548459133543,
167
+ "grad_norm": 6.6492156982421875,
168
+ "learning_rate": 1.6324251898168828e-06,
169
+ "loss": 0.1304,
170
+ "step": 3750
171
+ },
172
+ {
173
+ "epoch": 1.786511835640911,
174
+ "grad_norm": 25.870407104492188,
175
+ "learning_rate": 1.076373380973649e-06,
176
+ "loss": 0.1472,
177
+ "step": 4000
178
+ },
179
+ {
180
+ "epoch": 1.786511835640911,
181
+ "eval_loss": 0.2106480598449707,
182
+ "eval_runtime": 20.45,
183
+ "eval_samples_per_second": 309.144,
184
+ "eval_steps_per_second": 38.68,
185
+ "step": 4000
186
+ }
187
+ ],
188
+ "logging_steps": 250,
189
+ "max_steps": 4478,
190
+ "num_input_tokens_seen": 0,
191
+ "num_train_epochs": 2,
192
+ "save_steps": 1000,
193
+ "stateful_callbacks": {
194
+ "TrainerControl": {
195
+ "args": {
196
+ "should_epoch_stop": false,
197
+ "should_evaluate": false,
198
+ "should_log": false,
199
+ "should_save": true,
200
+ "should_training_stop": false
201
+ },
202
+ "attributes": {}
203
+ }
204
+ },
205
+ "total_flos": 1.65695831557056e+16,
206
+ "train_batch_size": 16,
207
+ "trial_name": null,
208
+ "trial_params": null
209
+ }
finetuned/{checkpoint-2000 → checkpoint-4000}/training_args.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2d467fd004b82564bf87f840a5a063cdb96832e1ad2d05429b1e9464eca34a0
3
- size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:141eb34e31450e25b568f7f7191acf4a98da817c0aadd9f7e72805b0f4f79fb9
3
+ size 5240
finetuned/{checkpoint-2500 → checkpoint-4478}/config.json RENAMED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "google-bert/bert-base-uncased",
3
  "architectures": [
4
  "BertForSequenceClassification"
5
  ],
 
1
  {
2
+ "_name_or_path": "bert-base-uncased",
3
  "architectures": [
4
  "BertForSequenceClassification"
5
  ],
finetuned/{checkpoint-1000 → checkpoint-4478}/model.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09c27c55f327985102eae7e9c1be83b8c60c8275a8b0b3ff27bfcf1fd513100a
3
  size 437958648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82960dd8038b454884bd4c04473e51c711111b0f37db5cd655609c2b03aa8162
3
  size 437958648
finetuned/{checkpoint-2000 → checkpoint-4478}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:956dd23a3f2662f6624ded6c8fadb3694817f9c7e932226bab6c2da14494b95e
3
- size 876038330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86e35b1717a5fcff9af4d5201723cacf06fac8ac9bfa5d74801ea64aa492c78f
3
+ size 876038394
finetuned/{checkpoint-2500 → checkpoint-4478}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e7cf5be83b0239859ac0fa079f293d45972f68d6dcb8d77b28dc21e1efb5720
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a388acc7697c4b3506f8910dd0fde563539f54157bdf6c5221bcabce8ab28d78
3
  size 14244
finetuned/{checkpoint-2000 → checkpoint-4478}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f4c4b1cfd22d6cb65d5d53fb61cc94ce7abe5101099a35f9cc8f20b468437e7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77207998e702c63df9df9da34fefc0ac738c9e53197c280e0846bc33b69d12d3
3
  size 1064
finetuned/checkpoint-4478/trainer_state.json ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 4478,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.11165698972755694,
13
+ "grad_norm": 2.4662914276123047,
14
+ "learning_rate": 9.446181330951318e-06,
15
+ "loss": 0.3851,
16
+ "step": 250
17
+ },
18
+ {
19
+ "epoch": 0.2233139794551139,
20
+ "grad_norm": 2.047555923461914,
21
+ "learning_rate": 8.890129522108086e-06,
22
+ "loss": 0.2527,
23
+ "step": 500
24
+ },
25
+ {
26
+ "epoch": 0.2233139794551139,
27
+ "eval_loss": 0.1920539289712906,
28
+ "eval_runtime": 20.4794,
29
+ "eval_samples_per_second": 308.7,
30
+ "eval_steps_per_second": 38.624,
31
+ "step": 500
32
+ },
33
+ {
34
+ "epoch": 0.33497096918267083,
35
+ "grad_norm": 9.508040428161621,
36
+ "learning_rate": 8.3318445734703e-06,
37
+ "loss": 0.2201,
38
+ "step": 750
39
+ },
40
+ {
41
+ "epoch": 0.4466279589102278,
42
+ "grad_norm": 3.1730806827545166,
43
+ "learning_rate": 7.773559624832514e-06,
44
+ "loss": 0.2306,
45
+ "step": 1000
46
+ },
47
+ {
48
+ "epoch": 0.4466279589102278,
49
+ "eval_loss": 0.19817066192626953,
50
+ "eval_runtime": 20.4532,
51
+ "eval_samples_per_second": 309.096,
52
+ "eval_steps_per_second": 38.674,
53
+ "step": 1000
54
+ },
55
+ {
56
+ "epoch": 0.5582849486377848,
57
+ "grad_norm": 9.710593223571777,
58
+ "learning_rate": 7.21527467619473e-06,
59
+ "loss": 0.2134,
60
+ "step": 1250
61
+ },
62
+ {
63
+ "epoch": 0.6699419383653417,
64
+ "grad_norm": 11.616806983947754,
65
+ "learning_rate": 6.656989727556945e-06,
66
+ "loss": 0.2099,
67
+ "step": 1500
68
+ },
69
+ {
70
+ "epoch": 0.6699419383653417,
71
+ "eval_loss": 0.183230921626091,
72
+ "eval_runtime": 20.4778,
73
+ "eval_samples_per_second": 308.725,
74
+ "eval_steps_per_second": 38.627,
75
+ "step": 1500
76
+ },
77
+ {
78
+ "epoch": 0.7815989280928987,
79
+ "grad_norm": 0.7312414646148682,
80
+ "learning_rate": 6.098704778919161e-06,
81
+ "loss": 0.1859,
82
+ "step": 1750
83
+ },
84
+ {
85
+ "epoch": 0.8932559178204555,
86
+ "grad_norm": 9.158418655395508,
87
+ "learning_rate": 5.540419830281376e-06,
88
+ "loss": 0.209,
89
+ "step": 2000
90
+ },
91
+ {
92
+ "epoch": 0.8932559178204555,
93
+ "eval_loss": 0.20334886014461517,
94
+ "eval_runtime": 20.5003,
95
+ "eval_samples_per_second": 308.386,
96
+ "eval_steps_per_second": 38.585,
97
+ "step": 2000
98
+ },
99
+ {
100
+ "epoch": 1.0049129075480125,
101
+ "grad_norm": 0.6505267024040222,
102
+ "learning_rate": 4.982134881643591e-06,
103
+ "loss": 0.1914,
104
+ "step": 2250
105
+ },
106
+ {
107
+ "epoch": 1.1165698972755695,
108
+ "grad_norm": 0.2128743827342987,
109
+ "learning_rate": 4.423849933005807e-06,
110
+ "loss": 0.1477,
111
+ "step": 2500
112
+ },
113
+ {
114
+ "epoch": 1.1165698972755695,
115
+ "eval_loss": 0.19207331538200378,
116
+ "eval_runtime": 20.4457,
117
+ "eval_samples_per_second": 309.21,
118
+ "eval_steps_per_second": 38.688,
119
+ "step": 2500
120
+ },
121
+ {
122
+ "epoch": 1.2282268870031263,
123
+ "grad_norm": 7.301019668579102,
124
+ "learning_rate": 3.865564984368021e-06,
125
+ "loss": 0.1346,
126
+ "step": 2750
127
+ },
128
+ {
129
+ "epoch": 1.3398838767306833,
130
+ "grad_norm": 1.8853554725646973,
131
+ "learning_rate": 3.307280035730237e-06,
132
+ "loss": 0.1534,
133
+ "step": 3000
134
+ },
135
+ {
136
+ "epoch": 1.3398838767306833,
137
+ "eval_loss": 0.22205530107021332,
138
+ "eval_runtime": 20.4921,
139
+ "eval_samples_per_second": 308.51,
140
+ "eval_steps_per_second": 38.6,
141
+ "step": 3000
142
+ },
143
+ {
144
+ "epoch": 1.4515408664582403,
145
+ "grad_norm": 4.45537805557251,
146
+ "learning_rate": 2.748995087092452e-06,
147
+ "loss": 0.134,
148
+ "step": 3250
149
+ },
150
+ {
151
+ "epoch": 1.563197856185797,
152
+ "grad_norm": 1.3830872774124146,
153
+ "learning_rate": 2.1907101384546676e-06,
154
+ "loss": 0.1423,
155
+ "step": 3500
156
+ },
157
+ {
158
+ "epoch": 1.563197856185797,
159
+ "eval_loss": 0.2144242823123932,
160
+ "eval_runtime": 20.4506,
161
+ "eval_samples_per_second": 309.135,
162
+ "eval_steps_per_second": 38.679,
163
+ "step": 3500
164
+ },
165
+ {
166
+ "epoch": 1.6748548459133543,
167
+ "grad_norm": 6.6492156982421875,
168
+ "learning_rate": 1.6324251898168828e-06,
169
+ "loss": 0.1304,
170
+ "step": 3750
171
+ },
172
+ {
173
+ "epoch": 1.786511835640911,
174
+ "grad_norm": 25.870407104492188,
175
+ "learning_rate": 1.076373380973649e-06,
176
+ "loss": 0.1472,
177
+ "step": 4000
178
+ },
179
+ {
180
+ "epoch": 1.786511835640911,
181
+ "eval_loss": 0.2106480598449707,
182
+ "eval_runtime": 20.45,
183
+ "eval_samples_per_second": 309.144,
184
+ "eval_steps_per_second": 38.68,
185
+ "step": 4000
186
+ },
187
+ {
188
+ "epoch": 1.898168825368468,
189
+ "grad_norm": 14.559236526489258,
190
+ "learning_rate": 5.180884323358643e-07,
191
+ "loss": 0.1483,
192
+ "step": 4250
193
+ }
194
+ ],
195
+ "logging_steps": 250,
196
+ "max_steps": 4478,
197
+ "num_input_tokens_seen": 0,
198
+ "num_train_epochs": 2,
199
+ "save_steps": 1000,
200
+ "stateful_callbacks": {
201
+ "TrainerControl": {
202
+ "args": {
203
+ "should_epoch_stop": false,
204
+ "should_evaluate": false,
205
+ "should_log": false,
206
+ "should_save": true,
207
+ "should_training_stop": true
208
+ },
209
+ "attributes": {}
210
+ }
211
+ },
212
+ "total_flos": 1.854354095966208e+16,
213
+ "train_batch_size": 16,
214
+ "trial_name": null,
215
+ "trial_params": null
216
+ }