lilyray committed on
Commit
df788b6
·
verified ·
1 Parent(s): 55ae2a4

bert-irony-hyper

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. README.md +7 -8
  2. model.safetensors +1 -1
  3. run-0/checkpoint-1432/config.json +27 -0
  4. run-0/checkpoint-1432/model.safetensors +3 -0
  5. run-0/checkpoint-1432/optimizer.pt +3 -0
  6. run-0/checkpoint-1432/rng_state.pth +3 -0
  7. run-0/checkpoint-1432/scheduler.pt +3 -0
  8. run-0/checkpoint-1432/special_tokens_map.json +7 -0
  9. run-0/checkpoint-1432/tokenizer_config.json +57 -0
  10. run-0/checkpoint-1432/trainer_state.json +58 -0
  11. run-0/checkpoint-1432/training_args.bin +3 -0
  12. run-0/checkpoint-1432/vocab.txt +0 -0
  13. run-0/checkpoint-2148/config.json +27 -0
  14. run-0/checkpoint-2148/model.safetensors +3 -0
  15. run-0/checkpoint-2148/optimizer.pt +3 -0
  16. run-0/checkpoint-2148/rng_state.pth +3 -0
  17. run-0/checkpoint-2148/scheduler.pt +3 -0
  18. run-0/checkpoint-2148/special_tokens_map.json +7 -0
  19. run-0/checkpoint-2148/tokenizer_config.json +57 -0
  20. run-0/checkpoint-2148/trainer_state.json +81 -0
  21. run-0/checkpoint-2148/training_args.bin +3 -0
  22. run-0/checkpoint-2148/vocab.txt +0 -0
  23. run-0/checkpoint-716/config.json +27 -0
  24. run-0/checkpoint-716/model.safetensors +3 -0
  25. run-0/checkpoint-716/optimizer.pt +3 -0
  26. run-0/checkpoint-716/rng_state.pth +3 -0
  27. run-0/checkpoint-716/scheduler.pt +3 -0
  28. run-0/checkpoint-716/special_tokens_map.json +7 -0
  29. run-0/checkpoint-716/tokenizer_config.json +57 -0
  30. run-0/checkpoint-716/trainer_state.json +42 -0
  31. run-0/checkpoint-716/training_args.bin +3 -0
  32. run-0/checkpoint-716/vocab.txt +0 -0
  33. run-1/checkpoint-358/config.json +27 -0
  34. run-1/checkpoint-358/model.safetensors +3 -0
  35. run-1/checkpoint-358/optimizer.pt +3 -0
  36. run-1/checkpoint-358/rng_state.pth +3 -0
  37. run-1/checkpoint-358/scheduler.pt +3 -0
  38. run-1/checkpoint-358/special_tokens_map.json +7 -0
  39. run-1/checkpoint-358/tokenizer_config.json +57 -0
  40. run-1/checkpoint-358/trainer_state.json +35 -0
  41. run-1/checkpoint-358/training_args.bin +3 -0
  42. run-1/checkpoint-358/vocab.txt +0 -0
  43. run-1/checkpoint-716/config.json +27 -0
  44. run-1/checkpoint-716/model.safetensors +3 -0
  45. run-1/checkpoint-716/optimizer.pt +3 -0
  46. run-1/checkpoint-716/rng_state.pth +3 -0
  47. run-1/checkpoint-716/scheduler.pt +3 -0
  48. run-1/checkpoint-716/special_tokens_map.json +7 -0
  49. run-1/checkpoint-716/tokenizer_config.json +57 -0
  50. run-1/checkpoint-716/trainer_state.json +51 -0
README.md CHANGED
@@ -17,8 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [google-bert/bert-base-uncased](https://huggingface.co/google-bert/bert-base-uncased) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.4492
21
- - Accuracy: 0.7921
22
 
23
  ## Model description
24
 
@@ -37,21 +37,20 @@ More information needed
37
  ### Training hyperparameters
38
 
39
  The following hyperparameters were used during training:
40
- - learning_rate: 2e-05
41
  - train_batch_size: 8
42
  - eval_batch_size: 8
43
- - seed: 42
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: linear
46
- - num_epochs: 3
47
 
48
  ### Training results
49
 
50
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
51
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
52
- | No log | 1.0 | 358 | 0.6238 | 0.6806 |
53
- | 0.582 | 2.0 | 716 | 0.6726 | 0.6754 |
54
- | 0.3462 | 3.0 | 1074 | 0.8248 | 0.7089 |
55
 
56
 
57
  ### Framework versions
 
17
 
18
  This model is a fine-tuned version of [google-bert/bert-base-uncased](https://huggingface.co/google-bert/bert-base-uncased) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.4570
21
+ - Accuracy: 0.8194
22
 
23
  ## Model description
24
 
 
37
  ### Training hyperparameters
38
 
39
  The following hyperparameters were used during training:
40
+ - learning_rate: 6.383530088472568e-05
41
  - train_batch_size: 8
42
  - eval_batch_size: 8
43
+ - seed: 38
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: linear
46
+ - num_epochs: 2
47
 
48
  ### Training results
49
 
50
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
51
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
52
+ | No log | 1.0 | 358 | 0.5821 | 0.6951 |
53
+ | 0.5947 | 2.0 | 716 | 0.7254 | 0.6993 |
 
54
 
55
 
56
  ### Framework versions
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a65cfd6c8c67ea81c8a9bd210904b5a3207c4cb59b697427c8624d4a158fbe3c
3
  size 437958648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98e54a5db3a597e7660c2c7923cb394161ba914ae470992f98b353c56dfb4bf3
3
  size 437958648
run-0/checkpoint-1432/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google-bert/bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "problem_type": "single_label_classification",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.38.2",
24
+ "type_vocab_size": 2,
25
+ "use_cache": true,
26
+ "vocab_size": 30522
27
+ }
run-0/checkpoint-1432/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:466f56598d470d0d6e6863f102554af8793f8000017cf67dfb3c1b0b1c11a746
3
+ size 437958648
run-0/checkpoint-1432/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efefa1cb2e43eca86388c140d986fc42044bf02eff95d330c750a7121ef14a8f
3
+ size 876038394
run-0/checkpoint-1432/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b3cf3545a26ad3b5b8ac5e19b825521066c417f4612f09c6113c18275eb81d5
3
+ size 14244
run-0/checkpoint-1432/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6109cba35272005a0b96a29d208d6cd5344a03fe5d1e2a18ceb80837835819b8
3
+ size 1064
run-0/checkpoint-1432/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-0/checkpoint-1432/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
run-0/checkpoint-1432/trainer_state.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.6532331109046936,
3
+ "best_model_checkpoint": "./bert_irony/run-0/checkpoint-716",
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1432,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.7,
13
+ "grad_norm": 9.312857627868652,
14
+ "learning_rate": 3.361475882040115e-05,
15
+ "loss": 0.6817,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.5763888888888888,
21
+ "eval_loss": 0.6532331109046936,
22
+ "eval_runtime": 8.2239,
23
+ "eval_samples_per_second": 116.125,
24
+ "eval_steps_per_second": 14.592,
25
+ "step": 716
26
+ },
27
+ {
28
+ "epoch": 1.4,
29
+ "grad_norm": 11.967634201049805,
30
+ "learning_rate": 2.3416106265667792e-05,
31
+ "loss": 0.6184,
32
+ "step": 1000
33
+ },
34
+ {
35
+ "epoch": 2.0,
36
+ "eval_accuracy": 0.6913194444444445,
37
+ "eval_loss": 0.7047404050827026,
38
+ "eval_runtime": 8.215,
39
+ "eval_samples_per_second": 116.251,
40
+ "eval_steps_per_second": 14.607,
41
+ "step": 1432
42
+ }
43
+ ],
44
+ "logging_steps": 500,
45
+ "max_steps": 2148,
46
+ "num_input_tokens_seen": 0,
47
+ "num_train_epochs": 3,
48
+ "save_steps": 500,
49
+ "total_flos": 1051917999329280.0,
50
+ "train_batch_size": 4,
51
+ "trial_name": null,
52
+ "trial_params": {
53
+ "learning_rate": 4.381341137513451e-05,
54
+ "num_train_epochs": 3,
55
+ "per_device_train_batch_size": 4,
56
+ "seed": 28
57
+ }
58
+ }
run-0/checkpoint-1432/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0df87ed98c5e9d32ba2cafd8467572084a9877072cecd8864828f54a98caa3b2
3
+ size 4920
run-0/checkpoint-1432/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-0/checkpoint-2148/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google-bert/bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "problem_type": "single_label_classification",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.38.2",
24
+ "type_vocab_size": 2,
25
+ "use_cache": true,
26
+ "vocab_size": 30522
27
+ }
run-0/checkpoint-2148/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e686d6b911a5f49bd7811a66846d5d24d20c9ce21327249f97c4bf27ef14288a
3
+ size 437958648
run-0/checkpoint-2148/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b405a9ca98682f2d9068f8433b4101363794e1cab25342a8319ac34b3a634100
3
+ size 876038394
run-0/checkpoint-2148/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbbb5a2b8bb0078c73cce7cb2b760d7ca2b09c02604af5fd96d543cfc512bade
3
+ size 14244
run-0/checkpoint-2148/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5412dfe8e228c1fb5574c1b3afd8d4239c37f91fa5039b7ce89f468d2f0f52c
3
+ size 1064
run-0/checkpoint-2148/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-0/checkpoint-2148/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
run-0/checkpoint-2148/trainer_state.json ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.6532331109046936,
3
+ "best_model_checkpoint": "./bert_irony/run-0/checkpoint-716",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 2148,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.7,
13
+ "grad_norm": 9.312857627868652,
14
+ "learning_rate": 3.361475882040115e-05,
15
+ "loss": 0.6817,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.5763888888888888,
21
+ "eval_loss": 0.6532331109046936,
22
+ "eval_runtime": 8.2239,
23
+ "eval_samples_per_second": 116.125,
24
+ "eval_steps_per_second": 14.592,
25
+ "step": 716
26
+ },
27
+ {
28
+ "epoch": 1.4,
29
+ "grad_norm": 11.967634201049805,
30
+ "learning_rate": 2.3416106265667792e-05,
31
+ "loss": 0.6184,
32
+ "step": 1000
33
+ },
34
+ {
35
+ "epoch": 2.0,
36
+ "eval_accuracy": 0.6913194444444445,
37
+ "eval_loss": 0.7047404050827026,
38
+ "eval_runtime": 8.215,
39
+ "eval_samples_per_second": 116.251,
40
+ "eval_steps_per_second": 14.607,
41
+ "step": 1432
42
+ },
43
+ {
44
+ "epoch": 2.09,
45
+ "grad_norm": 6.910437107086182,
46
+ "learning_rate": 1.3217453710934432e-05,
47
+ "loss": 0.5339,
48
+ "step": 1500
49
+ },
50
+ {
51
+ "epoch": 2.79,
52
+ "grad_norm": 0.3941420316696167,
53
+ "learning_rate": 3.018801156201074e-06,
54
+ "loss": 0.3536,
55
+ "step": 2000
56
+ },
57
+ {
58
+ "epoch": 3.0,
59
+ "eval_accuracy": 0.6774305555555554,
60
+ "eval_loss": 1.3720420598983765,
61
+ "eval_runtime": 8.2327,
62
+ "eval_samples_per_second": 116.001,
63
+ "eval_steps_per_second": 14.576,
64
+ "step": 2148
65
+ }
66
+ ],
67
+ "logging_steps": 500,
68
+ "max_steps": 2148,
69
+ "num_input_tokens_seen": 0,
70
+ "num_train_epochs": 3,
71
+ "save_steps": 500,
72
+ "total_flos": 2103835998658560.0,
73
+ "train_batch_size": 4,
74
+ "trial_name": null,
75
+ "trial_params": {
76
+ "learning_rate": 4.381341137513451e-05,
77
+ "num_train_epochs": 3,
78
+ "per_device_train_batch_size": 4,
79
+ "seed": 28
80
+ }
81
+ }
run-0/checkpoint-2148/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0df87ed98c5e9d32ba2cafd8467572084a9877072cecd8864828f54a98caa3b2
3
+ size 4920
run-0/checkpoint-2148/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-0/checkpoint-716/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google-bert/bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "problem_type": "single_label_classification",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.38.2",
24
+ "type_vocab_size": 2,
25
+ "use_cache": true,
26
+ "vocab_size": 30522
27
+ }
run-0/checkpoint-716/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f9c4b20e1a7e18015be933993dc5eb7848a6b910e16c5a842b9333f8f145722
3
+ size 437958648
run-0/checkpoint-716/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4a478d1c361999a29ff1ccf0bca2692b0603e1ae5f6157433716ad6973c0c9e
3
+ size 876038394
run-0/checkpoint-716/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:836ec545da560e0ef9dcd573d79329688b0b8f346bd3375c9a14b53dac27b72c
3
+ size 14244
run-0/checkpoint-716/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddda7cd7e7d09d8f3ffd501f009210617180a252f88afa9a65d1aa461d2d82f1
3
+ size 1064
run-0/checkpoint-716/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-0/checkpoint-716/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
run-0/checkpoint-716/trainer_state.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.6532331109046936,
3
+ "best_model_checkpoint": "./bert_irony/run-0/checkpoint-716",
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 716,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.7,
13
+ "grad_norm": 9.312857627868652,
14
+ "learning_rate": 3.361475882040115e-05,
15
+ "loss": 0.6817,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.5763888888888888,
21
+ "eval_loss": 0.6532331109046936,
22
+ "eval_runtime": 8.2239,
23
+ "eval_samples_per_second": 116.125,
24
+ "eval_steps_per_second": 14.592,
25
+ "step": 716
26
+ }
27
+ ],
28
+ "logging_steps": 500,
29
+ "max_steps": 2148,
30
+ "num_input_tokens_seen": 0,
31
+ "num_train_epochs": 3,
32
+ "save_steps": 500,
33
+ "total_flos": 526222110720000.0,
34
+ "train_batch_size": 4,
35
+ "trial_name": null,
36
+ "trial_params": {
37
+ "learning_rate": 4.381341137513451e-05,
38
+ "num_train_epochs": 3,
39
+ "per_device_train_batch_size": 4,
40
+ "seed": 28
41
+ }
42
+ }
run-0/checkpoint-716/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0df87ed98c5e9d32ba2cafd8467572084a9877072cecd8864828f54a98caa3b2
3
+ size 4920
run-0/checkpoint-716/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-1/checkpoint-358/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google-bert/bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "problem_type": "single_label_classification",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.38.2",
24
+ "type_vocab_size": 2,
25
+ "use_cache": true,
26
+ "vocab_size": 30522
27
+ }
run-1/checkpoint-358/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98e54a5db3a597e7660c2c7923cb394161ba914ae470992f98b353c56dfb4bf3
3
+ size 437958648
run-1/checkpoint-358/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbfcca6f4886422080a93b76a2bc0bc082b1e1d94a508f236f10333c44c91f9b
3
+ size 876038394
run-1/checkpoint-358/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b00c61b49be04244217ed78dccdf50df27b9864769abbee9d4e91bbb27eaab18
3
+ size 14244
run-1/checkpoint-358/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:119adbe001b0638d9f3aa55ae6a19cb918923192d98b9be2d18a518ddf3ba9d5
3
+ size 1064
run-1/checkpoint-358/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-1/checkpoint-358/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
run-1/checkpoint-358/trainer_state.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.5820930004119873,
3
+ "best_model_checkpoint": "./bert_irony/run-1/checkpoint-358",
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 358,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.6951388888888889,
14
+ "eval_loss": 0.5820930004119873,
15
+ "eval_runtime": 8.2633,
16
+ "eval_samples_per_second": 115.572,
17
+ "eval_steps_per_second": 14.522,
18
+ "step": 358
19
+ }
20
+ ],
21
+ "logging_steps": 500,
22
+ "max_steps": 716,
23
+ "num_input_tokens_seen": 0,
24
+ "num_train_epochs": 2,
25
+ "save_steps": 500,
26
+ "total_flos": 0,
27
+ "train_batch_size": 8,
28
+ "trial_name": null,
29
+ "trial_params": {
30
+ "learning_rate": 6.383530088472568e-05,
31
+ "num_train_epochs": 2,
32
+ "per_device_train_batch_size": 8,
33
+ "seed": 38
34
+ }
35
+ }
run-1/checkpoint-358/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a17cf45345a048e3bb423ce94e685f9c5474594feffe769b012354aba56eb10c
3
+ size 4920
run-1/checkpoint-358/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-1/checkpoint-716/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google-bert/bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "problem_type": "single_label_classification",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.38.2",
24
+ "type_vocab_size": 2,
25
+ "use_cache": true,
26
+ "vocab_size": 30522
27
+ }
run-1/checkpoint-716/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1429753d738639c4fa1ab93473dc77af872e36bd1cf6387f98de1d5b5057779a
3
+ size 437958648
run-1/checkpoint-716/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:502fda613d1c6414c797d2840125a54cd9ebb77887ba96d77b8ae721932c2095
3
+ size 876038394
run-1/checkpoint-716/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0003f77cca47ec17d738631773d261bb7add0e4531087f0c7cd4c62127b3c13e
3
+ size 14244
run-1/checkpoint-716/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f187bacd792cd20f7e19f2cae507203958d1bc673201186808eb1d01b79e1bba
3
+ size 1064
run-1/checkpoint-716/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-1/checkpoint-716/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
run-1/checkpoint-716/trainer_state.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.5820930004119873,
3
+ "best_model_checkpoint": "./bert_irony/run-1/checkpoint-358",
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 716,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.6951388888888889,
14
+ "eval_loss": 0.5820930004119873,
15
+ "eval_runtime": 8.2633,
16
+ "eval_samples_per_second": 115.572,
17
+ "eval_steps_per_second": 14.522,
18
+ "step": 358
19
+ },
20
+ {
21
+ "epoch": 1.4,
22
+ "grad_norm": 7.699513912200928,
23
+ "learning_rate": 1.9257576803213335e-05,
24
+ "loss": 0.5947,
25
+ "step": 500
26
+ },
27
+ {
28
+ "epoch": 2.0,
29
+ "eval_accuracy": 0.6993055555555555,
30
+ "eval_loss": 0.7253889441490173,
31
+ "eval_runtime": 8.266,
32
+ "eval_samples_per_second": 115.533,
33
+ "eval_steps_per_second": 14.517,
34
+ "step": 716
35
+ }
36
+ ],
37
+ "logging_steps": 500,
38
+ "max_steps": 716,
39
+ "num_input_tokens_seen": 0,
40
+ "num_train_epochs": 2,
41
+ "save_steps": 500,
42
+ "total_flos": 1051917999329280.0,
43
+ "train_batch_size": 8,
44
+ "trial_name": null,
45
+ "trial_params": {
46
+ "learning_rate": 6.383530088472568e-05,
47
+ "num_train_epochs": 2,
48
+ "per_device_train_batch_size": 8,
49
+ "seed": 38
50
+ }
51
+ }