Chantland commited on
Commit
3173969
·
verified ·
1 Parent(s): 7baa2d8

Updated model for regularization testing

Browse files
config.json CHANGED
@@ -9,45 +9,33 @@
9
  "dropout": 0.1,
10
  "hidden_dim": 3072,
11
  "id2label": {
12
- "0": "EVENT",
13
- "1": "EVENT_Illness",
14
- "2": "EVENT_Accident",
15
- "3": "EVENT_Other",
16
- "4": "CAUSE",
17
- "5": "CAUSE_Just_Happens",
18
- "6": "CAUSE_Material_Physical",
19
- "7": "CAUSE_Spirits_Gods",
20
- "8": "CAUSE_Witchcraft_Sorcery",
21
- "9": "CAUSE_Rule_Violation_Taboo",
22
- "10": "CAUSE_Jealousy_Evil_Eye",
23
- "11": "ACTION",
24
- "12": "ACTION_Physical_Material",
25
- "13": "ACTION_Technical_Specialist",
26
- "14": "ACTION_Divination",
27
- "15": "ACTION_Shaman_Medium_Healer",
28
- "16": "ACTION_Priest_High_Religion",
29
- "17": "ACTION_Other"
30
  },
31
  "initializer_range": 0.02,
32
  "label2id": {
33
- "ACTION": 11,
34
- "ACTION_Divination": 14,
35
- "ACTION_Other": 17,
36
- "ACTION_Physical_Material": 12,
37
- "ACTION_Priest_High_Religion": 16,
38
- "ACTION_Shaman_Medium_Healer": 15,
39
- "ACTION_Technical_Specialist": 13,
40
- "CAUSE": 4,
41
- "CAUSE_Jealousy_Evil_Eye": 10,
42
- "CAUSE_Just_Happens": 5,
43
- "CAUSE_Material_Physical": 6,
44
- "CAUSE_Rule_Violation_Taboo": 9,
45
- "CAUSE_Spirits_Gods": 7,
46
- "CAUSE_Witchcraft_Sorcery": 8,
47
- "EVENT": 0,
48
- "EVENT_Accident": 2,
49
- "EVENT_Illness": 1,
50
- "EVENT_Other": 3
51
  },
52
  "max_position_embeddings": 512,
53
  "model_type": "distilbert",
@@ -60,6 +48,6 @@
60
  "sinusoidal_pos_embds": false,
61
  "tie_weights_": true,
62
  "torch_dtype": "float32",
63
- "transformers_version": "4.24.0",
64
  "vocab_size": 30522
65
  }
 
9
  "dropout": 0.1,
10
  "hidden_dim": 3072,
11
  "id2label": {
12
+ "0": "EVENT_Illness",
13
+ "1": "EVENT_Accident",
14
+ "2": "EVENT_Other",
15
+ "3": "CAUSE_Material_Physical",
16
+ "4": "CAUSE_Spirits_Gods",
17
+ "5": "CAUSE_Witchcraft_Sorcery",
18
+ "6": "CAUSE_Rule_Violation_Taboo",
19
+ "7": "ACTION_Physical_Material",
20
+ "8": "ACTION_Technical_Specialist",
21
+ "9": "ACTION_Divination",
22
+ "10": "ACTION_Shaman_Medium_Healer",
23
+ "11": "ACTION_Priest_High_Religion"
 
 
 
 
 
 
24
  },
25
  "initializer_range": 0.02,
26
  "label2id": {
27
+ "ACTION_Divination": 9,
28
+ "ACTION_Physical_Material": 7,
29
+ "ACTION_Priest_High_Religion": 11,
30
+ "ACTION_Shaman_Medium_Healer": 10,
31
+ "ACTION_Technical_Specialist": 8,
32
+ "CAUSE_Material_Physical": 3,
33
+ "CAUSE_Rule_Violation_Taboo": 6,
34
+ "CAUSE_Spirits_Gods": 4,
35
+ "CAUSE_Witchcraft_Sorcery": 5,
36
+ "EVENT_Accident": 1,
37
+ "EVENT_Illness": 0,
38
+ "EVENT_Other": 2
 
 
 
 
 
 
39
  },
40
  "max_position_embeddings": 512,
41
  "model_type": "distilbert",
 
48
  "sinusoidal_pos_embds": false,
49
  "tie_weights_": true,
50
  "torch_dtype": "float32",
51
+ "transformers_version": "4.41.1",
52
  "vocab_size": 30522
53
  }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:610010ff6d1820ecda9ccc984dc14b78e65a98750da221e9da71dd4100294c76
3
+ size 267863328
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f94f74ba96451ae68674f8c03175ca529b0c9586d1c826d4ea4dc57f2cdcf9fe
3
- size 535796549
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:097a9d2dbbb8bd734adf115a34b696de90b89b7e310a93e9fb3c58bd2e7ddceb
3
+ size 535788730
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d6efb74f26f13ab44ac71a5b2839da2cb02076f78433571e42ff77df959c617
3
- size 13553
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88ff5bb36b8e29678107a9fe193a35ed023fde1c7b0d9c95341d37f70c2cdbea
3
+ size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2dceeccbae01744d839850b433e9e5d599a8543ba27144a7fc306eba118b0d12
3
- size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10b3b185fea1205fbfcb56f550d303bc0b6cb7d325de973a2d3dc5d3e34c96cf
3
+ size 1064
tokenizer.json CHANGED
@@ -10,48 +10,48 @@
10
  "added_tokens": [
11
  {
12
  "id": 0,
13
- "special": true,
14
  "content": "[PAD]",
15
  "single_word": false,
16
  "lstrip": false,
17
  "rstrip": false,
18
- "normalized": false
 
19
  },
20
  {
21
  "id": 100,
22
- "special": true,
23
  "content": "[UNK]",
24
  "single_word": false,
25
  "lstrip": false,
26
  "rstrip": false,
27
- "normalized": false
 
28
  },
29
  {
30
  "id": 101,
31
- "special": true,
32
  "content": "[CLS]",
33
  "single_word": false,
34
  "lstrip": false,
35
  "rstrip": false,
36
- "normalized": false
 
37
  },
38
  {
39
  "id": 102,
40
- "special": true,
41
  "content": "[SEP]",
42
  "single_word": false,
43
  "lstrip": false,
44
  "rstrip": false,
45
- "normalized": false
 
46
  },
47
  {
48
  "id": 103,
49
- "special": true,
50
  "content": "[MASK]",
51
  "single_word": false,
52
  "lstrip": false,
53
  "rstrip": false,
54
- "normalized": false
 
55
  }
56
  ],
57
  "normalizer": {
 
10
  "added_tokens": [
11
  {
12
  "id": 0,
 
13
  "content": "[PAD]",
14
  "single_word": false,
15
  "lstrip": false,
16
  "rstrip": false,
17
+ "normalized": false,
18
+ "special": true
19
  },
20
  {
21
  "id": 100,
 
22
  "content": "[UNK]",
23
  "single_word": false,
24
  "lstrip": false,
25
  "rstrip": false,
26
+ "normalized": false,
27
+ "special": true
28
  },
29
  {
30
  "id": 101,
 
31
  "content": "[CLS]",
32
  "single_word": false,
33
  "lstrip": false,
34
  "rstrip": false,
35
+ "normalized": false,
36
+ "special": true
37
  },
38
  {
39
  "id": 102,
 
40
  "content": "[SEP]",
41
  "single_word": false,
42
  "lstrip": false,
43
  "rstrip": false,
44
+ "normalized": false,
45
+ "special": true
46
  },
47
  {
48
  "id": 103,
 
49
  "content": "[MASK]",
50
  "single_word": false,
51
  "lstrip": false,
52
  "rstrip": false,
53
+ "normalized": false,
54
+ "special": true
55
  }
56
  ],
57
  "normalizer": {
tokenizer_config.json CHANGED
@@ -1,12 +1,53 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "cls_token": "[CLS]",
3
  "do_lower_case": true,
4
  "mask_token": "[MASK]",
5
  "model_max_length": 512,
6
- "name_or_path": "distilbert-base-uncased",
7
  "pad_token": "[PAD]",
8
  "sep_token": "[SEP]",
9
- "special_tokens_map_file": null,
10
  "strip_accents": null,
11
  "tokenize_chinese_chars": true,
12
  "tokenizer_class": "DistilBertTokenizer",
 
1
  {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
  "cls_token": "[CLS]",
46
  "do_lower_case": true,
47
  "mask_token": "[MASK]",
48
  "model_max_length": 512,
 
49
  "pad_token": "[PAD]",
50
  "sep_token": "[SEP]",
 
51
  "strip_accents": null,
52
  "tokenize_chinese_chars": true,
53
  "tokenizer_class": "DistilBertTokenizer",
trainer_state.json CHANGED
@@ -1,77 +1,925 @@
1
  {
2
- "best_metric": 0.7183271832718328,
3
- "best_model_checkpoint": "HRAF_Model_MultiLabel_SubClasses/checkpoint-875",
4
- "epoch": 5.0,
5
- "global_step": 875,
 
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 1.0,
12
- "eval_accuracy": 0.0,
13
- "eval_f1": 0.667278849097031,
14
- "eval_loss": 0.41065430641174316,
15
- "eval_roc_auc": 0.7616334780182695,
16
- "eval_runtime": 107.1579,
17
- "eval_samples_per_second": 3.266,
18
- "eval_steps_per_second": 0.411,
19
- "step": 175
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_accuracy": 0.002857142857142857,
24
- "eval_f1": 0.692448233861145,
25
- "eval_loss": 0.38348057866096497,
26
- "eval_roc_auc": 0.7780945131452475,
27
- "eval_runtime": 90.7152,
28
- "eval_samples_per_second": 3.858,
29
- "eval_steps_per_second": 0.485,
30
- "step": 350
31
- },
32
- {
33
- "epoch": 2.86,
34
- "learning_rate": 8.571428571428571e-06,
35
- "loss": 0.4077,
36
- "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  },
38
  {
39
  "epoch": 3.0,
40
- "eval_accuracy": 0.022857142857142857,
41
- "eval_f1": 0.7051744885679904,
42
- "eval_loss": 0.3677811026573181,
43
- "eval_roc_auc": 0.7873223134208052,
44
- "eval_runtime": 90.6425,
45
- "eval_samples_per_second": 3.861,
46
- "eval_steps_per_second": 0.485,
47
- "step": 525
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  },
49
  {
50
  "epoch": 4.0,
51
- "eval_accuracy": 0.04857142857142857,
52
- "eval_f1": 0.7114503816793892,
53
- "eval_loss": 0.35647499561309814,
54
- "eval_roc_auc": 0.7900212002732531,
55
- "eval_runtime": 90.64,
56
- "eval_samples_per_second": 3.861,
57
- "eval_steps_per_second": 0.485,
58
- "step": 700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  },
60
  {
61
  "epoch": 5.0,
62
- "eval_accuracy": 0.05142857142857143,
63
- "eval_f1": 0.7183271832718328,
64
- "eval_loss": 0.3486453592777252,
65
- "eval_roc_auc": 0.7937415018923373,
66
- "eval_runtime": 90.5252,
67
- "eval_samples_per_second": 3.866,
68
- "eval_steps_per_second": 0.486,
69
- "step": 875
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  }
71
  ],
72
- "max_steps": 875,
73
- "num_train_epochs": 5,
74
- "total_flos": 784104676533360.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  "trial_name": null,
76
  "trial_params": null
77
  }
 
1
  {
2
+ "best_metric": 0.6474642162926313,
3
+ "best_model_checkpoint": "drive/MyDrive/NLP_HRAF//Models/HRAF_MultiLabel_SubClasses_Kfolds/Model_3_LearningRates/Learning_Rate_2e-05_fold_1/checkpoint-10790",
4
+ "epoch": 13.0,
5
+ "eval_steps": 500,
6
+ "global_step": 10790,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
+ {
12
+ "epoch": 0.12048192771084337,
13
+ "grad_norm": 0.7108750343322754,
14
+ "learning_rate": 1.9839357429718877e-05,
15
+ "loss": 0.4589,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.24096385542168675,
20
+ "grad_norm": 0.7625552415847778,
21
+ "learning_rate": 1.967871485943775e-05,
22
+ "loss": 0.3592,
23
+ "step": 200
24
+ },
25
+ {
26
+ "epoch": 0.3614457831325301,
27
+ "grad_norm": 0.8666885495185852,
28
+ "learning_rate": 1.951807228915663e-05,
29
+ "loss": 0.3393,
30
+ "step": 300
31
+ },
32
+ {
33
+ "epoch": 0.4819277108433735,
34
+ "grad_norm": 0.9493631720542908,
35
+ "learning_rate": 1.9357429718875505e-05,
36
+ "loss": 0.3154,
37
+ "step": 400
38
+ },
39
+ {
40
+ "epoch": 0.6024096385542169,
41
+ "grad_norm": 0.8787522315979004,
42
+ "learning_rate": 1.9196787148594377e-05,
43
+ "loss": 0.3001,
44
+ "step": 500
45
+ },
46
+ {
47
+ "epoch": 0.7228915662650602,
48
+ "grad_norm": 0.9206348061561584,
49
+ "learning_rate": 1.9036144578313255e-05,
50
+ "loss": 0.2905,
51
+ "step": 600
52
+ },
53
+ {
54
+ "epoch": 0.8433734939759037,
55
+ "grad_norm": 1.008158564567566,
56
+ "learning_rate": 1.887550200803213e-05,
57
+ "loss": 0.2877,
58
+ "step": 700
59
+ },
60
+ {
61
+ "epoch": 0.963855421686747,
62
+ "grad_norm": 1.307525873184204,
63
+ "learning_rate": 1.8714859437751005e-05,
64
+ "loss": 0.2888,
65
+ "step": 800
66
+ },
67
  {
68
  "epoch": 1.0,
69
+ "eval_accuracy": 0.36347197106690776,
70
+ "eval_f1": 0.5040954832670255,
71
+ "eval_loss": 0.2610304355621338,
72
+ "eval_roc_auc": 0.67926006705383,
73
+ "eval_runtime": 20.823,
74
+ "eval_samples_per_second": 79.671,
75
+ "eval_steps_per_second": 9.989,
76
+ "step": 830
77
+ },
78
+ {
79
+ "epoch": 1.0843373493975903,
80
+ "grad_norm": 1.141208529472351,
81
+ "learning_rate": 1.855421686746988e-05,
82
+ "loss": 0.269,
83
+ "step": 900
84
+ },
85
+ {
86
+ "epoch": 1.2048192771084336,
87
+ "grad_norm": 0.8844193816184998,
88
+ "learning_rate": 1.8393574297188755e-05,
89
+ "loss": 0.2579,
90
+ "step": 1000
91
+ },
92
+ {
93
+ "epoch": 1.3253012048192772,
94
+ "grad_norm": 1.1616642475128174,
95
+ "learning_rate": 1.8232931726907634e-05,
96
+ "loss": 0.2549,
97
+ "step": 1100
98
+ },
99
+ {
100
+ "epoch": 1.4457831325301205,
101
+ "grad_norm": 1.3445640802383423,
102
+ "learning_rate": 1.807228915662651e-05,
103
+ "loss": 0.2551,
104
+ "step": 1200
105
+ },
106
+ {
107
+ "epoch": 1.5662650602409638,
108
+ "grad_norm": 1.131273865699768,
109
+ "learning_rate": 1.7911646586345384e-05,
110
+ "loss": 0.2672,
111
+ "step": 1300
112
+ },
113
+ {
114
+ "epoch": 1.6867469879518073,
115
+ "grad_norm": 1.3358525037765503,
116
+ "learning_rate": 1.775100401606426e-05,
117
+ "loss": 0.2454,
118
+ "step": 1400
119
+ },
120
+ {
121
+ "epoch": 1.8072289156626506,
122
+ "grad_norm": 1.2100324630737305,
123
+ "learning_rate": 1.7590361445783134e-05,
124
+ "loss": 0.2364,
125
+ "step": 1500
126
+ },
127
+ {
128
+ "epoch": 1.927710843373494,
129
+ "grad_norm": 1.2214767932891846,
130
+ "learning_rate": 1.742971887550201e-05,
131
+ "loss": 0.2409,
132
+ "step": 1600
133
  },
134
  {
135
  "epoch": 2.0,
136
+ "eval_accuracy": 0.3767329716696805,
137
+ "eval_f1": 0.6076237435994689,
138
+ "eval_loss": 0.24850943684577942,
139
+ "eval_roc_auc": 0.7578414834576195,
140
+ "eval_runtime": 21.0346,
141
+ "eval_samples_per_second": 78.87,
142
+ "eval_steps_per_second": 9.888,
143
+ "step": 1660
144
+ },
145
+ {
146
+ "epoch": 2.0481927710843375,
147
+ "grad_norm": 0.8881352543830872,
148
+ "learning_rate": 1.7269076305220884e-05,
149
+ "loss": 0.2341,
150
+ "step": 1700
151
+ },
152
+ {
153
+ "epoch": 2.1686746987951806,
154
+ "grad_norm": 1.4135143756866455,
155
+ "learning_rate": 1.710843373493976e-05,
156
+ "loss": 0.2119,
157
+ "step": 1800
158
+ },
159
+ {
160
+ "epoch": 2.289156626506024,
161
+ "grad_norm": 1.291266918182373,
162
+ "learning_rate": 1.6947791164658637e-05,
163
+ "loss": 0.2074,
164
+ "step": 1900
165
+ },
166
+ {
167
+ "epoch": 2.4096385542168672,
168
+ "grad_norm": 1.311950445175171,
169
+ "learning_rate": 1.6787148594377512e-05,
170
+ "loss": 0.2063,
171
+ "step": 2000
172
+ },
173
+ {
174
+ "epoch": 2.5301204819277108,
175
+ "grad_norm": 1.6564782857894897,
176
+ "learning_rate": 1.6626506024096387e-05,
177
+ "loss": 0.2169,
178
+ "step": 2100
179
+ },
180
+ {
181
+ "epoch": 2.6506024096385543,
182
+ "grad_norm": 2.071871757507324,
183
+ "learning_rate": 1.6465863453815262e-05,
184
+ "loss": 0.2041,
185
+ "step": 2200
186
+ },
187
+ {
188
+ "epoch": 2.7710843373493974,
189
+ "grad_norm": 1.6373318433761597,
190
+ "learning_rate": 1.6305220883534137e-05,
191
+ "loss": 0.2149,
192
+ "step": 2300
193
+ },
194
+ {
195
+ "epoch": 2.891566265060241,
196
+ "grad_norm": 1.5182716846466064,
197
+ "learning_rate": 1.6144578313253015e-05,
198
+ "loss": 0.2057,
199
+ "step": 2400
200
  },
201
  {
202
  "epoch": 3.0,
203
+ "eval_accuracy": 0.3821579264617239,
204
+ "eval_f1": 0.6133793367835921,
205
+ "eval_loss": 0.24218665063381195,
206
+ "eval_roc_auc": 0.7590708449600018,
207
+ "eval_runtime": 20.7829,
208
+ "eval_samples_per_second": 79.825,
209
+ "eval_steps_per_second": 10.008,
210
+ "step": 2490
211
+ },
212
+ {
213
+ "epoch": 3.0120481927710845,
214
+ "grad_norm": 1.7013568878173828,
215
+ "learning_rate": 1.5983935742971887e-05,
216
+ "loss": 0.202,
217
+ "step": 2500
218
+ },
219
+ {
220
+ "epoch": 3.1325301204819276,
221
+ "grad_norm": 1.2363818883895874,
222
+ "learning_rate": 1.5823293172690762e-05,
223
+ "loss": 0.1797,
224
+ "step": 2600
225
+ },
226
+ {
227
+ "epoch": 3.253012048192771,
228
+ "grad_norm": 2.084066390991211,
229
+ "learning_rate": 1.566265060240964e-05,
230
+ "loss": 0.1788,
231
+ "step": 2700
232
+ },
233
+ {
234
+ "epoch": 3.3734939759036147,
235
+ "grad_norm": 1.1712193489074707,
236
+ "learning_rate": 1.5502008032128516e-05,
237
+ "loss": 0.169,
238
+ "step": 2800
239
+ },
240
+ {
241
+ "epoch": 3.4939759036144578,
242
+ "grad_norm": 1.9433542490005493,
243
+ "learning_rate": 1.534136546184739e-05,
244
+ "loss": 0.1712,
245
+ "step": 2900
246
+ },
247
+ {
248
+ "epoch": 3.6144578313253013,
249
+ "grad_norm": 2.300347328186035,
250
+ "learning_rate": 1.5180722891566266e-05,
251
+ "loss": 0.1749,
252
+ "step": 3000
253
+ },
254
+ {
255
+ "epoch": 3.734939759036145,
256
+ "grad_norm": 1.4740608930587769,
257
+ "learning_rate": 1.5020080321285142e-05,
258
+ "loss": 0.1736,
259
+ "step": 3100
260
+ },
261
+ {
262
+ "epoch": 3.855421686746988,
263
+ "grad_norm": 1.7682331800460815,
264
+ "learning_rate": 1.4859437751004017e-05,
265
+ "loss": 0.1635,
266
+ "step": 3200
267
+ },
268
+ {
269
+ "epoch": 3.9759036144578315,
270
+ "grad_norm": 1.4352338314056396,
271
+ "learning_rate": 1.4698795180722894e-05,
272
+ "loss": 0.1587,
273
+ "step": 3300
274
  },
275
  {
276
  "epoch": 4.0,
277
+ "eval_accuracy": 0.38276069921639544,
278
+ "eval_f1": 0.6340291889894697,
279
+ "eval_loss": 0.2462325543165207,
280
+ "eval_roc_auc": 0.7771644800479557,
281
+ "eval_runtime": 21.0125,
282
+ "eval_samples_per_second": 78.953,
283
+ "eval_steps_per_second": 9.899,
284
+ "step": 3320
285
+ },
286
+ {
287
+ "epoch": 4.096385542168675,
288
+ "grad_norm": 3.4992010593414307,
289
+ "learning_rate": 1.4538152610441769e-05,
290
+ "loss": 0.148,
291
+ "step": 3400
292
+ },
293
+ {
294
+ "epoch": 4.216867469879518,
295
+ "grad_norm": 1.555429458618164,
296
+ "learning_rate": 1.4377510040160642e-05,
297
+ "loss": 0.1397,
298
+ "step": 3500
299
+ },
300
+ {
301
+ "epoch": 4.337349397590361,
302
+ "grad_norm": 2.655567169189453,
303
+ "learning_rate": 1.4216867469879519e-05,
304
+ "loss": 0.1412,
305
+ "step": 3600
306
+ },
307
+ {
308
+ "epoch": 4.457831325301205,
309
+ "grad_norm": 1.797630786895752,
310
+ "learning_rate": 1.4056224899598394e-05,
311
+ "loss": 0.1419,
312
+ "step": 3700
313
+ },
314
+ {
315
+ "epoch": 4.578313253012048,
316
+ "grad_norm": 1.2415262460708618,
317
+ "learning_rate": 1.3895582329317269e-05,
318
+ "loss": 0.1405,
319
+ "step": 3800
320
+ },
321
+ {
322
+ "epoch": 4.698795180722891,
323
+ "grad_norm": 1.4111042022705078,
324
+ "learning_rate": 1.3734939759036146e-05,
325
+ "loss": 0.1349,
326
+ "step": 3900
327
+ },
328
+ {
329
+ "epoch": 4.8192771084337345,
330
+ "grad_norm": 2.2596189975738525,
331
+ "learning_rate": 1.357429718875502e-05,
332
+ "loss": 0.1291,
333
+ "step": 4000
334
+ },
335
+ {
336
+ "epoch": 4.9397590361445785,
337
+ "grad_norm": 2.264066457748413,
338
+ "learning_rate": 1.3413654618473897e-05,
339
+ "loss": 0.1412,
340
+ "step": 4100
341
  },
342
  {
343
  "epoch": 5.0,
344
+ "eval_accuracy": 0.38637733574442434,
345
+ "eval_f1": 0.627102627102627,
346
+ "eval_loss": 0.25749197602272034,
347
+ "eval_roc_auc": 0.7690261217645041,
348
+ "eval_runtime": 20.8914,
349
+ "eval_samples_per_second": 79.411,
350
+ "eval_steps_per_second": 9.956,
351
+ "step": 4150
352
+ },
353
+ {
354
+ "epoch": 5.0602409638554215,
355
+ "grad_norm": 1.1299407482147217,
356
+ "learning_rate": 1.3253012048192772e-05,
357
+ "loss": 0.1223,
358
+ "step": 4200
359
+ },
360
+ {
361
+ "epoch": 5.180722891566265,
362
+ "grad_norm": 1.1912181377410889,
363
+ "learning_rate": 1.309236947791165e-05,
364
+ "loss": 0.1112,
365
+ "step": 4300
366
+ },
367
+ {
368
+ "epoch": 5.301204819277109,
369
+ "grad_norm": 2.00722074508667,
370
+ "learning_rate": 1.2931726907630524e-05,
371
+ "loss": 0.1035,
372
+ "step": 4400
373
+ },
374
+ {
375
+ "epoch": 5.421686746987952,
376
+ "grad_norm": 1.543757677078247,
377
+ "learning_rate": 1.2771084337349398e-05,
378
+ "loss": 0.1082,
379
+ "step": 4500
380
+ },
381
+ {
382
+ "epoch": 5.542168674698795,
383
+ "grad_norm": 1.6844489574432373,
384
+ "learning_rate": 1.2610441767068273e-05,
385
+ "loss": 0.1113,
386
+ "step": 4600
387
+ },
388
+ {
389
+ "epoch": 5.662650602409639,
390
+ "grad_norm": 2.3987362384796143,
391
+ "learning_rate": 1.244979919678715e-05,
392
+ "loss": 0.1054,
393
+ "step": 4700
394
+ },
395
+ {
396
+ "epoch": 5.783132530120482,
397
+ "grad_norm": 1.196558952331543,
398
+ "learning_rate": 1.2289156626506024e-05,
399
+ "loss": 0.1076,
400
+ "step": 4800
401
+ },
402
+ {
403
+ "epoch": 5.903614457831325,
404
+ "grad_norm": 1.7159672975540161,
405
+ "learning_rate": 1.2128514056224901e-05,
406
+ "loss": 0.1129,
407
+ "step": 4900
408
+ },
409
+ {
410
+ "epoch": 6.0,
411
+ "eval_accuracy": 0.38396624472573837,
412
+ "eval_f1": 0.6401569186875892,
413
+ "eval_loss": 0.28615912795066833,
414
+ "eval_roc_auc": 0.7876849584660104,
415
+ "eval_runtime": 21.0174,
416
+ "eval_samples_per_second": 78.935,
417
+ "eval_steps_per_second": 9.897,
418
+ "step": 4980
419
+ },
420
+ {
421
+ "epoch": 6.024096385542169,
422
+ "grad_norm": 1.7499059438705444,
423
+ "learning_rate": 1.1967871485943776e-05,
424
+ "loss": 0.101,
425
+ "step": 5000
426
+ },
427
+ {
428
+ "epoch": 6.144578313253012,
429
+ "grad_norm": 1.2233022451400757,
430
+ "learning_rate": 1.1807228915662651e-05,
431
+ "loss": 0.0835,
432
+ "step": 5100
433
+ },
434
+ {
435
+ "epoch": 6.265060240963855,
436
+ "grad_norm": 1.219558596611023,
437
+ "learning_rate": 1.1646586345381528e-05,
438
+ "loss": 0.082,
439
+ "step": 5200
440
+ },
441
+ {
442
+ "epoch": 6.385542168674699,
443
+ "grad_norm": 2.4673707485198975,
444
+ "learning_rate": 1.1485943775100403e-05,
445
+ "loss": 0.0808,
446
+ "step": 5300
447
+ },
448
+ {
449
+ "epoch": 6.506024096385542,
450
+ "grad_norm": 2.749701738357544,
451
+ "learning_rate": 1.132530120481928e-05,
452
+ "loss": 0.0891,
453
+ "step": 5400
454
+ },
455
+ {
456
+ "epoch": 6.626506024096385,
457
+ "grad_norm": 2.653024196624756,
458
+ "learning_rate": 1.1164658634538153e-05,
459
+ "loss": 0.0895,
460
+ "step": 5500
461
+ },
462
+ {
463
+ "epoch": 6.746987951807229,
464
+ "grad_norm": 1.81606924533844,
465
+ "learning_rate": 1.1004016064257028e-05,
466
+ "loss": 0.0827,
467
+ "step": 5600
468
+ },
469
+ {
470
+ "epoch": 6.867469879518072,
471
+ "grad_norm": 2.775585174560547,
472
+ "learning_rate": 1.0843373493975904e-05,
473
+ "loss": 0.0872,
474
+ "step": 5700
475
+ },
476
+ {
477
+ "epoch": 6.9879518072289155,
478
+ "grad_norm": 3.0529415607452393,
479
+ "learning_rate": 1.068273092369478e-05,
480
+ "loss": 0.0754,
481
+ "step": 5800
482
+ },
483
+ {
484
+ "epoch": 7.0,
485
+ "eval_accuracy": 0.3899939722724533,
486
+ "eval_f1": 0.6352293577981651,
487
+ "eval_loss": 0.30027899146080017,
488
+ "eval_roc_auc": 0.7791627807894616,
489
+ "eval_runtime": 20.9855,
490
+ "eval_samples_per_second": 79.054,
491
+ "eval_steps_per_second": 9.912,
492
+ "step": 5810
493
+ },
494
+ {
495
+ "epoch": 7.108433734939759,
496
+ "grad_norm": 1.6993205547332764,
497
+ "learning_rate": 1.0522088353413654e-05,
498
+ "loss": 0.0723,
499
+ "step": 5900
500
+ },
501
+ {
502
+ "epoch": 7.228915662650603,
503
+ "grad_norm": 2.1551597118377686,
504
+ "learning_rate": 1.0361445783132531e-05,
505
+ "loss": 0.0685,
506
+ "step": 6000
507
+ },
508
+ {
509
+ "epoch": 7.349397590361446,
510
+ "grad_norm": 1.4902188777923584,
511
+ "learning_rate": 1.0200803212851406e-05,
512
+ "loss": 0.0664,
513
+ "step": 6100
514
+ },
515
+ {
516
+ "epoch": 7.469879518072289,
517
+ "grad_norm": 0.829651951789856,
518
+ "learning_rate": 1.0040160642570283e-05,
519
+ "loss": 0.0662,
520
+ "step": 6200
521
+ },
522
+ {
523
+ "epoch": 7.590361445783133,
524
+ "grad_norm": 2.141355037689209,
525
+ "learning_rate": 9.879518072289156e-06,
526
+ "loss": 0.0654,
527
+ "step": 6300
528
+ },
529
+ {
530
+ "epoch": 7.710843373493976,
531
+ "grad_norm": 0.7993516325950623,
532
+ "learning_rate": 9.718875502008033e-06,
533
+ "loss": 0.0669,
534
+ "step": 6400
535
+ },
536
+ {
537
+ "epoch": 7.831325301204819,
538
+ "grad_norm": 0.8961694836616516,
539
+ "learning_rate": 9.558232931726908e-06,
540
+ "loss": 0.0677,
541
+ "step": 6500
542
+ },
543
+ {
544
+ "epoch": 7.951807228915663,
545
+ "grad_norm": 1.8866826295852661,
546
+ "learning_rate": 9.397590361445785e-06,
547
+ "loss": 0.058,
548
+ "step": 6600
549
+ },
550
+ {
551
+ "epoch": 8.0,
552
+ "eval_accuracy": 0.3887884267631103,
553
+ "eval_f1": 0.6461868190748349,
554
+ "eval_loss": 0.3176693618297577,
555
+ "eval_roc_auc": 0.790825180731994,
556
+ "eval_runtime": 20.9796,
557
+ "eval_samples_per_second": 79.077,
558
+ "eval_steps_per_second": 9.914,
559
+ "step": 6640
560
+ },
561
+ {
562
+ "epoch": 8.072289156626505,
563
+ "grad_norm": 0.4812127947807312,
564
+ "learning_rate": 9.23694779116466e-06,
565
+ "loss": 0.0543,
566
+ "step": 6700
567
+ },
568
+ {
569
+ "epoch": 8.19277108433735,
570
+ "grad_norm": 1.1170074939727783,
571
+ "learning_rate": 9.076305220883535e-06,
572
+ "loss": 0.0534,
573
+ "step": 6800
574
+ },
575
+ {
576
+ "epoch": 8.313253012048193,
577
+ "grad_norm": 2.044552803039551,
578
+ "learning_rate": 8.91566265060241e-06,
579
+ "loss": 0.0541,
580
+ "step": 6900
581
+ },
582
+ {
583
+ "epoch": 8.433734939759036,
584
+ "grad_norm": 2.3580517768859863,
585
+ "learning_rate": 8.755020080321286e-06,
586
+ "loss": 0.046,
587
+ "step": 7000
588
+ },
589
+ {
590
+ "epoch": 8.55421686746988,
591
+ "grad_norm": 2.568995952606201,
592
+ "learning_rate": 8.594377510040161e-06,
593
+ "loss": 0.0516,
594
+ "step": 7100
595
+ },
596
+ {
597
+ "epoch": 8.674698795180722,
598
+ "grad_norm": 0.7591239213943481,
599
+ "learning_rate": 8.433734939759038e-06,
600
+ "loss": 0.0503,
601
+ "step": 7200
602
+ },
603
+ {
604
+ "epoch": 8.795180722891565,
605
+ "grad_norm": 1.0098503828048706,
606
+ "learning_rate": 8.273092369477911e-06,
607
+ "loss": 0.0452,
608
+ "step": 7300
609
+ },
610
+ {
611
+ "epoch": 8.91566265060241,
612
+ "grad_norm": 2.4211244583129883,
613
+ "learning_rate": 8.112449799196788e-06,
614
+ "loss": 0.0611,
615
+ "step": 7400
616
+ },
617
+ {
618
+ "epoch": 9.0,
619
+ "eval_accuracy": 0.38396624472573837,
620
+ "eval_f1": 0.6377283414722372,
621
+ "eval_loss": 0.33842870593070984,
622
+ "eval_roc_auc": 0.783423869627736,
623
+ "eval_runtime": 20.9102,
624
+ "eval_samples_per_second": 79.339,
625
+ "eval_steps_per_second": 9.947,
626
+ "step": 7470
627
+ },
628
+ {
629
+ "epoch": 9.036144578313253,
630
+ "grad_norm": 1.7786929607391357,
631
+ "learning_rate": 7.951807228915663e-06,
632
+ "loss": 0.0467,
633
+ "step": 7500
634
+ },
635
+ {
636
+ "epoch": 9.156626506024097,
637
+ "grad_norm": 0.9288263916969299,
638
+ "learning_rate": 7.79116465863454e-06,
639
+ "loss": 0.0358,
640
+ "step": 7600
641
+ },
642
+ {
643
+ "epoch": 9.27710843373494,
644
+ "grad_norm": 1.6899335384368896,
645
+ "learning_rate": 7.630522088353415e-06,
646
+ "loss": 0.0403,
647
+ "step": 7700
648
+ },
649
+ {
650
+ "epoch": 9.397590361445783,
651
+ "grad_norm": 0.633351743221283,
652
+ "learning_rate": 7.469879518072289e-06,
653
+ "loss": 0.0409,
654
+ "step": 7800
655
+ },
656
+ {
657
+ "epoch": 9.518072289156626,
658
+ "grad_norm": 1.880730152130127,
659
+ "learning_rate": 7.309236947791165e-06,
660
+ "loss": 0.04,
661
+ "step": 7900
662
+ },
663
+ {
664
+ "epoch": 9.638554216867469,
665
+ "grad_norm": 0.7761407494544983,
666
+ "learning_rate": 7.148594377510041e-06,
667
+ "loss": 0.0416,
668
+ "step": 8000
669
+ },
670
+ {
671
+ "epoch": 9.759036144578314,
672
+ "grad_norm": 1.7540706396102905,
673
+ "learning_rate": 6.987951807228917e-06,
674
+ "loss": 0.0383,
675
+ "step": 8100
676
+ },
677
+ {
678
+ "epoch": 9.879518072289157,
679
+ "grad_norm": 1.621785044670105,
680
+ "learning_rate": 6.8273092369477925e-06,
681
+ "loss": 0.0399,
682
+ "step": 8200
683
+ },
684
+ {
685
+ "epoch": 10.0,
686
+ "grad_norm": 4.052353382110596,
687
+ "learning_rate": 6.666666666666667e-06,
688
+ "loss": 0.0418,
689
+ "step": 8300
690
+ },
691
+ {
692
+ "epoch": 10.0,
693
+ "eval_accuracy": 0.38155515370705245,
694
+ "eval_f1": 0.6405322783672002,
695
+ "eval_loss": 0.3592796325683594,
696
+ "eval_roc_auc": 0.7861850408280293,
697
+ "eval_runtime": 20.9788,
698
+ "eval_samples_per_second": 79.08,
699
+ "eval_steps_per_second": 9.915,
700
+ "step": 8300
701
+ },
702
+ {
703
+ "epoch": 10.120481927710843,
704
+ "grad_norm": 2.524548053741455,
705
+ "learning_rate": 6.5060240963855425e-06,
706
+ "loss": 0.0327,
707
+ "step": 8400
708
+ },
709
+ {
710
+ "epoch": 10.240963855421686,
711
+ "grad_norm": 2.5789272785186768,
712
+ "learning_rate": 6.345381526104418e-06,
713
+ "loss": 0.0329,
714
+ "step": 8500
715
+ },
716
+ {
717
+ "epoch": 10.36144578313253,
718
+ "grad_norm": 1.6276124715805054,
719
+ "learning_rate": 6.184738955823294e-06,
720
+ "loss": 0.0327,
721
+ "step": 8600
722
+ },
723
+ {
724
+ "epoch": 10.481927710843374,
725
+ "grad_norm": 1.2820446491241455,
726
+ "learning_rate": 6.02409638554217e-06,
727
+ "loss": 0.0319,
728
+ "step": 8700
729
+ },
730
+ {
731
+ "epoch": 10.602409638554217,
732
+ "grad_norm": 0.8200409412384033,
733
+ "learning_rate": 5.863453815261044e-06,
734
+ "loss": 0.0338,
735
+ "step": 8800
736
+ },
737
+ {
738
+ "epoch": 10.72289156626506,
739
+ "grad_norm": 1.6061540842056274,
740
+ "learning_rate": 5.70281124497992e-06,
741
+ "loss": 0.0311,
742
+ "step": 8900
743
+ },
744
+ {
745
+ "epoch": 10.843373493975903,
746
+ "grad_norm": 2.1266753673553467,
747
+ "learning_rate": 5.542168674698796e-06,
748
+ "loss": 0.0309,
749
+ "step": 9000
750
+ },
751
+ {
752
+ "epoch": 10.963855421686747,
753
+ "grad_norm": 1.8387219905853271,
754
+ "learning_rate": 5.381526104417672e-06,
755
+ "loss": 0.0338,
756
+ "step": 9100
757
+ },
758
+ {
759
+ "epoch": 11.0,
760
+ "eval_accuracy": 0.38155515370705245,
761
+ "eval_f1": 0.6427417907769604,
762
+ "eval_loss": 0.37010136246681213,
763
+ "eval_roc_auc": 0.787888262607272,
764
+ "eval_runtime": 20.9246,
765
+ "eval_samples_per_second": 79.285,
766
+ "eval_steps_per_second": 9.94,
767
+ "step": 9130
768
+ },
769
+ {
770
+ "epoch": 11.08433734939759,
771
+ "grad_norm": 1.1557573080062866,
772
+ "learning_rate": 5.220883534136547e-06,
773
+ "loss": 0.0261,
774
+ "step": 9200
775
+ },
776
+ {
777
+ "epoch": 11.204819277108435,
778
+ "grad_norm": 0.8962405920028687,
779
+ "learning_rate": 5.060240963855422e-06,
780
+ "loss": 0.0247,
781
+ "step": 9300
782
+ },
783
+ {
784
+ "epoch": 11.325301204819278,
785
+ "grad_norm": 0.8799346089363098,
786
+ "learning_rate": 4.899598393574298e-06,
787
+ "loss": 0.0255,
788
+ "step": 9400
789
+ },
790
+ {
791
+ "epoch": 11.44578313253012,
792
+ "grad_norm": 0.8840754628181458,
793
+ "learning_rate": 4.7389558232931736e-06,
794
+ "loss": 0.0266,
795
+ "step": 9500
796
+ },
797
+ {
798
+ "epoch": 11.566265060240964,
799
+ "grad_norm": 1.1683375835418701,
800
+ "learning_rate": 4.578313253012049e-06,
801
+ "loss": 0.0261,
802
+ "step": 9600
803
+ },
804
+ {
805
+ "epoch": 11.686746987951807,
806
+ "grad_norm": 0.885728120803833,
807
+ "learning_rate": 4.4176706827309244e-06,
808
+ "loss": 0.0257,
809
+ "step": 9700
810
+ },
811
+ {
812
+ "epoch": 11.80722891566265,
813
+ "grad_norm": 0.2890942394733429,
814
+ "learning_rate": 4.2570281124497995e-06,
815
+ "loss": 0.029,
816
+ "step": 9800
817
+ },
818
+ {
819
+ "epoch": 11.927710843373493,
820
+ "grad_norm": 2.2087390422821045,
821
+ "learning_rate": 4.096385542168675e-06,
822
+ "loss": 0.0255,
823
+ "step": 9900
824
+ },
825
+ {
826
+ "epoch": 12.0,
827
+ "eval_accuracy": 0.3845690174804099,
828
+ "eval_f1": 0.6420863309352519,
829
+ "eval_loss": 0.3844664394855499,
830
+ "eval_roc_auc": 0.7870362182333409,
831
+ "eval_runtime": 20.95,
832
+ "eval_samples_per_second": 79.189,
833
+ "eval_steps_per_second": 9.928,
834
+ "step": 9960
835
+ },
836
+ {
837
+ "epoch": 12.048192771084338,
838
+ "grad_norm": 1.7163885831832886,
839
+ "learning_rate": 3.93574297188755e-06,
840
+ "loss": 0.0261,
841
+ "step": 10000
842
+ },
843
+ {
844
+ "epoch": 12.168674698795181,
845
+ "grad_norm": 1.0416496992111206,
846
+ "learning_rate": 3.7751004016064258e-06,
847
+ "loss": 0.0215,
848
+ "step": 10100
849
+ },
850
+ {
851
+ "epoch": 12.289156626506024,
852
+ "grad_norm": 1.3379343748092651,
853
+ "learning_rate": 3.6144578313253016e-06,
854
+ "loss": 0.0223,
855
+ "step": 10200
856
+ },
857
+ {
858
+ "epoch": 12.409638554216867,
859
+ "grad_norm": 0.658170759677887,
860
+ "learning_rate": 3.453815261044177e-06,
861
+ "loss": 0.0192,
862
+ "step": 10300
863
+ },
864
+ {
865
+ "epoch": 12.53012048192771,
866
+ "grad_norm": 0.9047495722770691,
867
+ "learning_rate": 3.2931726907630525e-06,
868
+ "loss": 0.0237,
869
+ "step": 10400
870
+ },
871
+ {
872
+ "epoch": 12.650602409638553,
873
+ "grad_norm": 1.0494842529296875,
874
+ "learning_rate": 3.132530120481928e-06,
875
+ "loss": 0.0193,
876
+ "step": 10500
877
+ },
878
+ {
879
+ "epoch": 12.771084337349398,
880
+ "grad_norm": 0.32704225182533264,
881
+ "learning_rate": 2.9718875502008034e-06,
882
+ "loss": 0.0208,
883
+ "step": 10600
884
+ },
885
+ {
886
+ "epoch": 12.891566265060241,
887
+ "grad_norm": 0.49072301387786865,
888
+ "learning_rate": 2.811244979919679e-06,
889
+ "loss": 0.0202,
890
+ "step": 10700
891
+ },
892
+ {
893
+ "epoch": 13.0,
894
+ "eval_accuracy": 0.38095238095238093,
895
+ "eval_f1": 0.6474642162926313,
896
+ "eval_loss": 0.3947090208530426,
897
+ "eval_roc_auc": 0.7937935574323361,
898
+ "eval_runtime": 21.0756,
899
+ "eval_samples_per_second": 78.716,
900
+ "eval_steps_per_second": 9.869,
901
+ "step": 10790
902
  }
903
  ],
904
+ "logging_steps": 100,
905
+ "max_steps": 12450,
906
+ "num_input_tokens_seen": 0,
907
+ "num_train_epochs": 15,
908
+ "save_steps": 500,
909
+ "stateful_callbacks": {
910
+ "TrainerControl": {
911
+ "args": {
912
+ "should_epoch_stop": false,
913
+ "should_evaluate": false,
914
+ "should_log": false,
915
+ "should_save": true,
916
+ "should_training_stop": false
917
+ },
918
+ "attributes": {}
919
+ }
920
+ },
921
+ "total_flos": 8035320180521232.0,
922
+ "train_batch_size": 8,
923
  "trial_name": null,
924
  "trial_params": null
925
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afc0e69c798b4630ee068a05dd4f1a9404d270ac23a1062f1e52f9348c3979b0
3
- size 3387
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90c00f8b7311babb09b5cfce1bf4c2db61f426d28c044e987c559cbb8c1af657
3
+ size 5304