Shushant commited on
Commit
705ef4a
·
verified ·
1 Parent(s): c26b12f

Upload folder using huggingface_hub

Browse files
checkpoint-2224/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
checkpoint-2224/config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-v3-large",
3
+ "architectures": [
4
+ "DebertaV2ForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 1024,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 4096,
12
+ "layer_norm_eps": 1e-07,
13
+ "max_position_embeddings": 512,
14
+ "max_relative_positions": -1,
15
+ "model_type": "deberta-v2",
16
+ "norm_rel_ebd": "layer_norm",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 24,
19
+ "pad_token_id": 0,
20
+ "pooler_dropout": 0,
21
+ "pooler_hidden_act": "gelu",
22
+ "pooler_hidden_size": 1024,
23
+ "pos_att_type": [
24
+ "p2c",
25
+ "c2p"
26
+ ],
27
+ "position_biased_input": false,
28
+ "position_buckets": 256,
29
+ "relative_attention": true,
30
+ "share_att_key": true,
31
+ "torch_dtype": "float32",
32
+ "transformers_version": "4.45.2",
33
+ "type_vocab_size": 0,
34
+ "vocab_size": 128100
35
+ }
checkpoint-2224/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b11bf556b0404394e1ea7c06d8f28c0804f27d6153ff6a6bc3c2afb5f1fbbeae
3
+ size 1740304440
checkpoint-2224/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76c29b10ece3dca07c64cb96fbb78d7739320c19d3fc579decb2483b3db3a06d
3
+ size 3480840240
checkpoint-2224/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85177ca78e6b4785c39813f194ca08fa112d0e0f51bab9987c2f154e66974e86
3
+ size 14244
checkpoint-2224/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b541a4ee466473f63ae56f1de9e4b406788552c9dd2fe81a28f965b1f3c5fa67
3
+ size 1064
checkpoint-2224/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
checkpoint-2224/spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
checkpoint-2224/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "[PAD]",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "[CLS]",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "[SEP]",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "[UNK]",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "128000": {
37
+ "content": "[MASK]",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "[CLS]",
46
+ "clean_up_tokenization_spaces": false,
47
+ "cls_token": "[CLS]",
48
+ "do_lower_case": false,
49
+ "eos_token": "[SEP]",
50
+ "mask_token": "[MASK]",
51
+ "model_max_length": 1000000000000000019884624838656,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "sp_model_kwargs": {},
55
+ "split_by_punct": false,
56
+ "tokenizer_class": "DebertaV2Tokenizer",
57
+ "unk_token": "[UNK]",
58
+ "vocab_type": "spm"
59
+ }
checkpoint-2224/trainer_state.json ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.987703144062033,
3
+ "best_model_checkpoint": "./model/checkpoint-2224",
4
+ "epoch": 4.0,
5
+ "eval_steps": 500,
6
+ "global_step": 2224,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.8992805755395683,
13
+ "grad_norm": 15.831398963928223,
14
+ "learning_rate": 2.731834532374101e-05,
15
+ "loss": 0.1341,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.9858275687531635,
21
+ "eval_f1": 0.9858428119593087,
22
+ "eval_loss": 0.06046506017446518,
23
+ "eval_precision": 0.9858980040864742,
24
+ "eval_recall": 0.9858275687531635,
25
+ "eval_runtime": 123.5336,
26
+ "eval_samples_per_second": 47.979,
27
+ "eval_steps_per_second": 1.506,
28
+ "step": 556
29
+ },
30
+ {
31
+ "epoch": 1.7985611510791366,
32
+ "grad_norm": 0.1419404149055481,
33
+ "learning_rate": 2.4620503597122304e-05,
34
+ "loss": 0.0458,
35
+ "step": 1000
36
+ },
37
+ {
38
+ "epoch": 2.0,
39
+ "eval_accuracy": 0.9844778133963219,
40
+ "eval_f1": 0.9844685311325714,
41
+ "eval_loss": 0.06095171347260475,
42
+ "eval_precision": 0.984470890080927,
43
+ "eval_recall": 0.9844778133963219,
44
+ "eval_runtime": 324.024,
45
+ "eval_samples_per_second": 18.292,
46
+ "eval_steps_per_second": 0.574,
47
+ "step": 1112
48
+ },
49
+ {
50
+ "epoch": 2.697841726618705,
51
+ "grad_norm": 0.0644543468952179,
52
+ "learning_rate": 2.1928057553956834e-05,
53
+ "loss": 0.0255,
54
+ "step": 1500
55
+ },
56
+ {
57
+ "epoch": 3.0,
58
+ "eval_accuracy": 0.9856588493335583,
59
+ "eval_f1": 0.9856725274058569,
60
+ "eval_loss": 0.06607956439256668,
61
+ "eval_precision": 0.985717476255087,
62
+ "eval_recall": 0.9856588493335583,
63
+ "eval_runtime": 23.8528,
64
+ "eval_samples_per_second": 248.482,
65
+ "eval_steps_per_second": 7.798,
66
+ "step": 1668
67
+ },
68
+ {
69
+ "epoch": 3.597122302158273,
70
+ "grad_norm": 0.006873908918350935,
71
+ "learning_rate": 1.923021582733813e-05,
72
+ "loss": 0.0098,
73
+ "step": 2000
74
+ },
75
+ {
76
+ "epoch": 4.0,
77
+ "eval_accuracy": 0.9876834823688206,
78
+ "eval_f1": 0.987703144062033,
79
+ "eval_loss": 0.06861425936222076,
80
+ "eval_precision": 0.9878126950690342,
81
+ "eval_recall": 0.9876834823688206,
82
+ "eval_runtime": 123.3642,
83
+ "eval_samples_per_second": 48.045,
84
+ "eval_steps_per_second": 1.508,
85
+ "step": 2224
86
+ }
87
+ ],
88
+ "logging_steps": 500,
89
+ "max_steps": 5560,
90
+ "num_input_tokens_seen": 0,
91
+ "num_train_epochs": 10,
92
+ "save_steps": 500,
93
+ "stateful_callbacks": {
94
+ "TrainerControl": {
95
+ "args": {
96
+ "should_epoch_stop": false,
97
+ "should_evaluate": false,
98
+ "should_log": false,
99
+ "should_save": true,
100
+ "should_training_stop": false
101
+ },
102
+ "attributes": {}
103
+ }
104
+ },
105
+ "total_flos": 1.656985150205952e+16,
106
+ "train_batch_size": 32,
107
+ "trial_name": null,
108
+ "trial_params": null
109
+ }
checkpoint-2224/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5dd6029c2d02a2d227cfcfcd43bd2b86533f1c7595b8493db0abfe49ac98968
3
+ size 5176
checkpoint-5560/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
checkpoint-5560/config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-v3-large",
3
+ "architectures": [
4
+ "DebertaV2ForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 1024,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 4096,
12
+ "layer_norm_eps": 1e-07,
13
+ "max_position_embeddings": 512,
14
+ "max_relative_positions": -1,
15
+ "model_type": "deberta-v2",
16
+ "norm_rel_ebd": "layer_norm",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 24,
19
+ "pad_token_id": 0,
20
+ "pooler_dropout": 0,
21
+ "pooler_hidden_act": "gelu",
22
+ "pooler_hidden_size": 1024,
23
+ "pos_att_type": [
24
+ "p2c",
25
+ "c2p"
26
+ ],
27
+ "position_biased_input": false,
28
+ "position_buckets": 256,
29
+ "relative_attention": true,
30
+ "share_att_key": true,
31
+ "torch_dtype": "float32",
32
+ "transformers_version": "4.45.2",
33
+ "type_vocab_size": 0,
34
+ "vocab_size": 128100
35
+ }
checkpoint-5560/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6782b752469709fc6a72923a4543b5b1d0e57e3b108dfc8dd291b73f9b2daaf2
3
+ size 1740304440
checkpoint-5560/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecf6c09c4aea7757c48baa8b2172975b9c1be4e7beed48e5eb43fe2a256de7fe
3
+ size 3480840240
checkpoint-5560/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afb310390bdb175362fb078a5ea9c249706f119df8dc0d9b927998297e473ff7
3
+ size 14244
checkpoint-5560/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03a0c13945210832df5a9103e7c3d6a675c2fafeff927b3fe10b9317b709fb44
3
+ size 1064
checkpoint-5560/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
checkpoint-5560/spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
checkpoint-5560/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "[PAD]",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "[CLS]",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "[SEP]",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "[UNK]",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "128000": {
37
+ "content": "[MASK]",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "[CLS]",
46
+ "clean_up_tokenization_spaces": false,
47
+ "cls_token": "[CLS]",
48
+ "do_lower_case": false,
49
+ "eos_token": "[SEP]",
50
+ "mask_token": "[MASK]",
51
+ "model_max_length": 1000000000000000019884624838656,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "sp_model_kwargs": {},
55
+ "split_by_punct": false,
56
+ "tokenizer_class": "DebertaV2Tokenizer",
57
+ "unk_token": "[UNK]",
58
+ "vocab_type": "spm"
59
+ }
checkpoint-5560/trainer_state.json ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.987703144062033,
3
+ "best_model_checkpoint": "./model/checkpoint-2224",
4
+ "epoch": 10.0,
5
+ "eval_steps": 500,
6
+ "global_step": 5560,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.8992805755395683,
13
+ "grad_norm": 15.831398963928223,
14
+ "learning_rate": 2.731834532374101e-05,
15
+ "loss": 0.1341,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.9858275687531635,
21
+ "eval_f1": 0.9858428119593087,
22
+ "eval_loss": 0.06046506017446518,
23
+ "eval_precision": 0.9858980040864742,
24
+ "eval_recall": 0.9858275687531635,
25
+ "eval_runtime": 123.5336,
26
+ "eval_samples_per_second": 47.979,
27
+ "eval_steps_per_second": 1.506,
28
+ "step": 556
29
+ },
30
+ {
31
+ "epoch": 1.7985611510791366,
32
+ "grad_norm": 0.1419404149055481,
33
+ "learning_rate": 2.4620503597122304e-05,
34
+ "loss": 0.0458,
35
+ "step": 1000
36
+ },
37
+ {
38
+ "epoch": 2.0,
39
+ "eval_accuracy": 0.9844778133963219,
40
+ "eval_f1": 0.9844685311325714,
41
+ "eval_loss": 0.06095171347260475,
42
+ "eval_precision": 0.984470890080927,
43
+ "eval_recall": 0.9844778133963219,
44
+ "eval_runtime": 324.024,
45
+ "eval_samples_per_second": 18.292,
46
+ "eval_steps_per_second": 0.574,
47
+ "step": 1112
48
+ },
49
+ {
50
+ "epoch": 2.697841726618705,
51
+ "grad_norm": 0.0644543468952179,
52
+ "learning_rate": 2.1928057553956834e-05,
53
+ "loss": 0.0255,
54
+ "step": 1500
55
+ },
56
+ {
57
+ "epoch": 3.0,
58
+ "eval_accuracy": 0.9856588493335583,
59
+ "eval_f1": 0.9856725274058569,
60
+ "eval_loss": 0.06607956439256668,
61
+ "eval_precision": 0.985717476255087,
62
+ "eval_recall": 0.9856588493335583,
63
+ "eval_runtime": 23.8528,
64
+ "eval_samples_per_second": 248.482,
65
+ "eval_steps_per_second": 7.798,
66
+ "step": 1668
67
+ },
68
+ {
69
+ "epoch": 3.597122302158273,
70
+ "grad_norm": 0.006873908918350935,
71
+ "learning_rate": 1.923021582733813e-05,
72
+ "loss": 0.0098,
73
+ "step": 2000
74
+ },
75
+ {
76
+ "epoch": 4.0,
77
+ "eval_accuracy": 0.9876834823688206,
78
+ "eval_f1": 0.987703144062033,
79
+ "eval_loss": 0.06861425936222076,
80
+ "eval_precision": 0.9878126950690342,
81
+ "eval_recall": 0.9876834823688206,
82
+ "eval_runtime": 123.3642,
83
+ "eval_samples_per_second": 48.045,
84
+ "eval_steps_per_second": 1.508,
85
+ "step": 2224
86
+ },
87
+ {
88
+ "epoch": 4.496402877697841,
89
+ "grad_norm": 0.0038243578746914864,
90
+ "learning_rate": 1.653776978417266e-05,
91
+ "loss": 0.0051,
92
+ "step": 2500
93
+ },
94
+ {
95
+ "epoch": 5.0,
96
+ "eval_accuracy": 0.98279061920027,
97
+ "eval_f1": 0.9827652293288254,
98
+ "eval_loss": 0.11865255236625671,
99
+ "eval_precision": 0.9828086020917639,
100
+ "eval_recall": 0.98279061920027,
101
+ "eval_runtime": 123.5475,
102
+ "eval_samples_per_second": 47.973,
103
+ "eval_steps_per_second": 1.505,
104
+ "step": 2780
105
+ },
106
+ {
107
+ "epoch": 5.39568345323741,
108
+ "grad_norm": 0.00035207424662075937,
109
+ "learning_rate": 1.3839928057553959e-05,
110
+ "loss": 0.0012,
111
+ "step": 3000
112
+ },
113
+ {
114
+ "epoch": 6.0,
115
+ "eval_accuracy": 0.9844778133963219,
116
+ "eval_f1": 0.9844549127279602,
117
+ "eval_loss": 0.10734612494707108,
118
+ "eval_precision": 0.9845009079108045,
119
+ "eval_recall": 0.9844778133963219,
120
+ "eval_runtime": 324.4042,
121
+ "eval_samples_per_second": 18.27,
122
+ "eval_steps_per_second": 0.573,
123
+ "step": 3336
124
+ },
125
+ {
126
+ "epoch": 6.294964028776978,
127
+ "grad_norm": 0.0010561492526903749,
128
+ "learning_rate": 1.1142086330935252e-05,
129
+ "loss": 0.0006,
130
+ "step": 3500
131
+ },
132
+ {
133
+ "epoch": 7.0,
134
+ "eval_accuracy": 0.9844778133963219,
135
+ "eval_f1": 0.9844617747386799,
136
+ "eval_loss": 0.1119009479880333,
137
+ "eval_precision": 0.9844800053876499,
138
+ "eval_recall": 0.9844778133963219,
139
+ "eval_runtime": 123.6461,
140
+ "eval_samples_per_second": 47.935,
141
+ "eval_steps_per_second": 1.504,
142
+ "step": 3892
143
+ },
144
+ {
145
+ "epoch": 7.194244604316546,
146
+ "grad_norm": 0.0001651465572649613,
147
+ "learning_rate": 8.444244604316547e-06,
148
+ "loss": 0.0,
149
+ "step": 4000
150
+ },
151
+ {
152
+ "epoch": 8.0,
153
+ "eval_accuracy": 0.9843090939767167,
154
+ "eval_f1": 0.9842866428636935,
155
+ "eval_loss": 0.12110123783349991,
156
+ "eval_precision": 0.9843290706362859,
157
+ "eval_recall": 0.9843090939767167,
158
+ "eval_runtime": 323.8258,
159
+ "eval_samples_per_second": 18.303,
160
+ "eval_steps_per_second": 0.574,
161
+ "step": 4448
162
+ },
163
+ {
164
+ "epoch": 8.093525179856115,
165
+ "grad_norm": 0.0001012256761896424,
166
+ "learning_rate": 5.7464028776978415e-06,
167
+ "loss": 0.0001,
168
+ "step": 4500
169
+ },
170
+ {
171
+ "epoch": 8.992805755395683,
172
+ "grad_norm": 0.00017143118020612746,
173
+ "learning_rate": 3.048561151079137e-06,
174
+ "loss": 0.0002,
175
+ "step": 5000
176
+ },
177
+ {
178
+ "epoch": 9.0,
179
+ "eval_accuracy": 0.9849839716551375,
180
+ "eval_f1": 0.9849730419562784,
181
+ "eval_loss": 0.11790579557418823,
182
+ "eval_precision": 0.9849792957695417,
183
+ "eval_recall": 0.9849839716551375,
184
+ "eval_runtime": 324.0234,
185
+ "eval_samples_per_second": 18.292,
186
+ "eval_steps_per_second": 0.574,
187
+ "step": 5004
188
+ },
189
+ {
190
+ "epoch": 9.892086330935252,
191
+ "grad_norm": 0.00013196947111282498,
192
+ "learning_rate": 3.507194244604317e-07,
193
+ "loss": 0.0001,
194
+ "step": 5500
195
+ },
196
+ {
197
+ "epoch": 10.0,
198
+ "eval_accuracy": 0.9849839716551375,
199
+ "eval_f1": 0.9849833368509429,
200
+ "eval_loss": 0.11824283748865128,
201
+ "eval_precision": 0.9849827613456668,
202
+ "eval_recall": 0.9849839716551375,
203
+ "eval_runtime": 223.7176,
204
+ "eval_samples_per_second": 26.493,
205
+ "eval_steps_per_second": 0.831,
206
+ "step": 5560
207
+ }
208
+ ],
209
+ "logging_steps": 500,
210
+ "max_steps": 5560,
211
+ "num_input_tokens_seen": 0,
212
+ "num_train_epochs": 10,
213
+ "save_steps": 500,
214
+ "stateful_callbacks": {
215
+ "TrainerControl": {
216
+ "args": {
217
+ "should_epoch_stop": false,
218
+ "should_evaluate": false,
219
+ "should_log": false,
220
+ "should_save": true,
221
+ "should_training_stop": true
222
+ },
223
+ "attributes": {}
224
+ }
225
+ },
226
+ "total_flos": 4.14246287551488e+16,
227
+ "train_batch_size": 32,
228
+ "trial_name": null,
229
+ "trial_params": null
230
+ }
checkpoint-5560/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5dd6029c2d02a2d227cfcfcd43bd2b86533f1c7595b8493db0abfe49ac98968
3
+ size 5176
logs/events.out.tfevents.1748245108.ADAPT-CLIN.1148101.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11e8d13a98de951dd92364a3ed0ae7021c12d2b994a9afb2493690d4b6306e85
3
- size 11584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fd8dffb7be0de1bbf2bc2e740f9d40c7b96aabcaffb97fbfa55efc0bc326269
3
+ size 12410
predictions.jsonl ADDED
The diff for this file is too large to render. See raw diff