BaltimoreCA68 commited on
Commit
578eccc
·
verified ·
1 Parent(s): ec7f67f

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. final_new/final_model_deberta_macro/added_tokens.json +3 -0
  2. final_new/final_model_deberta_macro/config.json +43 -0
  3. final_new/final_model_deberta_macro/special_tokens_map.json +15 -0
  4. final_new/final_model_deberta_macro/tokenizer.json +0 -0
  5. final_new/final_model_deberta_macro/tokenizer_config.json +59 -0
  6. final_new/results_hyper_search_DEBERTA/run-0/checkpoint-2000/config.json +43 -0
  7. final_new/results_hyper_search_DEBERTA/run-0/checkpoint-2000/tokenizer_config.json +59 -0
  8. final_new/results_hyper_search_DEBERTA/run-1/checkpoint-2000/added_tokens.json +3 -0
  9. final_new/results_hyper_search_DEBERTA/run-1/checkpoint-2000/config.json +43 -0
  10. final_new/results_hyper_search_DEBERTA/run-1/checkpoint-2000/special_tokens_map.json +15 -0
  11. final_new/results_hyper_search_DEBERTA/run-1/checkpoint-2000/tokenizer_config.json +59 -0
  12. final_new/results_hyper_search_DEBERTA/run-1/checkpoint-2000/trainer_state.json +79 -0
  13. final_new/results_hyper_search_DEBERTA/run-18/checkpoint-3000/added_tokens.json +3 -0
  14. final_new/results_hyper_search_DEBERTA/run-18/checkpoint-3000/config.json +43 -0
  15. final_new/results_hyper_search_DEBERTA/run-18/checkpoint-3000/special_tokens_map.json +15 -0
  16. final_new/results_hyper_search_DEBERTA/run-18/checkpoint-3000/tokenizer_config.json +59 -0
  17. final_new/results_hyper_search_DEBERTA/run-18/checkpoint-3000/trainer_state.json +98 -0
  18. final_new/results_hyper_search_DEBERTA/run-2/checkpoint-5000/added_tokens.json +3 -0
  19. final_new/results_hyper_search_DEBERTA/run-2/checkpoint-5000/config.json +43 -0
  20. final_new/results_hyper_search_DEBERTA/run-2/checkpoint-5000/special_tokens_map.json +15 -0
  21. final_new/results_hyper_search_DEBERTA/run-2/checkpoint-5000/tokenizer_config.json +59 -0
  22. final_new/results_hyper_search_DEBERTA/run-2/checkpoint-5000/trainer_state.json +136 -0
  23. final_new/results_hyper_search_DEBERTA/run-3/checkpoint-2000/added_tokens.json +3 -0
  24. final_new/results_hyper_search_DEBERTA/run-3/checkpoint-2000/config.json +43 -0
  25. final_new/results_hyper_search_DEBERTA/run-3/checkpoint-2000/special_tokens_map.json +15 -0
  26. final_new/results_hyper_search_DEBERTA/run-3/checkpoint-2000/tokenizer_config.json +59 -0
  27. final_new/results_hyper_search_DEBERTA/run-3/checkpoint-2000/trainer_state.json +79 -0
  28. final_new/results_hyper_search_DEBERTA/run-4/checkpoint-1000/added_tokens.json +3 -0
  29. final_new/results_hyper_search_DEBERTA/run-4/checkpoint-1000/config.json +43 -0
  30. final_new/results_hyper_search_DEBERTA/run-4/checkpoint-1000/special_tokens_map.json +15 -0
  31. final_new/results_hyper_search_DEBERTA/run-4/checkpoint-1000/tokenizer.json +0 -0
  32. final_new/results_hyper_search_DEBERTA/run-4/checkpoint-1000/tokenizer_config.json +59 -0
  33. final_new/results_hyper_search_DEBERTA/run-4/checkpoint-1000/trainer_state.json +60 -0
  34. final_new/results_hyper_search_DEBERTA/run-5/checkpoint-2000/added_tokens.json +3 -0
  35. final_new/results_hyper_search_DEBERTA/run-5/checkpoint-2000/config.json +43 -0
  36. final_new/results_hyper_search_DEBERTA/run-5/checkpoint-2000/special_tokens_map.json +15 -0
  37. final_new/results_hyper_search_DEBERTA/run-5/checkpoint-2000/tokenizer_config.json +59 -0
  38. final_new/results_hyper_search_DEBERTA/run-5/checkpoint-2000/trainer_state.json +79 -0
  39. final_new/results_hyper_search_DEBERTA/run-6/checkpoint-1000/added_tokens.json +3 -0
  40. final_new/results_hyper_search_DEBERTA/run-6/checkpoint-1000/config.json +43 -0
  41. final_new/results_hyper_search_DEBERTA/run-6/checkpoint-1000/tokenizer_config.json +59 -0
  42. final_new/results_hyper_search_DEBERTA/run-6/checkpoint-1000/trainer_state.json +60 -0
  43. final_new/results_hyper_search_DEBERTA/run-8/checkpoint-1000/added_tokens.json +3 -0
  44. final_new/results_hyper_search_DEBERTA/run-8/checkpoint-1000/config.json +43 -0
  45. final_new/results_hyper_search_DEBERTA/run-8/checkpoint-1000/special_tokens_map.json +15 -0
  46. final_new/results_hyper_search_DEBERTA/run-8/checkpoint-1000/tokenizer.json +0 -0
  47. final_new/results_hyper_search_DEBERTA/run-8/checkpoint-1000/tokenizer_config.json +59 -0
  48. final_new/results_hyper_search_DEBERTA/run-8/checkpoint-1000/trainer_state.json +60 -0
  49. final_new/train.py +223 -0
  50. test.py +0 -0
final_new/final_model_deberta_macro/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
final_new/final_model_deberta_macro/config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaV2ForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "dtype": "float32",
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "real",
12
+ "1": "fake"
13
+ },
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "label2id": {
17
+ "fake": 1,
18
+ "real": 0
19
+ },
20
+ "layer_norm_eps": 1e-07,
21
+ "legacy": true,
22
+ "max_position_embeddings": 512,
23
+ "max_relative_positions": -1,
24
+ "model_type": "deberta-v2",
25
+ "norm_rel_ebd": "layer_norm",
26
+ "num_attention_heads": 12,
27
+ "num_hidden_layers": 12,
28
+ "pad_token_id": 0,
29
+ "pooler_dropout": 0,
30
+ "pooler_hidden_act": "gelu",
31
+ "pooler_hidden_size": 768,
32
+ "pos_att_type": [
33
+ "p2c",
34
+ "c2p"
35
+ ],
36
+ "position_biased_input": false,
37
+ "position_buckets": 256,
38
+ "relative_attention": true,
39
+ "share_att_key": true,
40
+ "transformers_version": "4.57.1",
41
+ "type_vocab_size": 0,
42
+ "vocab_size": 128100
43
+ }
final_new/final_model_deberta_macro/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
final_new/final_model_deberta_macro/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
final_new/final_model_deberta_macro/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "extra_special_tokens": {},
50
+ "mask_token": "[MASK]",
51
+ "model_max_length": 1000000000000000019884624838656,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "sp_model_kwargs": {},
55
+ "split_by_punct": false,
56
+ "tokenizer_class": "DebertaV2Tokenizer",
57
+ "unk_token": "[UNK]",
58
+ "vocab_type": "spm"
59
+ }
final_new/results_hyper_search_DEBERTA/run-0/checkpoint-2000/config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaV2ForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "dtype": "float32",
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "real",
12
+ "1": "fake"
13
+ },
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "label2id": {
17
+ "fake": 1,
18
+ "real": 0
19
+ },
20
+ "layer_norm_eps": 1e-07,
21
+ "legacy": true,
22
+ "max_position_embeddings": 512,
23
+ "max_relative_positions": -1,
24
+ "model_type": "deberta-v2",
25
+ "norm_rel_ebd": "layer_norm",
26
+ "num_attention_heads": 12,
27
+ "num_hidden_layers": 12,
28
+ "pad_token_id": 0,
29
+ "pooler_dropout": 0,
30
+ "pooler_hidden_act": "gelu",
31
+ "pooler_hidden_size": 768,
32
+ "pos_att_type": [
33
+ "p2c",
34
+ "c2p"
35
+ ],
36
+ "position_biased_input": false,
37
+ "position_buckets": 256,
38
+ "relative_attention": true,
39
+ "share_att_key": true,
40
+ "transformers_version": "4.57.1",
41
+ "type_vocab_size": 0,
42
+ "vocab_size": 128100
43
+ }
final_new/results_hyper_search_DEBERTA/run-0/checkpoint-2000/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "extra_special_tokens": {},
50
+ "mask_token": "[MASK]",
51
+ "model_max_length": 1000000000000000019884624838656,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "sp_model_kwargs": {},
55
+ "split_by_punct": false,
56
+ "tokenizer_class": "DebertaV2Tokenizer",
57
+ "unk_token": "[UNK]",
58
+ "vocab_type": "spm"
59
+ }
final_new/results_hyper_search_DEBERTA/run-1/checkpoint-2000/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
final_new/results_hyper_search_DEBERTA/run-1/checkpoint-2000/config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaV2ForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "dtype": "float32",
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "real",
12
+ "1": "fake"
13
+ },
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "label2id": {
17
+ "fake": 1,
18
+ "real": 0
19
+ },
20
+ "layer_norm_eps": 1e-07,
21
+ "legacy": true,
22
+ "max_position_embeddings": 512,
23
+ "max_relative_positions": -1,
24
+ "model_type": "deberta-v2",
25
+ "norm_rel_ebd": "layer_norm",
26
+ "num_attention_heads": 12,
27
+ "num_hidden_layers": 12,
28
+ "pad_token_id": 0,
29
+ "pooler_dropout": 0,
30
+ "pooler_hidden_act": "gelu",
31
+ "pooler_hidden_size": 768,
32
+ "pos_att_type": [
33
+ "p2c",
34
+ "c2p"
35
+ ],
36
+ "position_biased_input": false,
37
+ "position_buckets": 256,
38
+ "relative_attention": true,
39
+ "share_att_key": true,
40
+ "transformers_version": "4.57.1",
41
+ "type_vocab_size": 0,
42
+ "vocab_size": 128100
43
+ }
final_new/results_hyper_search_DEBERTA/run-1/checkpoint-2000/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
final_new/results_hyper_search_DEBERTA/run-1/checkpoint-2000/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "extra_special_tokens": {},
50
+ "mask_token": "[MASK]",
51
+ "model_max_length": 1000000000000000019884624838656,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "sp_model_kwargs": {},
55
+ "split_by_punct": false,
56
+ "tokenizer_class": "DebertaV2Tokenizer",
57
+ "unk_token": "[UNK]",
58
+ "vocab_type": "spm"
59
+ }
final_new/results_hyper_search_DEBERTA/run-1/checkpoint-2000/trainer_state.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 2000,
3
+ "best_metric": 0.5603351693008105,
4
+ "best_model_checkpoint": "./results_hyper_search_DEBERTA/run-1/checkpoint-2000",
5
+ "epoch": 3.2,
6
+ "eval_steps": 1000,
7
+ "global_step": 2000,
8
+ "is_hyper_param_search": true,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.6,
14
+ "grad_norm": 2.9890623092651367,
15
+ "learning_rate": 1.5929740554926833e-06,
16
+ "loss": 0.6285,
17
+ "step": 1000
18
+ },
19
+ {
20
+ "epoch": 1.6,
21
+ "eval_accuracy": 0.5754,
22
+ "eval_f1_macro": 0.5120975672490654,
23
+ "eval_loss": 0.6258611083030701,
24
+ "eval_precision_macro": 0.57641307907854,
25
+ "eval_recall_macro": 0.6602000042936652,
26
+ "eval_runtime": 23.9619,
27
+ "eval_samples_per_second": 208.665,
28
+ "eval_steps_per_second": 3.297,
29
+ "step": 1000
30
+ },
31
+ {
32
+ "epoch": 3.2,
33
+ "grad_norm": 8.2739839553833,
34
+ "learning_rate": 5.316988686221415e-07,
35
+ "loss": 0.577,
36
+ "step": 2000
37
+ },
38
+ {
39
+ "epoch": 3.2,
40
+ "eval_accuracy": 0.6524,
41
+ "eval_f1_macro": 0.5603351693008105,
42
+ "eval_loss": 0.6020554304122925,
43
+ "eval_precision_macro": 0.5861485813010832,
44
+ "eval_recall_macro": 0.6739538216304121,
45
+ "eval_runtime": 23.7261,
46
+ "eval_samples_per_second": 210.738,
47
+ "eval_steps_per_second": 3.33,
48
+ "step": 2000
49
+ }
50
+ ],
51
+ "logging_steps": 1000,
52
+ "max_steps": 2500,
53
+ "num_input_tokens_seen": 0,
54
+ "num_train_epochs": 4,
55
+ "save_steps": 1000,
56
+ "stateful_callbacks": {
57
+ "TrainerControl": {
58
+ "args": {
59
+ "should_epoch_stop": false,
60
+ "should_evaluate": false,
61
+ "should_log": false,
62
+ "should_save": true,
63
+ "should_training_stop": false
64
+ },
65
+ "attributes": {}
66
+ }
67
+ },
68
+ "total_flos": 3.3678819065856e+16,
69
+ "train_batch_size": 64,
70
+ "trial_name": null,
71
+ "trial_params": {
72
+ "learning_rate": 2.33056031036771e-06,
73
+ "num_train_epochs": 4,
74
+ "per_device_train_batch_size": 16,
75
+ "seed": 6,
76
+ "warmup_ratio": 0.12139509415842381,
77
+ "weight_decay": 0.2500490360742963
78
+ }
79
+ }
final_new/results_hyper_search_DEBERTA/run-18/checkpoint-3000/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
final_new/results_hyper_search_DEBERTA/run-18/checkpoint-3000/config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaV2ForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "dtype": "float32",
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "real",
12
+ "1": "fake"
13
+ },
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "label2id": {
17
+ "fake": 1,
18
+ "real": 0
19
+ },
20
+ "layer_norm_eps": 1e-07,
21
+ "legacy": true,
22
+ "max_position_embeddings": 512,
23
+ "max_relative_positions": -1,
24
+ "model_type": "deberta-v2",
25
+ "norm_rel_ebd": "layer_norm",
26
+ "num_attention_heads": 12,
27
+ "num_hidden_layers": 12,
28
+ "pad_token_id": 0,
29
+ "pooler_dropout": 0,
30
+ "pooler_hidden_act": "gelu",
31
+ "pooler_hidden_size": 768,
32
+ "pos_att_type": [
33
+ "p2c",
34
+ "c2p"
35
+ ],
36
+ "position_biased_input": false,
37
+ "position_buckets": 256,
38
+ "relative_attention": true,
39
+ "share_att_key": true,
40
+ "transformers_version": "4.57.1",
41
+ "type_vocab_size": 0,
42
+ "vocab_size": 128100
43
+ }
final_new/results_hyper_search_DEBERTA/run-18/checkpoint-3000/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
final_new/results_hyper_search_DEBERTA/run-18/checkpoint-3000/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "extra_special_tokens": {},
50
+ "mask_token": "[MASK]",
51
+ "model_max_length": 1000000000000000019884624838656,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "sp_model_kwargs": {},
55
+ "split_by_punct": false,
56
+ "tokenizer_class": "DebertaV2Tokenizer",
57
+ "unk_token": "[UNK]",
58
+ "vocab_type": "spm"
59
+ }
final_new/results_hyper_search_DEBERTA/run-18/checkpoint-3000/trainer_state.json ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 3000,
3
+ "best_metric": 0.6044354346853781,
4
+ "best_model_checkpoint": "./results_hyper_search_DEBERTA/run-18/checkpoint-3000",
5
+ "epoch": 4.8,
6
+ "eval_steps": 1000,
7
+ "global_step": 3000,
8
+ "is_hyper_param_search": true,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.6,
14
+ "grad_norm": 2.9424350261688232,
15
+ "learning_rate": 9.350032740691138e-06,
16
+ "loss": 0.6177,
17
+ "step": 1000
18
+ },
19
+ {
20
+ "epoch": 1.6,
21
+ "eval_accuracy": 0.7014,
22
+ "eval_f1_macro": 0.5868228537163056,
23
+ "eval_loss": 0.5922038555145264,
24
+ "eval_precision_macro": 0.5921018965323757,
25
+ "eval_recall_macro": 0.6720652798127962,
26
+ "eval_runtime": 23.9124,
27
+ "eval_samples_per_second": 209.097,
28
+ "eval_steps_per_second": 3.304,
29
+ "step": 1000
30
+ },
31
+ {
32
+ "epoch": 3.2,
33
+ "grad_norm": 12.92706298828125,
34
+ "learning_rate": 7.013108786506899e-06,
35
+ "loss": 0.5371,
36
+ "step": 2000
37
+ },
38
+ {
39
+ "epoch": 3.2,
40
+ "eval_accuracy": 0.4686,
41
+ "eval_f1_macro": 0.44141691149088036,
42
+ "eval_loss": 0.6931988000869751,
43
+ "eval_precision_macro": 0.5766681586483515,
44
+ "eval_recall_macro": 0.6479536659850473,
45
+ "eval_runtime": 23.7806,
46
+ "eval_samples_per_second": 210.256,
47
+ "eval_steps_per_second": 3.322,
48
+ "step": 2000
49
+ },
50
+ {
51
+ "epoch": 4.8,
52
+ "grad_norm": 13.355767250061035,
53
+ "learning_rate": 4.6761848323226605e-06,
54
+ "loss": 0.4438,
55
+ "step": 3000
56
+ },
57
+ {
58
+ "epoch": 4.8,
59
+ "eval_accuracy": 0.7276,
60
+ "eval_f1_macro": 0.6044354346853781,
61
+ "eval_loss": 0.6976514458656311,
62
+ "eval_precision_macro": 0.601173813907625,
63
+ "eval_recall_macro": 0.6793853081509867,
64
+ "eval_runtime": 23.8882,
65
+ "eval_samples_per_second": 209.308,
66
+ "eval_steps_per_second": 3.307,
67
+ "step": 3000
68
+ }
69
+ ],
70
+ "logging_steps": 1000,
71
+ "max_steps": 5000,
72
+ "num_input_tokens_seen": 0,
73
+ "num_train_epochs": 8,
74
+ "save_steps": 1000,
75
+ "stateful_callbacks": {
76
+ "TrainerControl": {
77
+ "args": {
78
+ "should_epoch_stop": false,
79
+ "should_evaluate": false,
80
+ "should_log": false,
81
+ "should_save": true,
82
+ "should_training_stop": false
83
+ },
84
+ "attributes": {}
85
+ }
86
+ },
87
+ "total_flos": 5.0518228598784e+16,
88
+ "train_batch_size": 64,
89
+ "trial_name": null,
90
+ "trial_params": {
91
+ "learning_rate": 1.0315182333769228e-05,
92
+ "num_train_epochs": 8,
93
+ "per_device_train_batch_size": 16,
94
+ "seed": 22,
95
+ "warmup_ratio": 0.11705394279119077,
96
+ "weight_decay": 0.13356417654069175
97
+ }
98
+ }
final_new/results_hyper_search_DEBERTA/run-2/checkpoint-5000/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
final_new/results_hyper_search_DEBERTA/run-2/checkpoint-5000/config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaV2ForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "dtype": "float32",
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "real",
12
+ "1": "fake"
13
+ },
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "label2id": {
17
+ "fake": 1,
18
+ "real": 0
19
+ },
20
+ "layer_norm_eps": 1e-07,
21
+ "legacy": true,
22
+ "max_position_embeddings": 512,
23
+ "max_relative_positions": -1,
24
+ "model_type": "deberta-v2",
25
+ "norm_rel_ebd": "layer_norm",
26
+ "num_attention_heads": 12,
27
+ "num_hidden_layers": 12,
28
+ "pad_token_id": 0,
29
+ "pooler_dropout": 0,
30
+ "pooler_hidden_act": "gelu",
31
+ "pooler_hidden_size": 768,
32
+ "pos_att_type": [
33
+ "p2c",
34
+ "c2p"
35
+ ],
36
+ "position_biased_input": false,
37
+ "position_buckets": 256,
38
+ "relative_attention": true,
39
+ "share_att_key": true,
40
+ "transformers_version": "4.57.1",
41
+ "type_vocab_size": 0,
42
+ "vocab_size": 128100
43
+ }
final_new/results_hyper_search_DEBERTA/run-2/checkpoint-5000/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
final_new/results_hyper_search_DEBERTA/run-2/checkpoint-5000/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "extra_special_tokens": {},
50
+ "mask_token": "[MASK]",
51
+ "model_max_length": 1000000000000000019884624838656,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "sp_model_kwargs": {},
55
+ "split_by_punct": false,
56
+ "tokenizer_class": "DebertaV2Tokenizer",
57
+ "unk_token": "[UNK]",
58
+ "vocab_type": "spm"
59
+ }
final_new/results_hyper_search_DEBERTA/run-2/checkpoint-5000/trainer_state.json ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 5000,
3
+ "best_metric": 0.5997370106370481,
4
+ "best_model_checkpoint": "./results_hyper_search_DEBERTA/run-2/checkpoint-5000",
5
+ "epoch": 8.0,
6
+ "eval_steps": 1000,
7
+ "global_step": 5000,
8
+ "is_hyper_param_search": true,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.6,
14
+ "grad_norm": 1.8016266822814941,
15
+ "learning_rate": 1.2474286546321821e-05,
16
+ "loss": 0.6152,
17
+ "step": 1000
18
+ },
19
+ {
20
+ "epoch": 1.6,
21
+ "eval_accuracy": 0.639,
22
+ "eval_f1_macro": 0.5549379425686003,
23
+ "eval_loss": 0.6044051051139832,
24
+ "eval_precision_macro": 0.5880839985122879,
25
+ "eval_recall_macro": 0.6807331165032391,
26
+ "eval_runtime": 23.9947,
27
+ "eval_samples_per_second": 208.38,
28
+ "eval_steps_per_second": 3.292,
29
+ "step": 1000
30
+ },
31
+ {
32
+ "epoch": 3.2,
33
+ "grad_norm": 3.678083658218384,
34
+ "learning_rate": 9.3564943577885e-06,
35
+ "loss": 0.5344,
36
+ "step": 2000
37
+ },
38
+ {
39
+ "epoch": 3.2,
40
+ "eval_accuracy": 0.7188,
41
+ "eval_f1_macro": 0.5978591232436511,
42
+ "eval_loss": 0.6491641998291016,
43
+ "eval_precision_macro": 0.5972849820479006,
44
+ "eval_recall_macro": 0.6754914904922151,
45
+ "eval_runtime": 23.7704,
46
+ "eval_samples_per_second": 210.346,
47
+ "eval_steps_per_second": 3.323,
48
+ "step": 2000
49
+ },
50
+ {
51
+ "epoch": 4.8,
52
+ "grad_norm": 5.0685858726501465,
53
+ "learning_rate": 6.238702169255177e-06,
54
+ "loss": 0.4334,
55
+ "step": 3000
56
+ },
57
+ {
58
+ "epoch": 4.8,
59
+ "eval_accuracy": 0.6852,
60
+ "eval_f1_macro": 0.5791421344499168,
61
+ "eval_loss": 0.7180050611495972,
62
+ "eval_precision_macro": 0.59082407477529,
63
+ "eval_recall_macro": 0.6754002501059999,
64
+ "eval_runtime": 23.7836,
65
+ "eval_samples_per_second": 210.229,
66
+ "eval_steps_per_second": 3.322,
67
+ "step": 3000
68
+ },
69
+ {
70
+ "epoch": 6.4,
71
+ "grad_norm": 8.405384063720703,
72
+ "learning_rate": 3.1209099807218553e-06,
73
+ "loss": 0.3258,
74
+ "step": 4000
75
+ },
76
+ {
77
+ "epoch": 6.4,
78
+ "eval_accuracy": 0.7162,
79
+ "eval_f1_macro": 0.5901324695324581,
80
+ "eval_loss": 0.9122663140296936,
81
+ "eval_precision_macro": 0.5900376496467702,
82
+ "eval_recall_macro": 0.6606401049801149,
83
+ "eval_runtime": 23.765,
84
+ "eval_samples_per_second": 210.393,
85
+ "eval_steps_per_second": 3.324,
86
+ "step": 4000
87
+ },
88
+ {
89
+ "epoch": 8.0,
90
+ "grad_norm": 4.3618855476379395,
91
+ "learning_rate": 3.117792188533322e-09,
92
+ "loss": 0.2579,
93
+ "step": 5000
94
+ },
95
+ {
96
+ "epoch": 8.0,
97
+ "eval_accuracy": 0.7296,
98
+ "eval_f1_macro": 0.5997370106370481,
99
+ "eval_loss": 0.9907922148704529,
100
+ "eval_precision_macro": 0.5957433635167557,
101
+ "eval_recall_macro": 0.6659904143923658,
102
+ "eval_runtime": 23.8201,
103
+ "eval_samples_per_second": 209.906,
104
+ "eval_steps_per_second": 3.317,
105
+ "step": 5000
106
+ }
107
+ ],
108
+ "logging_steps": 1000,
109
+ "max_steps": 5000,
110
+ "num_input_tokens_seen": 0,
111
+ "num_train_epochs": 8,
112
+ "save_steps": 1000,
113
+ "stateful_callbacks": {
114
+ "TrainerControl": {
115
+ "args": {
116
+ "should_epoch_stop": false,
117
+ "should_evaluate": false,
118
+ "should_log": false,
119
+ "should_save": true,
120
+ "should_training_stop": true
121
+ },
122
+ "attributes": {}
123
+ }
124
+ },
125
+ "total_flos": 8.419704766464e+16,
126
+ "train_batch_size": 64,
127
+ "trial_name": null,
128
+ "trial_params": {
129
+ "learning_rate": 1.3986415757760482e-05,
130
+ "num_train_epochs": 8,
131
+ "per_device_train_batch_size": 16,
132
+ "seed": 11,
133
+ "warmup_ratio": 0.1027040959832009,
134
+ "weight_decay": 0.14078723455527858
135
+ }
136
+ }
final_new/results_hyper_search_DEBERTA/run-3/checkpoint-2000/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
final_new/results_hyper_search_DEBERTA/run-3/checkpoint-2000/config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaV2ForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "dtype": "float32",
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "real",
12
+ "1": "fake"
13
+ },
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "label2id": {
17
+ "fake": 1,
18
+ "real": 0
19
+ },
20
+ "layer_norm_eps": 1e-07,
21
+ "legacy": true,
22
+ "max_position_embeddings": 512,
23
+ "max_relative_positions": -1,
24
+ "model_type": "deberta-v2",
25
+ "norm_rel_ebd": "layer_norm",
26
+ "num_attention_heads": 12,
27
+ "num_hidden_layers": 12,
28
+ "pad_token_id": 0,
29
+ "pooler_dropout": 0,
30
+ "pooler_hidden_act": "gelu",
31
+ "pooler_hidden_size": 768,
32
+ "pos_att_type": [
33
+ "p2c",
34
+ "c2p"
35
+ ],
36
+ "position_biased_input": false,
37
+ "position_buckets": 256,
38
+ "relative_attention": true,
39
+ "share_att_key": true,
40
+ "transformers_version": "4.57.1",
41
+ "type_vocab_size": 0,
42
+ "vocab_size": 128100
43
+ }
final_new/results_hyper_search_DEBERTA/run-3/checkpoint-2000/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
final_new/results_hyper_search_DEBERTA/run-3/checkpoint-2000/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "extra_special_tokens": {},
50
+ "mask_token": "[MASK]",
51
+ "model_max_length": 1000000000000000019884624838656,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "sp_model_kwargs": {},
55
+ "split_by_punct": false,
56
+ "tokenizer_class": "DebertaV2Tokenizer",
57
+ "unk_token": "[UNK]",
58
+ "vocab_type": "spm"
59
+ }
final_new/results_hyper_search_DEBERTA/run-3/checkpoint-2000/trainer_state.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 2000,
3
+ "best_metric": 0.5214696341972573,
4
+ "best_model_checkpoint": "./results_hyper_search_DEBERTA/run-3/checkpoint-2000",
5
+ "epoch": 3.2,
6
+ "eval_steps": 1000,
7
+ "global_step": 2000,
8
+ "is_hyper_param_search": true,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.6,
14
+ "grad_norm": 2.5334744453430176,
15
+ "learning_rate": 3.3257742507708724e-06,
16
+ "loss": 0.6087,
17
+ "step": 1000
18
+ },
19
+ {
20
+ "epoch": 1.6,
21
+ "eval_accuracy": 0.583,
22
+ "eval_f1_macro": 0.5200337896212106,
23
+ "eval_loss": 0.6109012961387634,
24
+ "eval_precision_macro": 0.5825661988953263,
25
+ "eval_recall_macro": 0.6731011265504157,
26
+ "eval_runtime": 23.919,
27
+ "eval_samples_per_second": 209.039,
28
+ "eval_steps_per_second": 3.303,
29
+ "step": 1000
30
+ },
31
+ {
32
+ "epoch": 3.2,
33
+ "grad_norm": 7.089099407196045,
34
+ "learning_rate": 1.1100685540547683e-06,
35
+ "loss": 0.5543,
36
+ "step": 2000
37
+ },
38
+ {
39
+ "epoch": 3.2,
40
+ "eval_accuracy": 0.5828,
41
+ "eval_f1_macro": 0.5214696341972573,
42
+ "eval_loss": 0.6113795042037964,
43
+ "eval_precision_macro": 0.5854079475480009,
44
+ "eval_recall_macro": 0.6790498655546073,
45
+ "eval_runtime": 23.7426,
46
+ "eval_samples_per_second": 210.592,
47
+ "eval_steps_per_second": 3.327,
48
+ "step": 2000
49
+ }
50
+ ],
51
+ "logging_steps": 1000,
52
+ "max_steps": 2500,
53
+ "num_input_tokens_seen": 0,
54
+ "num_train_epochs": 4,
55
+ "save_steps": 1000,
56
+ "stateful_callbacks": {
57
+ "TrainerControl": {
58
+ "args": {
59
+ "should_epoch_stop": false,
60
+ "should_evaluate": false,
61
+ "should_log": false,
62
+ "should_save": true,
63
+ "should_training_stop": false
64
+ },
65
+ "attributes": {}
66
+ }
67
+ },
68
+ "total_flos": 3.3678819065856e+16,
69
+ "train_batch_size": 64,
70
+ "trial_name": null,
71
+ "trial_params": {
72
+ "learning_rate": 5.164809979045239e-06,
73
+ "num_train_epochs": 4,
74
+ "per_device_train_batch_size": 16,
75
+ "seed": 15,
76
+ "warmup_ratio": 0.06755599489879922,
77
+ "weight_decay": 0.23675131092877333
78
+ }
79
+ }
final_new/results_hyper_search_DEBERTA/run-4/checkpoint-1000/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
final_new/results_hyper_search_DEBERTA/run-4/checkpoint-1000/config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaV2ForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "dtype": "float32",
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "real",
12
+ "1": "fake"
13
+ },
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "label2id": {
17
+ "fake": 1,
18
+ "real": 0
19
+ },
20
+ "layer_norm_eps": 1e-07,
21
+ "legacy": true,
22
+ "max_position_embeddings": 512,
23
+ "max_relative_positions": -1,
24
+ "model_type": "deberta-v2",
25
+ "norm_rel_ebd": "layer_norm",
26
+ "num_attention_heads": 12,
27
+ "num_hidden_layers": 12,
28
+ "pad_token_id": 0,
29
+ "pooler_dropout": 0,
30
+ "pooler_hidden_act": "gelu",
31
+ "pooler_hidden_size": 768,
32
+ "pos_att_type": [
33
+ "p2c",
34
+ "c2p"
35
+ ],
36
+ "position_biased_input": false,
37
+ "position_buckets": 256,
38
+ "relative_attention": true,
39
+ "share_att_key": true,
40
+ "transformers_version": "4.57.1",
41
+ "type_vocab_size": 0,
42
+ "vocab_size": 128100
43
+ }
final_new/results_hyper_search_DEBERTA/run-4/checkpoint-1000/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
final_new/results_hyper_search_DEBERTA/run-4/checkpoint-1000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
final_new/results_hyper_search_DEBERTA/run-4/checkpoint-1000/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "extra_special_tokens": {},
50
+ "mask_token": "[MASK]",
51
+ "model_max_length": 1000000000000000019884624838656,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "sp_model_kwargs": {},
55
+ "split_by_punct": false,
56
+ "tokenizer_class": "DebertaV2Tokenizer",
57
+ "unk_token": "[UNK]",
58
+ "vocab_type": "spm"
59
+ }
final_new/results_hyper_search_DEBERTA/run-4/checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1000,
3
+ "best_metric": 0.5977399564843511,
4
+ "best_model_checkpoint": "./results_hyper_search_DEBERTA/run-4/checkpoint-1000",
5
+ "epoch": 1.6,
6
+ "eval_steps": 1000,
7
+ "global_step": 1000,
8
+ "is_hyper_param_search": true,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.6,
14
+ "grad_norm": 3.9698081016540527,
15
+ "learning_rate": 3.010016159294345e-06,
16
+ "loss": 0.6006,
17
+ "step": 1000
18
+ },
19
+ {
20
+ "epoch": 1.6,
21
+ "eval_accuracy": 0.7212,
22
+ "eval_f1_macro": 0.5977399564843511,
23
+ "eval_loss": 0.6234617233276367,
24
+ "eval_precision_macro": 0.5963182657859172,
25
+ "eval_recall_macro": 0.6720324064383509,
26
+ "eval_runtime": 24.0476,
27
+ "eval_samples_per_second": 207.921,
28
+ "eval_steps_per_second": 3.285,
29
+ "step": 1000
30
+ }
31
+ ],
32
+ "logging_steps": 1000,
33
+ "max_steps": 1875,
34
+ "num_input_tokens_seen": 0,
35
+ "num_train_epochs": 3,
36
+ "save_steps": 1000,
37
+ "stateful_callbacks": {
38
+ "TrainerControl": {
39
+ "args": {
40
+ "should_epoch_stop": false,
41
+ "should_evaluate": false,
42
+ "should_log": false,
43
+ "should_save": true,
44
+ "should_training_stop": false
45
+ },
46
+ "attributes": {}
47
+ }
48
+ },
49
+ "total_flos": 1.6839409532928e+16,
50
+ "train_batch_size": 64,
51
+ "trial_name": null,
52
+ "trial_params": {
53
+ "learning_rate": 6.391130201241418e-06,
54
+ "num_train_epochs": 3,
55
+ "per_device_train_batch_size": 16,
56
+ "seed": 14,
57
+ "warmup_ratio": 0.007895358563318623,
58
+ "weight_decay": 0.19664387232971756
59
+ }
60
+ }
final_new/results_hyper_search_DEBERTA/run-5/checkpoint-2000/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
final_new/results_hyper_search_DEBERTA/run-5/checkpoint-2000/config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaV2ForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "dtype": "float32",
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "real",
12
+ "1": "fake"
13
+ },
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "label2id": {
17
+ "fake": 1,
18
+ "real": 0
19
+ },
20
+ "layer_norm_eps": 1e-07,
21
+ "legacy": true,
22
+ "max_position_embeddings": 512,
23
+ "max_relative_positions": -1,
24
+ "model_type": "deberta-v2",
25
+ "norm_rel_ebd": "layer_norm",
26
+ "num_attention_heads": 12,
27
+ "num_hidden_layers": 12,
28
+ "pad_token_id": 0,
29
+ "pooler_dropout": 0,
30
+ "pooler_hidden_act": "gelu",
31
+ "pooler_hidden_size": 768,
32
+ "pos_att_type": [
33
+ "p2c",
34
+ "c2p"
35
+ ],
36
+ "position_biased_input": false,
37
+ "position_buckets": 256,
38
+ "relative_attention": true,
39
+ "share_att_key": true,
40
+ "transformers_version": "4.57.1",
41
+ "type_vocab_size": 0,
42
+ "vocab_size": 128100
43
+ }
final_new/results_hyper_search_DEBERTA/run-5/checkpoint-2000/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
final_new/results_hyper_search_DEBERTA/run-5/checkpoint-2000/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "extra_special_tokens": {},
50
+ "mask_token": "[MASK]",
51
+ "model_max_length": 1000000000000000019884624838656,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "sp_model_kwargs": {},
55
+ "split_by_punct": false,
56
+ "tokenizer_class": "DebertaV2Tokenizer",
57
+ "unk_token": "[UNK]",
58
+ "vocab_type": "spm"
59
+ }
final_new/results_hyper_search_DEBERTA/run-5/checkpoint-2000/trainer_state.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 2000,
3
+ "best_metric": 0.5761183261183261,
4
+ "best_model_checkpoint": "./results_hyper_search_DEBERTA/run-5/checkpoint-2000",
5
+ "epoch": 3.2,
6
+ "eval_steps": 1000,
7
+ "global_step": 2000,
8
+ "is_hyper_param_search": true,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.6,
14
+ "grad_norm": 6.222267150878906,
15
+ "learning_rate": 1.909543747689655e-06,
16
+ "loss": 0.6227,
17
+ "step": 1000
18
+ },
19
+ {
20
+ "epoch": 1.6,
21
+ "eval_accuracy": 0.6684,
22
+ "eval_f1_macro": 0.5699836334422224,
23
+ "eval_loss": 0.60748291015625,
24
+ "eval_precision_macro": 0.5887745763033124,
25
+ "eval_recall_macro": 0.6759611101271461,
26
+ "eval_runtime": 23.9742,
27
+ "eval_samples_per_second": 208.557,
28
+ "eval_steps_per_second": 3.295,
29
+ "step": 1000
30
+ },
31
+ {
32
+ "epoch": 3.2,
33
+ "grad_norm": 5.623478889465332,
34
+ "learning_rate": 1.2154166129424158e-06,
35
+ "loss": 0.5704,
36
+ "step": 2000
37
+ },
38
+ {
39
+ "epoch": 3.2,
40
+ "eval_accuracy": 0.6804,
41
+ "eval_f1_macro": 0.5761183261183261,
42
+ "eval_loss": 0.6052666902542114,
43
+ "eval_precision_macro": 0.5896817337700503,
44
+ "eval_recall_macro": 0.6744341754284273,
45
+ "eval_runtime": 23.7946,
46
+ "eval_samples_per_second": 210.132,
47
+ "eval_steps_per_second": 3.32,
48
+ "step": 2000
49
+ }
50
+ ],
51
+ "logging_steps": 1000,
52
+ "max_steps": 3750,
53
+ "num_input_tokens_seen": 0,
54
+ "num_train_epochs": 6,
55
+ "save_steps": 1000,
56
+ "stateful_callbacks": {
57
+ "TrainerControl": {
58
+ "args": {
59
+ "should_epoch_stop": false,
60
+ "should_evaluate": false,
61
+ "should_log": false,
62
+ "should_save": true,
63
+ "should_training_stop": false
64
+ },
65
+ "attributes": {}
66
+ }
67
+ },
68
+ "total_flos": 3.3678819065856e+16,
69
+ "train_batch_size": 64,
70
+ "trial_name": null,
71
+ "trial_params": {
72
+ "learning_rate": 2.4877516509341053e-06,
73
+ "num_train_epochs": 6,
74
+ "per_device_train_batch_size": 16,
75
+ "seed": 18,
76
+ "warmup_ratio": 0.044159519908610584,
77
+ "weight_decay": 0.07842897475620166
78
+ }
79
+ }
final_new/results_hyper_search_DEBERTA/run-6/checkpoint-1000/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
final_new/results_hyper_search_DEBERTA/run-6/checkpoint-1000/config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaV2ForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "dtype": "float32",
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "real",
12
+ "1": "fake"
13
+ },
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "label2id": {
17
+ "fake": 1,
18
+ "real": 0
19
+ },
20
+ "layer_norm_eps": 1e-07,
21
+ "legacy": true,
22
+ "max_position_embeddings": 512,
23
+ "max_relative_positions": -1,
24
+ "model_type": "deberta-v2",
25
+ "norm_rel_ebd": "layer_norm",
26
+ "num_attention_heads": 12,
27
+ "num_hidden_layers": 12,
28
+ "pad_token_id": 0,
29
+ "pooler_dropout": 0,
30
+ "pooler_hidden_act": "gelu",
31
+ "pooler_hidden_size": 768,
32
+ "pos_att_type": [
33
+ "p2c",
34
+ "c2p"
35
+ ],
36
+ "position_biased_input": false,
37
+ "position_buckets": 256,
38
+ "relative_attention": true,
39
+ "share_att_key": true,
40
+ "transformers_version": "4.57.1",
41
+ "type_vocab_size": 0,
42
+ "vocab_size": 128100
43
+ }
final_new/results_hyper_search_DEBERTA/run-6/checkpoint-1000/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "extra_special_tokens": {},
50
+ "mask_token": "[MASK]",
51
+ "model_max_length": 1000000000000000019884624838656,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "sp_model_kwargs": {},
55
+ "split_by_punct": false,
56
+ "tokenizer_class": "DebertaV2Tokenizer",
57
+ "unk_token": "[UNK]",
58
+ "vocab_type": "spm"
59
+ }
final_new/results_hyper_search_DEBERTA/run-6/checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1000,
3
+ "best_metric": 0.5778107857758332,
4
+ "best_model_checkpoint": "./results_hyper_search_DEBERTA/run-6/checkpoint-1000",
5
+ "epoch": 1.6,
6
+ "eval_steps": 1000,
7
+ "global_step": 1000,
8
+ "is_hyper_param_search": true,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.6,
14
+ "grad_norm": 1.95713472366333,
15
+ "learning_rate": 1.0883753415508279e-05,
16
+ "loss": 0.5984,
17
+ "step": 1000
18
+ },
19
+ {
20
+ "epoch": 1.6,
21
+ "eval_accuracy": 0.6786,
22
+ "eval_f1_macro": 0.5778107857758332,
23
+ "eval_loss": 0.5918202996253967,
24
+ "eval_precision_macro": 0.5927365941052973,
25
+ "eval_recall_macro": 0.6818803301828564,
26
+ "eval_runtime": 23.9576,
27
+ "eval_samples_per_second": 208.702,
28
+ "eval_steps_per_second": 3.297,
29
+ "step": 1000
30
+ }
31
+ ],
32
+ "logging_steps": 1000,
33
+ "max_steps": 2500,
34
+ "num_input_tokens_seen": 0,
35
+ "num_train_epochs": 4,
36
+ "save_steps": 1000,
37
+ "stateful_callbacks": {
38
+ "TrainerControl": {
39
+ "args": {
40
+ "should_epoch_stop": false,
41
+ "should_evaluate": false,
42
+ "should_log": false,
43
+ "should_save": true,
44
+ "should_training_stop": false
45
+ },
46
+ "attributes": {}
47
+ }
48
+ },
49
+ "total_flos": 1.6839409532928e+16,
50
+ "train_batch_size": 64,
51
+ "trial_name": null,
52
+ "trial_params": {
53
+ "learning_rate": 1.720662681878824e-05,
54
+ "num_train_epochs": 4,
55
+ "per_device_train_batch_size": 16,
56
+ "seed": 38,
57
+ "warmup_ratio": 0.05046021852881977,
58
+ "weight_decay": 0.07407011018822314
59
+ }
60
+ }
final_new/results_hyper_search_DEBERTA/run-8/checkpoint-1000/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
final_new/results_hyper_search_DEBERTA/run-8/checkpoint-1000/config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaV2ForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "dtype": "float32",
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "real",
12
+ "1": "fake"
13
+ },
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "label2id": {
17
+ "fake": 1,
18
+ "real": 0
19
+ },
20
+ "layer_norm_eps": 1e-07,
21
+ "legacy": true,
22
+ "max_position_embeddings": 512,
23
+ "max_relative_positions": -1,
24
+ "model_type": "deberta-v2",
25
+ "norm_rel_ebd": "layer_norm",
26
+ "num_attention_heads": 12,
27
+ "num_hidden_layers": 12,
28
+ "pad_token_id": 0,
29
+ "pooler_dropout": 0,
30
+ "pooler_hidden_act": "gelu",
31
+ "pooler_hidden_size": 768,
32
+ "pos_att_type": [
33
+ "p2c",
34
+ "c2p"
35
+ ],
36
+ "position_biased_input": false,
37
+ "position_buckets": 256,
38
+ "relative_attention": true,
39
+ "share_att_key": true,
40
+ "transformers_version": "4.57.1",
41
+ "type_vocab_size": 0,
42
+ "vocab_size": 128100
43
+ }
final_new/results_hyper_search_DEBERTA/run-8/checkpoint-1000/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
final_new/results_hyper_search_DEBERTA/run-8/checkpoint-1000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
final_new/results_hyper_search_DEBERTA/run-8/checkpoint-1000/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "extra_special_tokens": {},
50
+ "mask_token": "[MASK]",
51
+ "model_max_length": 1000000000000000019884624838656,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "sp_model_kwargs": {},
55
+ "split_by_punct": false,
56
+ "tokenizer_class": "DebertaV2Tokenizer",
57
+ "unk_token": "[UNK]",
58
+ "vocab_type": "spm"
59
+ }
final_new/results_hyper_search_DEBERTA/run-8/checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1000,
3
+ "best_metric": 0.5841124806304239,
4
+ "best_model_checkpoint": "./results_hyper_search_DEBERTA/run-8/checkpoint-1000",
5
+ "epoch": 1.6,
6
+ "eval_steps": 1000,
7
+ "global_step": 1000,
8
+ "is_hyper_param_search": true,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.6,
14
+ "grad_norm": 2.356539249420166,
15
+ "learning_rate": 1.7019489204504472e-05,
16
+ "loss": 0.6009,
17
+ "step": 1000
18
+ },
19
+ {
20
+ "epoch": 1.6,
21
+ "eval_accuracy": 0.6922,
22
+ "eval_f1_macro": 0.5841124806304239,
23
+ "eval_loss": 0.6143302917480469,
24
+ "eval_precision_macro": 0.5932105858722077,
25
+ "eval_recall_macro": 0.6782494995196462,
26
+ "eval_runtime": 23.954,
27
+ "eval_samples_per_second": 208.733,
28
+ "eval_steps_per_second": 3.298,
29
+ "step": 1000
30
+ }
31
+ ],
32
+ "logging_steps": 1000,
33
+ "max_steps": 1875,
34
+ "num_input_tokens_seen": 0,
35
+ "num_train_epochs": 3,
36
+ "save_steps": 1000,
37
+ "stateful_callbacks": {
38
+ "TrainerControl": {
39
+ "args": {
40
+ "should_epoch_stop": false,
41
+ "should_evaluate": false,
42
+ "should_log": false,
43
+ "should_save": true,
44
+ "should_training_stop": false
45
+ },
46
+ "attributes": {}
47
+ }
48
+ },
49
+ "total_flos": 1.6839409532928e+16,
50
+ "train_batch_size": 64,
51
+ "trial_name": null,
52
+ "trial_params": {
53
+ "learning_rate": 3.5107553644451574e-05,
54
+ "num_train_epochs": 3,
55
+ "per_device_train_batch_size": 16,
56
+ "seed": 13,
57
+ "warmup_ratio": 0.03608456495612949,
58
+ "weight_decay": 0.0003410574096915697
59
+ }
60
+ }
final_new/train.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ==============================================================================
2
+ # 最终决战版-v9: 虚假评论识别
3
+ # 核心策略: DeBERTa-v3 + 类别权重 + 自动超参数搜索 + Batch Size 16
4
+ # ==============================================================================
5
import os

# --- 1. Network configuration (online mode, downloading through the mirror) ---
# These variables must be exported BEFORE `datasets` / `transformers` are
# imported: huggingface_hub copies HF_ENDPOINT and HF_HUB_CACHE into
# module-level constants when it is first imported, so assigning them after
# the imports has no effect on where models are downloaded from or cached.
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
os.environ['HF_HUB_CACHE'] = '/root/autodl-tmp/huggingface_cache'
# NOTE: HF_HUB_OFFLINE=1 is intentionally not set; DeBERTa has to be downloaded.

import pandas as pd  # noqa: E402
import numpy as np  # noqa: E402
import torch  # noqa: E402
from torch import nn  # noqa: E402
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score  # noqa: E402
from sklearn.utils.class_weight import compute_class_weight  # noqa: E402
from datasets import Dataset  # noqa: E402
from transformers import (  # noqa: E402
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    EvalPrediction
)
import optuna  # noqa: E402

# Core upgrade: the pretrained base checkpoint that is fine-tuned below.
MODEL_NAME_OR_PATH = "microsoft/deberta-v3-base"

# --- 2. Data file paths ---
TRAIN_FILE_PATH = "/tmp/home/wzh/file/train_data.csv"
VALID_FILE_PATH = "/tmp/home/wzh/file/val_data.csv"
33
+
34
# --- 3. Load the data ---
print(f"加载训练集: {TRAIN_FILE_PATH}")
train_df = pd.read_csv(TRAIN_FILE_PATH)
print(f"加载验证集: {VALID_FILE_PATH}")
eval_df = pd.read_csv(VALID_FILE_PATH)

# Encode the string labels as class ids. Series.map() yields NaN for every
# value that is missing from the mapping, so fail fast here instead of letting
# NaN labels reach the class-weight computation and the loss.
label_map = {"real": 0, "fake": 1}
for _split_name, _frame in (("train", train_df), ("validation", eval_df)):
    _encoded = _frame['label'].map(label_map)
    _unmapped = _encoded.isna()
    if _unmapped.any():
        _bad_values = sorted(set(_frame.loc[_unmapped, 'label'].astype(str)))
        raise ValueError(
            f"Unexpected label value(s) in the {_split_name} set: {_bad_values}; "
            f"expected one of {sorted(label_map)}"
        )
    _frame['label'] = _encoded.astype("int64")
43
+
44
# --- 4. Class weights ---
print("\n正在计算类别权重...")
train_labels = np.array(train_df["label"])
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_labels),
    y=train_labels,
)

# Keep a float tensor copy of the weights on the GPU when one is available;
# it is consumed by the weighted loss in CustomTrainer.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float, device=device)
print(f"计算出的类别权重: {class_weights}")
51
+
52
# --- 5. Build the Datasets and load the tokenizer ---
train_dataset, eval_dataset = (
    Dataset.from_pandas(frame) for frame in (train_df, eval_df)
)

# The DeBERTa tokenizer needs the sentencepiece package; make sure it is installed.
print(f"\n正在下载/加载模型: {MODEL_NAME_OR_PATH} ...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME_OR_PATH)
59
+
60
def tokenize_function(examples):
    """Tokenize the ``"text"`` field of a batch of rows.

    Every sequence is truncated and padded to exactly 512 tokens, and the
    tokenizer output is returned unchanged.
    """
    tokenizer_options = {"padding": "max_length", "truncation": True, "max_length": 512}
    return tokenizer(examples["text"], **tokenizer_options)
62
+
63
tokenized_train_dataset = train_dataset.map(tokenize_function, batched=True)
tokenized_eval_dataset = eval_dataset.map(tokenize_function, batched=True)

# Columns the model does not consume. "__index_level_0__" is created by
# Dataset.from_pandas() on the Dataset (never on the DataFrame), so its
# presence has to be tested against each dataset's own column_names. Filtering
# per dataset also avoids the ValueError remove_columns() raises when a split
# lacks one of the listed columns.
columns_to_remove = ["id", "text", "__index_level_0__"]


def _strip_and_rename(dataset):
    """Drop unused columns and rename "label" to "labels" (the key the loss reads)."""
    present = [name for name in columns_to_remove if name in dataset.column_names]
    return dataset.remove_columns(present).rename_column("label", "labels")


tokenized_train_dataset = _strip_and_rename(tokenized_train_dataset)
tokenized_eval_dataset = _strip_and_rename(tokenized_eval_dataset)
74
+
75
+ # --- 6. 自定义Trainer (应用类别权重) ---
76
class CustomTrainer(Trainer):
    """Trainer whose loss is a class-weighted cross-entropy."""

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Compute cross-entropy weighted by the module-level ``class_weights_tensor``.

        Args:
            model: Model used for the forward pass.
            inputs: Batch mapping; ``"labels"`` is popped from it, so the model
                itself is called without labels.
            return_outputs: When true, return ``(loss, outputs)``.
            **kwargs: Accepted and ignored (extra arguments a caller may pass).

        Returns:
            The loss tensor, or ``(loss, outputs)`` when ``return_outputs`` is true.
        """
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # The weights were placed on a device picked once at import time. Move
        # them to wherever the logits actually live so the loss cannot fail
        # with a device mismatch (e.g. when each process drives its own GPU).
        weights = class_weights_tensor.to(logits.device)
        loss_fct = nn.CrossEntropyLoss(weight=weights)
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss
84
+
85
+ # --- 7. 配置超参数搜索 ---
86
# Class id <-> class name tables stored in the model config.
id2label = {0: "real", 1: "fake"}
label2id = {name: idx for idx, name in id2label.items()}


def model_init(trial):
    """Load a fresh two-class sequence classifier from ``MODEL_NAME_OR_PATH``.

    ``trial`` is accepted but not used.
    """
    label_config = {"num_labels": 2, "id2label": id2label, "label2id": label2id}
    return AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME_OR_PATH, **label_config
    )
96
+
97
def compute_metrics_macro(p: EvalPrediction):
    """Compute accuracy and macro-averaged F1 / precision / recall.

    Predicted classes are the argmax of ``p.predictions`` along axis 1
    (presumably shaped (n_samples, n_classes) -- confirm against the caller)
    and are compared with ``p.label_ids``.

    Returns:
        A dict with the keys ``accuracy``, ``f1_macro``, ``precision_macro``
        and ``recall_macro``.
    """
    y_true = p.label_ids
    y_pred = np.argmax(p.predictions, axis=1)

    # zero_division=0 scores a class with no predicted/true samples as 0
    # instead of emitting a warning.
    macro_options = {"average": "macro", "zero_division": 0}

    scores = {"accuracy": accuracy_score(y_true, y_pred)}
    scores["f1_macro"] = f1_score(y_true, y_pred, **macro_options)
    scores["precision_macro"] = precision_score(y_true, y_pred, **macro_options)
    scores["recall_macro"] = recall_score(y_true, y_pred, **macro_options)
    return scores
112
+
113
def compute_objective(metrics):
    """Return the search objective: the ``eval_f1_macro`` entry of *metrics*."""
    objective_key = 'eval_f1_macro'
    return metrics[objective_key]
115
def my_hp_space(trial):
    """Sample one hyperparameter configuration from *trial*.

    Returns:
        A dict keyed by training-argument name with the sampled values.
    """
    space = {}

    # 1. Learning rate: log-uniform between 1e-6 and 5e-5.
    space["learning_rate"] = trial.suggest_float("learning_rate", 1e-6, 5e-5, log=True)

    # 2. Epochs: 3 to 8, allowing longer training to guard against underfitting.
    space["num_train_epochs"] = trial.suggest_int("num_train_epochs", 3, 8)

    # 3. Random seed: varied so a result does not hinge on one lucky seed.
    space["seed"] = trial.suggest_int("seed", 1, 40)

    # 4. Batch size: 16 is the only candidate, so it is effectively fixed
    #    (add 32 to the list if GPU memory allows).
    space["per_device_train_batch_size"] = trial.suggest_categorical(
        "per_device_train_batch_size", [16]
    )

    # 5. Weight decay: regularization strength against overfitting.
    space["weight_decay"] = trial.suggest_float("weight_decay", 0.0, 0.3)

    # 6. Warmup ratio: fraction of training used for learning-rate warmup.
    space["warmup_ratio"] = trial.suggest_float("warmup_ratio", 0.0, 0.2)

    return space
136
# Base arguments for the search. Evaluation, checkpointing and logging all run
# every 1000 steps; only one checkpoint is kept and the one with the highest
# f1_macro is reloaded when training ends.
search_args = {
    "output_dir": "./results_hyper_search_DEBERTA",
    # Per-device batch size of 16 for both training and evaluation.
    "per_device_train_batch_size": 16,
    "per_device_eval_batch_size": 16,
    "weight_decay": 0.01,
    "eval_strategy": "steps",
    "eval_steps": 1000,
    "save_strategy": "steps",
    "save_steps": 1000,
    "logging_strategy": "steps",
    "logging_steps": 1000,
    "load_best_model_at_end": True,
    "metric_for_best_model": "f1_macro",
    "greater_is_better": True,
    "save_total_limit": 1,
}
training_args = TrainingArguments(**search_args)

# model=None together with model_init makes the trainer build its model
# through model_init instead of receiving a ready-made one.
trainer = CustomTrainer(
    model=None,
    model_init=model_init,
    args=training_args,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics_macro,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_eval_dataset,
)
165
+
166
# --- 8. Run the automatic hyperparameter search ---
_rule = "=" * 50
print("\n" + _rule)
print("🚀 [DeBERTa-v3] 开始自动超参数搜索 (Target: Macro-F1)...")
print(_rule)

# 20 Optuna trials, maximizing the value returned by compute_objective.
best_run = trainer.hyperparameter_search(
    hp_space=my_hp_space,
    compute_objective=compute_objective,
    backend="optuna",
    direction="maximize",
    n_trials=20,
)

print("\n" + _rule)
print("🎉 搜索完成!")
print(_rule)
print(f"最佳 Macro-F1: {best_run.objective:.4f}")
print("最佳参数组合:", best_run.hyperparameters)
184
+
185
# --- 9. Final training with the best hyperparameters ---
_rule = "=" * 50
print("\n" + _rule)
print("🚀 [DeBERTa-v3] 使用最佳参数进行最终训练...")
print(_rule)

# Copy the winning values onto the existing TrainingArguments, then point the
# final run at its own output directory and log every 200 steps.
best_params = best_run.hyperparameters
for name in best_params:
    setattr(training_args, name, best_params[name])
training_args.output_dir = "./results_final_best_DEBERTA"
training_args.logging_steps = 200

trainer = CustomTrainer(
    args=training_args,
    model_init=model_init,
    compute_metrics=compute_metrics_macro,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_eval_dataset,
)
trainer.train()

print("\n" + _rule)
print("🎉 最终训练完成!")
print(_rule)
207
+
208
# --- 10. Save the model and report validation metrics ---
final_model_path = "./final_model_deberta_macro"
trainer.save_model(final_model_path)
tokenizer.save_pretrained(final_model_path)
print(f"\nDeBERTa 最优模型已保存至: {final_model_path}")

print("\n--- 最终成绩单 (验证集) ---")
final_metrics = trainer.evaluate()

for key, value in final_metrics.items():
    # Strip the "eval_" prefix for display; floats are shown with 4 decimals.
    label = key[len("eval_"):] if key.startswith("eval_") else key
    rendered = f"{value:.4f}" if isinstance(value, float) else f"{value}"
    print(f" - {label}: {rendered}")
test.py ADDED
File without changes