Add files using upload-large-folder tool
Browse files- final_new/final_model_deberta_macro/added_tokens.json +3 -0
- final_new/final_model_deberta_macro/config.json +43 -0
- final_new/final_model_deberta_macro/special_tokens_map.json +15 -0
- final_new/final_model_deberta_macro/tokenizer.json +0 -0
- final_new/final_model_deberta_macro/tokenizer_config.json +59 -0
- final_new/results_hyper_search_DEBERTA/run-0/checkpoint-2000/config.json +43 -0
- final_new/results_hyper_search_DEBERTA/run-0/checkpoint-2000/tokenizer_config.json +59 -0
- final_new/results_hyper_search_DEBERTA/run-1/checkpoint-2000/added_tokens.json +3 -0
- final_new/results_hyper_search_DEBERTA/run-1/checkpoint-2000/config.json +43 -0
- final_new/results_hyper_search_DEBERTA/run-1/checkpoint-2000/special_tokens_map.json +15 -0
- final_new/results_hyper_search_DEBERTA/run-1/checkpoint-2000/tokenizer_config.json +59 -0
- final_new/results_hyper_search_DEBERTA/run-1/checkpoint-2000/trainer_state.json +79 -0
- final_new/results_hyper_search_DEBERTA/run-18/checkpoint-3000/added_tokens.json +3 -0
- final_new/results_hyper_search_DEBERTA/run-18/checkpoint-3000/config.json +43 -0
- final_new/results_hyper_search_DEBERTA/run-18/checkpoint-3000/special_tokens_map.json +15 -0
- final_new/results_hyper_search_DEBERTA/run-18/checkpoint-3000/tokenizer_config.json +59 -0
- final_new/results_hyper_search_DEBERTA/run-18/checkpoint-3000/trainer_state.json +98 -0
- final_new/results_hyper_search_DEBERTA/run-2/checkpoint-5000/added_tokens.json +3 -0
- final_new/results_hyper_search_DEBERTA/run-2/checkpoint-5000/config.json +43 -0
- final_new/results_hyper_search_DEBERTA/run-2/checkpoint-5000/special_tokens_map.json +15 -0
- final_new/results_hyper_search_DEBERTA/run-2/checkpoint-5000/tokenizer_config.json +59 -0
- final_new/results_hyper_search_DEBERTA/run-2/checkpoint-5000/trainer_state.json +136 -0
- final_new/results_hyper_search_DEBERTA/run-3/checkpoint-2000/added_tokens.json +3 -0
- final_new/results_hyper_search_DEBERTA/run-3/checkpoint-2000/config.json +43 -0
- final_new/results_hyper_search_DEBERTA/run-3/checkpoint-2000/special_tokens_map.json +15 -0
- final_new/results_hyper_search_DEBERTA/run-3/checkpoint-2000/tokenizer_config.json +59 -0
- final_new/results_hyper_search_DEBERTA/run-3/checkpoint-2000/trainer_state.json +79 -0
- final_new/results_hyper_search_DEBERTA/run-4/checkpoint-1000/added_tokens.json +3 -0
- final_new/results_hyper_search_DEBERTA/run-4/checkpoint-1000/config.json +43 -0
- final_new/results_hyper_search_DEBERTA/run-4/checkpoint-1000/special_tokens_map.json +15 -0
- final_new/results_hyper_search_DEBERTA/run-4/checkpoint-1000/tokenizer.json +0 -0
- final_new/results_hyper_search_DEBERTA/run-4/checkpoint-1000/tokenizer_config.json +59 -0
- final_new/results_hyper_search_DEBERTA/run-4/checkpoint-1000/trainer_state.json +60 -0
- final_new/results_hyper_search_DEBERTA/run-5/checkpoint-2000/added_tokens.json +3 -0
- final_new/results_hyper_search_DEBERTA/run-5/checkpoint-2000/config.json +43 -0
- final_new/results_hyper_search_DEBERTA/run-5/checkpoint-2000/special_tokens_map.json +15 -0
- final_new/results_hyper_search_DEBERTA/run-5/checkpoint-2000/tokenizer_config.json +59 -0
- final_new/results_hyper_search_DEBERTA/run-5/checkpoint-2000/trainer_state.json +79 -0
- final_new/results_hyper_search_DEBERTA/run-6/checkpoint-1000/added_tokens.json +3 -0
- final_new/results_hyper_search_DEBERTA/run-6/checkpoint-1000/config.json +43 -0
- final_new/results_hyper_search_DEBERTA/run-6/checkpoint-1000/tokenizer_config.json +59 -0
- final_new/results_hyper_search_DEBERTA/run-6/checkpoint-1000/trainer_state.json +60 -0
- final_new/results_hyper_search_DEBERTA/run-8/checkpoint-1000/added_tokens.json +3 -0
- final_new/results_hyper_search_DEBERTA/run-8/checkpoint-1000/config.json +43 -0
- final_new/results_hyper_search_DEBERTA/run-8/checkpoint-1000/special_tokens_map.json +15 -0
- final_new/results_hyper_search_DEBERTA/run-8/checkpoint-1000/tokenizer.json +0 -0
- final_new/results_hyper_search_DEBERTA/run-8/checkpoint-1000/tokenizer_config.json +59 -0
- final_new/results_hyper_search_DEBERTA/run-8/checkpoint-1000/trainer_state.json +60 -0
- final_new/train.py +223 -0
- test.py +0 -0
final_new/final_model_deberta_macro/added_tokens.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"[MASK]": 128000
|
| 3 |
+
}
|
final_new/final_model_deberta_macro/config.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"DebertaV2ForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"dtype": "float32",
|
| 7 |
+
"hidden_act": "gelu",
|
| 8 |
+
"hidden_dropout_prob": 0.1,
|
| 9 |
+
"hidden_size": 768,
|
| 10 |
+
"id2label": {
|
| 11 |
+
"0": "real",
|
| 12 |
+
"1": "fake"
|
| 13 |
+
},
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 3072,
|
| 16 |
+
"label2id": {
|
| 17 |
+
"fake": 1,
|
| 18 |
+
"real": 0
|
| 19 |
+
},
|
| 20 |
+
"layer_norm_eps": 1e-07,
|
| 21 |
+
"legacy": true,
|
| 22 |
+
"max_position_embeddings": 512,
|
| 23 |
+
"max_relative_positions": -1,
|
| 24 |
+
"model_type": "deberta-v2",
|
| 25 |
+
"norm_rel_ebd": "layer_norm",
|
| 26 |
+
"num_attention_heads": 12,
|
| 27 |
+
"num_hidden_layers": 12,
|
| 28 |
+
"pad_token_id": 0,
|
| 29 |
+
"pooler_dropout": 0,
|
| 30 |
+
"pooler_hidden_act": "gelu",
|
| 31 |
+
"pooler_hidden_size": 768,
|
| 32 |
+
"pos_att_type": [
|
| 33 |
+
"p2c",
|
| 34 |
+
"c2p"
|
| 35 |
+
],
|
| 36 |
+
"position_biased_input": false,
|
| 37 |
+
"position_buckets": 256,
|
| 38 |
+
"relative_attention": true,
|
| 39 |
+
"share_att_key": true,
|
| 40 |
+
"transformers_version": "4.57.1",
|
| 41 |
+
"type_vocab_size": 0,
|
| 42 |
+
"vocab_size": 128100
|
| 43 |
+
}
|
final_new/final_model_deberta_macro/special_tokens_map.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "[CLS]",
|
| 3 |
+
"cls_token": "[CLS]",
|
| 4 |
+
"eos_token": "[SEP]",
|
| 5 |
+
"mask_token": "[MASK]",
|
| 6 |
+
"pad_token": "[PAD]",
|
| 7 |
+
"sep_token": "[SEP]",
|
| 8 |
+
"unk_token": {
|
| 9 |
+
"content": "[UNK]",
|
| 10 |
+
"lstrip": false,
|
| 11 |
+
"normalized": true,
|
| 12 |
+
"rstrip": false,
|
| 13 |
+
"single_word": false
|
| 14 |
+
}
|
| 15 |
+
}
|
final_new/final_model_deberta_macro/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
final_new/final_model_deberta_macro/tokenizer_config.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "[CLS]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "[SEP]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "[UNK]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": true,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"128000": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"bos_token": "[CLS]",
|
| 45 |
+
"clean_up_tokenization_spaces": false,
|
| 46 |
+
"cls_token": "[CLS]",
|
| 47 |
+
"do_lower_case": false,
|
| 48 |
+
"eos_token": "[SEP]",
|
| 49 |
+
"extra_special_tokens": {},
|
| 50 |
+
"mask_token": "[MASK]",
|
| 51 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 52 |
+
"pad_token": "[PAD]",
|
| 53 |
+
"sep_token": "[SEP]",
|
| 54 |
+
"sp_model_kwargs": {},
|
| 55 |
+
"split_by_punct": false,
|
| 56 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
| 57 |
+
"unk_token": "[UNK]",
|
| 58 |
+
"vocab_type": "spm"
|
| 59 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-0/checkpoint-2000/config.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"DebertaV2ForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"dtype": "float32",
|
| 7 |
+
"hidden_act": "gelu",
|
| 8 |
+
"hidden_dropout_prob": 0.1,
|
| 9 |
+
"hidden_size": 768,
|
| 10 |
+
"id2label": {
|
| 11 |
+
"0": "real",
|
| 12 |
+
"1": "fake"
|
| 13 |
+
},
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 3072,
|
| 16 |
+
"label2id": {
|
| 17 |
+
"fake": 1,
|
| 18 |
+
"real": 0
|
| 19 |
+
},
|
| 20 |
+
"layer_norm_eps": 1e-07,
|
| 21 |
+
"legacy": true,
|
| 22 |
+
"max_position_embeddings": 512,
|
| 23 |
+
"max_relative_positions": -1,
|
| 24 |
+
"model_type": "deberta-v2",
|
| 25 |
+
"norm_rel_ebd": "layer_norm",
|
| 26 |
+
"num_attention_heads": 12,
|
| 27 |
+
"num_hidden_layers": 12,
|
| 28 |
+
"pad_token_id": 0,
|
| 29 |
+
"pooler_dropout": 0,
|
| 30 |
+
"pooler_hidden_act": "gelu",
|
| 31 |
+
"pooler_hidden_size": 768,
|
| 32 |
+
"pos_att_type": [
|
| 33 |
+
"p2c",
|
| 34 |
+
"c2p"
|
| 35 |
+
],
|
| 36 |
+
"position_biased_input": false,
|
| 37 |
+
"position_buckets": 256,
|
| 38 |
+
"relative_attention": true,
|
| 39 |
+
"share_att_key": true,
|
| 40 |
+
"transformers_version": "4.57.1",
|
| 41 |
+
"type_vocab_size": 0,
|
| 42 |
+
"vocab_size": 128100
|
| 43 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-0/checkpoint-2000/tokenizer_config.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "[CLS]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "[SEP]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "[UNK]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": true,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"128000": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"bos_token": "[CLS]",
|
| 45 |
+
"clean_up_tokenization_spaces": false,
|
| 46 |
+
"cls_token": "[CLS]",
|
| 47 |
+
"do_lower_case": false,
|
| 48 |
+
"eos_token": "[SEP]",
|
| 49 |
+
"extra_special_tokens": {},
|
| 50 |
+
"mask_token": "[MASK]",
|
| 51 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 52 |
+
"pad_token": "[PAD]",
|
| 53 |
+
"sep_token": "[SEP]",
|
| 54 |
+
"sp_model_kwargs": {},
|
| 55 |
+
"split_by_punct": false,
|
| 56 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
| 57 |
+
"unk_token": "[UNK]",
|
| 58 |
+
"vocab_type": "spm"
|
| 59 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-1/checkpoint-2000/added_tokens.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"[MASK]": 128000
|
| 3 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-1/checkpoint-2000/config.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"DebertaV2ForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"dtype": "float32",
|
| 7 |
+
"hidden_act": "gelu",
|
| 8 |
+
"hidden_dropout_prob": 0.1,
|
| 9 |
+
"hidden_size": 768,
|
| 10 |
+
"id2label": {
|
| 11 |
+
"0": "real",
|
| 12 |
+
"1": "fake"
|
| 13 |
+
},
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 3072,
|
| 16 |
+
"label2id": {
|
| 17 |
+
"fake": 1,
|
| 18 |
+
"real": 0
|
| 19 |
+
},
|
| 20 |
+
"layer_norm_eps": 1e-07,
|
| 21 |
+
"legacy": true,
|
| 22 |
+
"max_position_embeddings": 512,
|
| 23 |
+
"max_relative_positions": -1,
|
| 24 |
+
"model_type": "deberta-v2",
|
| 25 |
+
"norm_rel_ebd": "layer_norm",
|
| 26 |
+
"num_attention_heads": 12,
|
| 27 |
+
"num_hidden_layers": 12,
|
| 28 |
+
"pad_token_id": 0,
|
| 29 |
+
"pooler_dropout": 0,
|
| 30 |
+
"pooler_hidden_act": "gelu",
|
| 31 |
+
"pooler_hidden_size": 768,
|
| 32 |
+
"pos_att_type": [
|
| 33 |
+
"p2c",
|
| 34 |
+
"c2p"
|
| 35 |
+
],
|
| 36 |
+
"position_biased_input": false,
|
| 37 |
+
"position_buckets": 256,
|
| 38 |
+
"relative_attention": true,
|
| 39 |
+
"share_att_key": true,
|
| 40 |
+
"transformers_version": "4.57.1",
|
| 41 |
+
"type_vocab_size": 0,
|
| 42 |
+
"vocab_size": 128100
|
| 43 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-1/checkpoint-2000/special_tokens_map.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "[CLS]",
|
| 3 |
+
"cls_token": "[CLS]",
|
| 4 |
+
"eos_token": "[SEP]",
|
| 5 |
+
"mask_token": "[MASK]",
|
| 6 |
+
"pad_token": "[PAD]",
|
| 7 |
+
"sep_token": "[SEP]",
|
| 8 |
+
"unk_token": {
|
| 9 |
+
"content": "[UNK]",
|
| 10 |
+
"lstrip": false,
|
| 11 |
+
"normalized": true,
|
| 12 |
+
"rstrip": false,
|
| 13 |
+
"single_word": false
|
| 14 |
+
}
|
| 15 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-1/checkpoint-2000/tokenizer_config.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "[CLS]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "[SEP]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "[UNK]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": true,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"128000": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"bos_token": "[CLS]",
|
| 45 |
+
"clean_up_tokenization_spaces": false,
|
| 46 |
+
"cls_token": "[CLS]",
|
| 47 |
+
"do_lower_case": false,
|
| 48 |
+
"eos_token": "[SEP]",
|
| 49 |
+
"extra_special_tokens": {},
|
| 50 |
+
"mask_token": "[MASK]",
|
| 51 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 52 |
+
"pad_token": "[PAD]",
|
| 53 |
+
"sep_token": "[SEP]",
|
| 54 |
+
"sp_model_kwargs": {},
|
| 55 |
+
"split_by_punct": false,
|
| 56 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
| 57 |
+
"unk_token": "[UNK]",
|
| 58 |
+
"vocab_type": "spm"
|
| 59 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-1/checkpoint-2000/trainer_state.json
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 2000,
|
| 3 |
+
"best_metric": 0.5603351693008105,
|
| 4 |
+
"best_model_checkpoint": "./results_hyper_search_DEBERTA/run-1/checkpoint-2000",
|
| 5 |
+
"epoch": 3.2,
|
| 6 |
+
"eval_steps": 1000,
|
| 7 |
+
"global_step": 2000,
|
| 8 |
+
"is_hyper_param_search": true,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 1.6,
|
| 14 |
+
"grad_norm": 2.9890623092651367,
|
| 15 |
+
"learning_rate": 1.5929740554926833e-06,
|
| 16 |
+
"loss": 0.6285,
|
| 17 |
+
"step": 1000
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 1.6,
|
| 21 |
+
"eval_accuracy": 0.5754,
|
| 22 |
+
"eval_f1_macro": 0.5120975672490654,
|
| 23 |
+
"eval_loss": 0.6258611083030701,
|
| 24 |
+
"eval_precision_macro": 0.57641307907854,
|
| 25 |
+
"eval_recall_macro": 0.6602000042936652,
|
| 26 |
+
"eval_runtime": 23.9619,
|
| 27 |
+
"eval_samples_per_second": 208.665,
|
| 28 |
+
"eval_steps_per_second": 3.297,
|
| 29 |
+
"step": 1000
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"epoch": 3.2,
|
| 33 |
+
"grad_norm": 8.2739839553833,
|
| 34 |
+
"learning_rate": 5.316988686221415e-07,
|
| 35 |
+
"loss": 0.577,
|
| 36 |
+
"step": 2000
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 3.2,
|
| 40 |
+
"eval_accuracy": 0.6524,
|
| 41 |
+
"eval_f1_macro": 0.5603351693008105,
|
| 42 |
+
"eval_loss": 0.6020554304122925,
|
| 43 |
+
"eval_precision_macro": 0.5861485813010832,
|
| 44 |
+
"eval_recall_macro": 0.6739538216304121,
|
| 45 |
+
"eval_runtime": 23.7261,
|
| 46 |
+
"eval_samples_per_second": 210.738,
|
| 47 |
+
"eval_steps_per_second": 3.33,
|
| 48 |
+
"step": 2000
|
| 49 |
+
}
|
| 50 |
+
],
|
| 51 |
+
"logging_steps": 1000,
|
| 52 |
+
"max_steps": 2500,
|
| 53 |
+
"num_input_tokens_seen": 0,
|
| 54 |
+
"num_train_epochs": 4,
|
| 55 |
+
"save_steps": 1000,
|
| 56 |
+
"stateful_callbacks": {
|
| 57 |
+
"TrainerControl": {
|
| 58 |
+
"args": {
|
| 59 |
+
"should_epoch_stop": false,
|
| 60 |
+
"should_evaluate": false,
|
| 61 |
+
"should_log": false,
|
| 62 |
+
"should_save": true,
|
| 63 |
+
"should_training_stop": false
|
| 64 |
+
},
|
| 65 |
+
"attributes": {}
|
| 66 |
+
}
|
| 67 |
+
},
|
| 68 |
+
"total_flos": 3.3678819065856e+16,
|
| 69 |
+
"train_batch_size": 64,
|
| 70 |
+
"trial_name": null,
|
| 71 |
+
"trial_params": {
|
| 72 |
+
"learning_rate": 2.33056031036771e-06,
|
| 73 |
+
"num_train_epochs": 4,
|
| 74 |
+
"per_device_train_batch_size": 16,
|
| 75 |
+
"seed": 6,
|
| 76 |
+
"warmup_ratio": 0.12139509415842381,
|
| 77 |
+
"weight_decay": 0.2500490360742963
|
| 78 |
+
}
|
| 79 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-18/checkpoint-3000/added_tokens.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"[MASK]": 128000
|
| 3 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-18/checkpoint-3000/config.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"DebertaV2ForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"dtype": "float32",
|
| 7 |
+
"hidden_act": "gelu",
|
| 8 |
+
"hidden_dropout_prob": 0.1,
|
| 9 |
+
"hidden_size": 768,
|
| 10 |
+
"id2label": {
|
| 11 |
+
"0": "real",
|
| 12 |
+
"1": "fake"
|
| 13 |
+
},
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 3072,
|
| 16 |
+
"label2id": {
|
| 17 |
+
"fake": 1,
|
| 18 |
+
"real": 0
|
| 19 |
+
},
|
| 20 |
+
"layer_norm_eps": 1e-07,
|
| 21 |
+
"legacy": true,
|
| 22 |
+
"max_position_embeddings": 512,
|
| 23 |
+
"max_relative_positions": -1,
|
| 24 |
+
"model_type": "deberta-v2",
|
| 25 |
+
"norm_rel_ebd": "layer_norm",
|
| 26 |
+
"num_attention_heads": 12,
|
| 27 |
+
"num_hidden_layers": 12,
|
| 28 |
+
"pad_token_id": 0,
|
| 29 |
+
"pooler_dropout": 0,
|
| 30 |
+
"pooler_hidden_act": "gelu",
|
| 31 |
+
"pooler_hidden_size": 768,
|
| 32 |
+
"pos_att_type": [
|
| 33 |
+
"p2c",
|
| 34 |
+
"c2p"
|
| 35 |
+
],
|
| 36 |
+
"position_biased_input": false,
|
| 37 |
+
"position_buckets": 256,
|
| 38 |
+
"relative_attention": true,
|
| 39 |
+
"share_att_key": true,
|
| 40 |
+
"transformers_version": "4.57.1",
|
| 41 |
+
"type_vocab_size": 0,
|
| 42 |
+
"vocab_size": 128100
|
| 43 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-18/checkpoint-3000/special_tokens_map.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "[CLS]",
|
| 3 |
+
"cls_token": "[CLS]",
|
| 4 |
+
"eos_token": "[SEP]",
|
| 5 |
+
"mask_token": "[MASK]",
|
| 6 |
+
"pad_token": "[PAD]",
|
| 7 |
+
"sep_token": "[SEP]",
|
| 8 |
+
"unk_token": {
|
| 9 |
+
"content": "[UNK]",
|
| 10 |
+
"lstrip": false,
|
| 11 |
+
"normalized": true,
|
| 12 |
+
"rstrip": false,
|
| 13 |
+
"single_word": false
|
| 14 |
+
}
|
| 15 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-18/checkpoint-3000/tokenizer_config.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "[CLS]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "[SEP]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "[UNK]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": true,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"128000": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"bos_token": "[CLS]",
|
| 45 |
+
"clean_up_tokenization_spaces": false,
|
| 46 |
+
"cls_token": "[CLS]",
|
| 47 |
+
"do_lower_case": false,
|
| 48 |
+
"eos_token": "[SEP]",
|
| 49 |
+
"extra_special_tokens": {},
|
| 50 |
+
"mask_token": "[MASK]",
|
| 51 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 52 |
+
"pad_token": "[PAD]",
|
| 53 |
+
"sep_token": "[SEP]",
|
| 54 |
+
"sp_model_kwargs": {},
|
| 55 |
+
"split_by_punct": false,
|
| 56 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
| 57 |
+
"unk_token": "[UNK]",
|
| 58 |
+
"vocab_type": "spm"
|
| 59 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-18/checkpoint-3000/trainer_state.json
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 3000,
|
| 3 |
+
"best_metric": 0.6044354346853781,
|
| 4 |
+
"best_model_checkpoint": "./results_hyper_search_DEBERTA/run-18/checkpoint-3000",
|
| 5 |
+
"epoch": 4.8,
|
| 6 |
+
"eval_steps": 1000,
|
| 7 |
+
"global_step": 3000,
|
| 8 |
+
"is_hyper_param_search": true,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 1.6,
|
| 14 |
+
"grad_norm": 2.9424350261688232,
|
| 15 |
+
"learning_rate": 9.350032740691138e-06,
|
| 16 |
+
"loss": 0.6177,
|
| 17 |
+
"step": 1000
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 1.6,
|
| 21 |
+
"eval_accuracy": 0.7014,
|
| 22 |
+
"eval_f1_macro": 0.5868228537163056,
|
| 23 |
+
"eval_loss": 0.5922038555145264,
|
| 24 |
+
"eval_precision_macro": 0.5921018965323757,
|
| 25 |
+
"eval_recall_macro": 0.6720652798127962,
|
| 26 |
+
"eval_runtime": 23.9124,
|
| 27 |
+
"eval_samples_per_second": 209.097,
|
| 28 |
+
"eval_steps_per_second": 3.304,
|
| 29 |
+
"step": 1000
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"epoch": 3.2,
|
| 33 |
+
"grad_norm": 12.92706298828125,
|
| 34 |
+
"learning_rate": 7.013108786506899e-06,
|
| 35 |
+
"loss": 0.5371,
|
| 36 |
+
"step": 2000
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 3.2,
|
| 40 |
+
"eval_accuracy": 0.4686,
|
| 41 |
+
"eval_f1_macro": 0.44141691149088036,
|
| 42 |
+
"eval_loss": 0.6931988000869751,
|
| 43 |
+
"eval_precision_macro": 0.5766681586483515,
|
| 44 |
+
"eval_recall_macro": 0.6479536659850473,
|
| 45 |
+
"eval_runtime": 23.7806,
|
| 46 |
+
"eval_samples_per_second": 210.256,
|
| 47 |
+
"eval_steps_per_second": 3.322,
|
| 48 |
+
"step": 2000
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"epoch": 4.8,
|
| 52 |
+
"grad_norm": 13.355767250061035,
|
| 53 |
+
"learning_rate": 4.6761848323226605e-06,
|
| 54 |
+
"loss": 0.4438,
|
| 55 |
+
"step": 3000
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"epoch": 4.8,
|
| 59 |
+
"eval_accuracy": 0.7276,
|
| 60 |
+
"eval_f1_macro": 0.6044354346853781,
|
| 61 |
+
"eval_loss": 0.6976514458656311,
|
| 62 |
+
"eval_precision_macro": 0.601173813907625,
|
| 63 |
+
"eval_recall_macro": 0.6793853081509867,
|
| 64 |
+
"eval_runtime": 23.8882,
|
| 65 |
+
"eval_samples_per_second": 209.308,
|
| 66 |
+
"eval_steps_per_second": 3.307,
|
| 67 |
+
"step": 3000
|
| 68 |
+
}
|
| 69 |
+
],
|
| 70 |
+
"logging_steps": 1000,
|
| 71 |
+
"max_steps": 5000,
|
| 72 |
+
"num_input_tokens_seen": 0,
|
| 73 |
+
"num_train_epochs": 8,
|
| 74 |
+
"save_steps": 1000,
|
| 75 |
+
"stateful_callbacks": {
|
| 76 |
+
"TrainerControl": {
|
| 77 |
+
"args": {
|
| 78 |
+
"should_epoch_stop": false,
|
| 79 |
+
"should_evaluate": false,
|
| 80 |
+
"should_log": false,
|
| 81 |
+
"should_save": true,
|
| 82 |
+
"should_training_stop": false
|
| 83 |
+
},
|
| 84 |
+
"attributes": {}
|
| 85 |
+
}
|
| 86 |
+
},
|
| 87 |
+
"total_flos": 5.0518228598784e+16,
|
| 88 |
+
"train_batch_size": 64,
|
| 89 |
+
"trial_name": null,
|
| 90 |
+
"trial_params": {
|
| 91 |
+
"learning_rate": 1.0315182333769228e-05,
|
| 92 |
+
"num_train_epochs": 8,
|
| 93 |
+
"per_device_train_batch_size": 16,
|
| 94 |
+
"seed": 22,
|
| 95 |
+
"warmup_ratio": 0.11705394279119077,
|
| 96 |
+
"weight_decay": 0.13356417654069175
|
| 97 |
+
}
|
| 98 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-2/checkpoint-5000/added_tokens.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"[MASK]": 128000
|
| 3 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-2/checkpoint-5000/config.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"DebertaV2ForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"dtype": "float32",
|
| 7 |
+
"hidden_act": "gelu",
|
| 8 |
+
"hidden_dropout_prob": 0.1,
|
| 9 |
+
"hidden_size": 768,
|
| 10 |
+
"id2label": {
|
| 11 |
+
"0": "real",
|
| 12 |
+
"1": "fake"
|
| 13 |
+
},
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 3072,
|
| 16 |
+
"label2id": {
|
| 17 |
+
"fake": 1,
|
| 18 |
+
"real": 0
|
| 19 |
+
},
|
| 20 |
+
"layer_norm_eps": 1e-07,
|
| 21 |
+
"legacy": true,
|
| 22 |
+
"max_position_embeddings": 512,
|
| 23 |
+
"max_relative_positions": -1,
|
| 24 |
+
"model_type": "deberta-v2",
|
| 25 |
+
"norm_rel_ebd": "layer_norm",
|
| 26 |
+
"num_attention_heads": 12,
|
| 27 |
+
"num_hidden_layers": 12,
|
| 28 |
+
"pad_token_id": 0,
|
| 29 |
+
"pooler_dropout": 0,
|
| 30 |
+
"pooler_hidden_act": "gelu",
|
| 31 |
+
"pooler_hidden_size": 768,
|
| 32 |
+
"pos_att_type": [
|
| 33 |
+
"p2c",
|
| 34 |
+
"c2p"
|
| 35 |
+
],
|
| 36 |
+
"position_biased_input": false,
|
| 37 |
+
"position_buckets": 256,
|
| 38 |
+
"relative_attention": true,
|
| 39 |
+
"share_att_key": true,
|
| 40 |
+
"transformers_version": "4.57.1",
|
| 41 |
+
"type_vocab_size": 0,
|
| 42 |
+
"vocab_size": 128100
|
| 43 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-2/checkpoint-5000/special_tokens_map.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "[CLS]",
|
| 3 |
+
"cls_token": "[CLS]",
|
| 4 |
+
"eos_token": "[SEP]",
|
| 5 |
+
"mask_token": "[MASK]",
|
| 6 |
+
"pad_token": "[PAD]",
|
| 7 |
+
"sep_token": "[SEP]",
|
| 8 |
+
"unk_token": {
|
| 9 |
+
"content": "[UNK]",
|
| 10 |
+
"lstrip": false,
|
| 11 |
+
"normalized": true,
|
| 12 |
+
"rstrip": false,
|
| 13 |
+
"single_word": false
|
| 14 |
+
}
|
| 15 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-2/checkpoint-5000/tokenizer_config.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "[CLS]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "[SEP]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "[UNK]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": true,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"128000": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"bos_token": "[CLS]",
|
| 45 |
+
"clean_up_tokenization_spaces": false,
|
| 46 |
+
"cls_token": "[CLS]",
|
| 47 |
+
"do_lower_case": false,
|
| 48 |
+
"eos_token": "[SEP]",
|
| 49 |
+
"extra_special_tokens": {},
|
| 50 |
+
"mask_token": "[MASK]",
|
| 51 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 52 |
+
"pad_token": "[PAD]",
|
| 53 |
+
"sep_token": "[SEP]",
|
| 54 |
+
"sp_model_kwargs": {},
|
| 55 |
+
"split_by_punct": false,
|
| 56 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
| 57 |
+
"unk_token": "[UNK]",
|
| 58 |
+
"vocab_type": "spm"
|
| 59 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-2/checkpoint-5000/trainer_state.json
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 5000,
|
| 3 |
+
"best_metric": 0.5997370106370481,
|
| 4 |
+
"best_model_checkpoint": "./results_hyper_search_DEBERTA/run-2/checkpoint-5000",
|
| 5 |
+
"epoch": 8.0,
|
| 6 |
+
"eval_steps": 1000,
|
| 7 |
+
"global_step": 5000,
|
| 8 |
+
"is_hyper_param_search": true,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 1.6,
|
| 14 |
+
"grad_norm": 1.8016266822814941,
|
| 15 |
+
"learning_rate": 1.2474286546321821e-05,
|
| 16 |
+
"loss": 0.6152,
|
| 17 |
+
"step": 1000
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 1.6,
|
| 21 |
+
"eval_accuracy": 0.639,
|
| 22 |
+
"eval_f1_macro": 0.5549379425686003,
|
| 23 |
+
"eval_loss": 0.6044051051139832,
|
| 24 |
+
"eval_precision_macro": 0.5880839985122879,
|
| 25 |
+
"eval_recall_macro": 0.6807331165032391,
|
| 26 |
+
"eval_runtime": 23.9947,
|
| 27 |
+
"eval_samples_per_second": 208.38,
|
| 28 |
+
"eval_steps_per_second": 3.292,
|
| 29 |
+
"step": 1000
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"epoch": 3.2,
|
| 33 |
+
"grad_norm": 3.678083658218384,
|
| 34 |
+
"learning_rate": 9.3564943577885e-06,
|
| 35 |
+
"loss": 0.5344,
|
| 36 |
+
"step": 2000
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 3.2,
|
| 40 |
+
"eval_accuracy": 0.7188,
|
| 41 |
+
"eval_f1_macro": 0.5978591232436511,
|
| 42 |
+
"eval_loss": 0.6491641998291016,
|
| 43 |
+
"eval_precision_macro": 0.5972849820479006,
|
| 44 |
+
"eval_recall_macro": 0.6754914904922151,
|
| 45 |
+
"eval_runtime": 23.7704,
|
| 46 |
+
"eval_samples_per_second": 210.346,
|
| 47 |
+
"eval_steps_per_second": 3.323,
|
| 48 |
+
"step": 2000
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"epoch": 4.8,
|
| 52 |
+
"grad_norm": 5.0685858726501465,
|
| 53 |
+
"learning_rate": 6.238702169255177e-06,
|
| 54 |
+
"loss": 0.4334,
|
| 55 |
+
"step": 3000
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"epoch": 4.8,
|
| 59 |
+
"eval_accuracy": 0.6852,
|
| 60 |
+
"eval_f1_macro": 0.5791421344499168,
|
| 61 |
+
"eval_loss": 0.7180050611495972,
|
| 62 |
+
"eval_precision_macro": 0.59082407477529,
|
| 63 |
+
"eval_recall_macro": 0.6754002501059999,
|
| 64 |
+
"eval_runtime": 23.7836,
|
| 65 |
+
"eval_samples_per_second": 210.229,
|
| 66 |
+
"eval_steps_per_second": 3.322,
|
| 67 |
+
"step": 3000
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"epoch": 6.4,
|
| 71 |
+
"grad_norm": 8.405384063720703,
|
| 72 |
+
"learning_rate": 3.1209099807218553e-06,
|
| 73 |
+
"loss": 0.3258,
|
| 74 |
+
"step": 4000
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"epoch": 6.4,
|
| 78 |
+
"eval_accuracy": 0.7162,
|
| 79 |
+
"eval_f1_macro": 0.5901324695324581,
|
| 80 |
+
"eval_loss": 0.9122663140296936,
|
| 81 |
+
"eval_precision_macro": 0.5900376496467702,
|
| 82 |
+
"eval_recall_macro": 0.6606401049801149,
|
| 83 |
+
"eval_runtime": 23.765,
|
| 84 |
+
"eval_samples_per_second": 210.393,
|
| 85 |
+
"eval_steps_per_second": 3.324,
|
| 86 |
+
"step": 4000
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 8.0,
|
| 90 |
+
"grad_norm": 4.3618855476379395,
|
| 91 |
+
"learning_rate": 3.117792188533322e-09,
|
| 92 |
+
"loss": 0.2579,
|
| 93 |
+
"step": 5000
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 8.0,
|
| 97 |
+
"eval_accuracy": 0.7296,
|
| 98 |
+
"eval_f1_macro": 0.5997370106370481,
|
| 99 |
+
"eval_loss": 0.9907922148704529,
|
| 100 |
+
"eval_precision_macro": 0.5957433635167557,
|
| 101 |
+
"eval_recall_macro": 0.6659904143923658,
|
| 102 |
+
"eval_runtime": 23.8201,
|
| 103 |
+
"eval_samples_per_second": 209.906,
|
| 104 |
+
"eval_steps_per_second": 3.317,
|
| 105 |
+
"step": 5000
|
| 106 |
+
}
|
| 107 |
+
],
|
| 108 |
+
"logging_steps": 1000,
|
| 109 |
+
"max_steps": 5000,
|
| 110 |
+
"num_input_tokens_seen": 0,
|
| 111 |
+
"num_train_epochs": 8,
|
| 112 |
+
"save_steps": 1000,
|
| 113 |
+
"stateful_callbacks": {
|
| 114 |
+
"TrainerControl": {
|
| 115 |
+
"args": {
|
| 116 |
+
"should_epoch_stop": false,
|
| 117 |
+
"should_evaluate": false,
|
| 118 |
+
"should_log": false,
|
| 119 |
+
"should_save": true,
|
| 120 |
+
"should_training_stop": true
|
| 121 |
+
},
|
| 122 |
+
"attributes": {}
|
| 123 |
+
}
|
| 124 |
+
},
|
| 125 |
+
"total_flos": 8.419704766464e+16,
|
| 126 |
+
"train_batch_size": 64,
|
| 127 |
+
"trial_name": null,
|
| 128 |
+
"trial_params": {
|
| 129 |
+
"learning_rate": 1.3986415757760482e-05,
|
| 130 |
+
"num_train_epochs": 8,
|
| 131 |
+
"per_device_train_batch_size": 16,
|
| 132 |
+
"seed": 11,
|
| 133 |
+
"warmup_ratio": 0.1027040959832009,
|
| 134 |
+
"weight_decay": 0.14078723455527858
|
| 135 |
+
}
|
| 136 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-3/checkpoint-2000/added_tokens.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"[MASK]": 128000
|
| 3 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-3/checkpoint-2000/config.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"DebertaV2ForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"dtype": "float32",
|
| 7 |
+
"hidden_act": "gelu",
|
| 8 |
+
"hidden_dropout_prob": 0.1,
|
| 9 |
+
"hidden_size": 768,
|
| 10 |
+
"id2label": {
|
| 11 |
+
"0": "real",
|
| 12 |
+
"1": "fake"
|
| 13 |
+
},
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 3072,
|
| 16 |
+
"label2id": {
|
| 17 |
+
"fake": 1,
|
| 18 |
+
"real": 0
|
| 19 |
+
},
|
| 20 |
+
"layer_norm_eps": 1e-07,
|
| 21 |
+
"legacy": true,
|
| 22 |
+
"max_position_embeddings": 512,
|
| 23 |
+
"max_relative_positions": -1,
|
| 24 |
+
"model_type": "deberta-v2",
|
| 25 |
+
"norm_rel_ebd": "layer_norm",
|
| 26 |
+
"num_attention_heads": 12,
|
| 27 |
+
"num_hidden_layers": 12,
|
| 28 |
+
"pad_token_id": 0,
|
| 29 |
+
"pooler_dropout": 0,
|
| 30 |
+
"pooler_hidden_act": "gelu",
|
| 31 |
+
"pooler_hidden_size": 768,
|
| 32 |
+
"pos_att_type": [
|
| 33 |
+
"p2c",
|
| 34 |
+
"c2p"
|
| 35 |
+
],
|
| 36 |
+
"position_biased_input": false,
|
| 37 |
+
"position_buckets": 256,
|
| 38 |
+
"relative_attention": true,
|
| 39 |
+
"share_att_key": true,
|
| 40 |
+
"transformers_version": "4.57.1",
|
| 41 |
+
"type_vocab_size": 0,
|
| 42 |
+
"vocab_size": 128100
|
| 43 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-3/checkpoint-2000/special_tokens_map.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "[CLS]",
|
| 3 |
+
"cls_token": "[CLS]",
|
| 4 |
+
"eos_token": "[SEP]",
|
| 5 |
+
"mask_token": "[MASK]",
|
| 6 |
+
"pad_token": "[PAD]",
|
| 7 |
+
"sep_token": "[SEP]",
|
| 8 |
+
"unk_token": {
|
| 9 |
+
"content": "[UNK]",
|
| 10 |
+
"lstrip": false,
|
| 11 |
+
"normalized": true,
|
| 12 |
+
"rstrip": false,
|
| 13 |
+
"single_word": false
|
| 14 |
+
}
|
| 15 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-3/checkpoint-2000/tokenizer_config.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "[CLS]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "[SEP]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "[UNK]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": true,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"128000": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"bos_token": "[CLS]",
|
| 45 |
+
"clean_up_tokenization_spaces": false,
|
| 46 |
+
"cls_token": "[CLS]",
|
| 47 |
+
"do_lower_case": false,
|
| 48 |
+
"eos_token": "[SEP]",
|
| 49 |
+
"extra_special_tokens": {},
|
| 50 |
+
"mask_token": "[MASK]",
|
| 51 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 52 |
+
"pad_token": "[PAD]",
|
| 53 |
+
"sep_token": "[SEP]",
|
| 54 |
+
"sp_model_kwargs": {},
|
| 55 |
+
"split_by_punct": false,
|
| 56 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
| 57 |
+
"unk_token": "[UNK]",
|
| 58 |
+
"vocab_type": "spm"
|
| 59 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-3/checkpoint-2000/trainer_state.json
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 2000,
|
| 3 |
+
"best_metric": 0.5214696341972573,
|
| 4 |
+
"best_model_checkpoint": "./results_hyper_search_DEBERTA/run-3/checkpoint-2000",
|
| 5 |
+
"epoch": 3.2,
|
| 6 |
+
"eval_steps": 1000,
|
| 7 |
+
"global_step": 2000,
|
| 8 |
+
"is_hyper_param_search": true,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 1.6,
|
| 14 |
+
"grad_norm": 2.5334744453430176,
|
| 15 |
+
"learning_rate": 3.3257742507708724e-06,
|
| 16 |
+
"loss": 0.6087,
|
| 17 |
+
"step": 1000
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 1.6,
|
| 21 |
+
"eval_accuracy": 0.583,
|
| 22 |
+
"eval_f1_macro": 0.5200337896212106,
|
| 23 |
+
"eval_loss": 0.6109012961387634,
|
| 24 |
+
"eval_precision_macro": 0.5825661988953263,
|
| 25 |
+
"eval_recall_macro": 0.6731011265504157,
|
| 26 |
+
"eval_runtime": 23.919,
|
| 27 |
+
"eval_samples_per_second": 209.039,
|
| 28 |
+
"eval_steps_per_second": 3.303,
|
| 29 |
+
"step": 1000
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"epoch": 3.2,
|
| 33 |
+
"grad_norm": 7.089099407196045,
|
| 34 |
+
"learning_rate": 1.1100685540547683e-06,
|
| 35 |
+
"loss": 0.5543,
|
| 36 |
+
"step": 2000
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 3.2,
|
| 40 |
+
"eval_accuracy": 0.5828,
|
| 41 |
+
"eval_f1_macro": 0.5214696341972573,
|
| 42 |
+
"eval_loss": 0.6113795042037964,
|
| 43 |
+
"eval_precision_macro": 0.5854079475480009,
|
| 44 |
+
"eval_recall_macro": 0.6790498655546073,
|
| 45 |
+
"eval_runtime": 23.7426,
|
| 46 |
+
"eval_samples_per_second": 210.592,
|
| 47 |
+
"eval_steps_per_second": 3.327,
|
| 48 |
+
"step": 2000
|
| 49 |
+
}
|
| 50 |
+
],
|
| 51 |
+
"logging_steps": 1000,
|
| 52 |
+
"max_steps": 2500,
|
| 53 |
+
"num_input_tokens_seen": 0,
|
| 54 |
+
"num_train_epochs": 4,
|
| 55 |
+
"save_steps": 1000,
|
| 56 |
+
"stateful_callbacks": {
|
| 57 |
+
"TrainerControl": {
|
| 58 |
+
"args": {
|
| 59 |
+
"should_epoch_stop": false,
|
| 60 |
+
"should_evaluate": false,
|
| 61 |
+
"should_log": false,
|
| 62 |
+
"should_save": true,
|
| 63 |
+
"should_training_stop": false
|
| 64 |
+
},
|
| 65 |
+
"attributes": {}
|
| 66 |
+
}
|
| 67 |
+
},
|
| 68 |
+
"total_flos": 3.3678819065856e+16,
|
| 69 |
+
"train_batch_size": 64,
|
| 70 |
+
"trial_name": null,
|
| 71 |
+
"trial_params": {
|
| 72 |
+
"learning_rate": 5.164809979045239e-06,
|
| 73 |
+
"num_train_epochs": 4,
|
| 74 |
+
"per_device_train_batch_size": 16,
|
| 75 |
+
"seed": 15,
|
| 76 |
+
"warmup_ratio": 0.06755599489879922,
|
| 77 |
+
"weight_decay": 0.23675131092877333
|
| 78 |
+
}
|
| 79 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-4/checkpoint-1000/added_tokens.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"[MASK]": 128000
|
| 3 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-4/checkpoint-1000/config.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"DebertaV2ForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"dtype": "float32",
|
| 7 |
+
"hidden_act": "gelu",
|
| 8 |
+
"hidden_dropout_prob": 0.1,
|
| 9 |
+
"hidden_size": 768,
|
| 10 |
+
"id2label": {
|
| 11 |
+
"0": "real",
|
| 12 |
+
"1": "fake"
|
| 13 |
+
},
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 3072,
|
| 16 |
+
"label2id": {
|
| 17 |
+
"fake": 1,
|
| 18 |
+
"real": 0
|
| 19 |
+
},
|
| 20 |
+
"layer_norm_eps": 1e-07,
|
| 21 |
+
"legacy": true,
|
| 22 |
+
"max_position_embeddings": 512,
|
| 23 |
+
"max_relative_positions": -1,
|
| 24 |
+
"model_type": "deberta-v2",
|
| 25 |
+
"norm_rel_ebd": "layer_norm",
|
| 26 |
+
"num_attention_heads": 12,
|
| 27 |
+
"num_hidden_layers": 12,
|
| 28 |
+
"pad_token_id": 0,
|
| 29 |
+
"pooler_dropout": 0,
|
| 30 |
+
"pooler_hidden_act": "gelu",
|
| 31 |
+
"pooler_hidden_size": 768,
|
| 32 |
+
"pos_att_type": [
|
| 33 |
+
"p2c",
|
| 34 |
+
"c2p"
|
| 35 |
+
],
|
| 36 |
+
"position_biased_input": false,
|
| 37 |
+
"position_buckets": 256,
|
| 38 |
+
"relative_attention": true,
|
| 39 |
+
"share_att_key": true,
|
| 40 |
+
"transformers_version": "4.57.1",
|
| 41 |
+
"type_vocab_size": 0,
|
| 42 |
+
"vocab_size": 128100
|
| 43 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-4/checkpoint-1000/special_tokens_map.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "[CLS]",
|
| 3 |
+
"cls_token": "[CLS]",
|
| 4 |
+
"eos_token": "[SEP]",
|
| 5 |
+
"mask_token": "[MASK]",
|
| 6 |
+
"pad_token": "[PAD]",
|
| 7 |
+
"sep_token": "[SEP]",
|
| 8 |
+
"unk_token": {
|
| 9 |
+
"content": "[UNK]",
|
| 10 |
+
"lstrip": false,
|
| 11 |
+
"normalized": true,
|
| 12 |
+
"rstrip": false,
|
| 13 |
+
"single_word": false
|
| 14 |
+
}
|
| 15 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-4/checkpoint-1000/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
final_new/results_hyper_search_DEBERTA/run-4/checkpoint-1000/tokenizer_config.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "[CLS]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "[SEP]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "[UNK]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": true,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"128000": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"bos_token": "[CLS]",
|
| 45 |
+
"clean_up_tokenization_spaces": false,
|
| 46 |
+
"cls_token": "[CLS]",
|
| 47 |
+
"do_lower_case": false,
|
| 48 |
+
"eos_token": "[SEP]",
|
| 49 |
+
"extra_special_tokens": {},
|
| 50 |
+
"mask_token": "[MASK]",
|
| 51 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 52 |
+
"pad_token": "[PAD]",
|
| 53 |
+
"sep_token": "[SEP]",
|
| 54 |
+
"sp_model_kwargs": {},
|
| 55 |
+
"split_by_punct": false,
|
| 56 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
| 57 |
+
"unk_token": "[UNK]",
|
| 58 |
+
"vocab_type": "spm"
|
| 59 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-4/checkpoint-1000/trainer_state.json
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 1000,
|
| 3 |
+
"best_metric": 0.5977399564843511,
|
| 4 |
+
"best_model_checkpoint": "./results_hyper_search_DEBERTA/run-4/checkpoint-1000",
|
| 5 |
+
"epoch": 1.6,
|
| 6 |
+
"eval_steps": 1000,
|
| 7 |
+
"global_step": 1000,
|
| 8 |
+
"is_hyper_param_search": true,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 1.6,
|
| 14 |
+
"grad_norm": 3.9698081016540527,
|
| 15 |
+
"learning_rate": 3.010016159294345e-06,
|
| 16 |
+
"loss": 0.6006,
|
| 17 |
+
"step": 1000
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 1.6,
|
| 21 |
+
"eval_accuracy": 0.7212,
|
| 22 |
+
"eval_f1_macro": 0.5977399564843511,
|
| 23 |
+
"eval_loss": 0.6234617233276367,
|
| 24 |
+
"eval_precision_macro": 0.5963182657859172,
|
| 25 |
+
"eval_recall_macro": 0.6720324064383509,
|
| 26 |
+
"eval_runtime": 24.0476,
|
| 27 |
+
"eval_samples_per_second": 207.921,
|
| 28 |
+
"eval_steps_per_second": 3.285,
|
| 29 |
+
"step": 1000
|
| 30 |
+
}
|
| 31 |
+
],
|
| 32 |
+
"logging_steps": 1000,
|
| 33 |
+
"max_steps": 1875,
|
| 34 |
+
"num_input_tokens_seen": 0,
|
| 35 |
+
"num_train_epochs": 3,
|
| 36 |
+
"save_steps": 1000,
|
| 37 |
+
"stateful_callbacks": {
|
| 38 |
+
"TrainerControl": {
|
| 39 |
+
"args": {
|
| 40 |
+
"should_epoch_stop": false,
|
| 41 |
+
"should_evaluate": false,
|
| 42 |
+
"should_log": false,
|
| 43 |
+
"should_save": true,
|
| 44 |
+
"should_training_stop": false
|
| 45 |
+
},
|
| 46 |
+
"attributes": {}
|
| 47 |
+
}
|
| 48 |
+
},
|
| 49 |
+
"total_flos": 1.6839409532928e+16,
|
| 50 |
+
"train_batch_size": 64,
|
| 51 |
+
"trial_name": null,
|
| 52 |
+
"trial_params": {
|
| 53 |
+
"learning_rate": 6.391130201241418e-06,
|
| 54 |
+
"num_train_epochs": 3,
|
| 55 |
+
"per_device_train_batch_size": 16,
|
| 56 |
+
"seed": 14,
|
| 57 |
+
"warmup_ratio": 0.007895358563318623,
|
| 58 |
+
"weight_decay": 0.19664387232971756
|
| 59 |
+
}
|
| 60 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-5/checkpoint-2000/added_tokens.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"[MASK]": 128000
|
| 3 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-5/checkpoint-2000/config.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"DebertaV2ForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"dtype": "float32",
|
| 7 |
+
"hidden_act": "gelu",
|
| 8 |
+
"hidden_dropout_prob": 0.1,
|
| 9 |
+
"hidden_size": 768,
|
| 10 |
+
"id2label": {
|
| 11 |
+
"0": "real",
|
| 12 |
+
"1": "fake"
|
| 13 |
+
},
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 3072,
|
| 16 |
+
"label2id": {
|
| 17 |
+
"fake": 1,
|
| 18 |
+
"real": 0
|
| 19 |
+
},
|
| 20 |
+
"layer_norm_eps": 1e-07,
|
| 21 |
+
"legacy": true,
|
| 22 |
+
"max_position_embeddings": 512,
|
| 23 |
+
"max_relative_positions": -1,
|
| 24 |
+
"model_type": "deberta-v2",
|
| 25 |
+
"norm_rel_ebd": "layer_norm",
|
| 26 |
+
"num_attention_heads": 12,
|
| 27 |
+
"num_hidden_layers": 12,
|
| 28 |
+
"pad_token_id": 0,
|
| 29 |
+
"pooler_dropout": 0,
|
| 30 |
+
"pooler_hidden_act": "gelu",
|
| 31 |
+
"pooler_hidden_size": 768,
|
| 32 |
+
"pos_att_type": [
|
| 33 |
+
"p2c",
|
| 34 |
+
"c2p"
|
| 35 |
+
],
|
| 36 |
+
"position_biased_input": false,
|
| 37 |
+
"position_buckets": 256,
|
| 38 |
+
"relative_attention": true,
|
| 39 |
+
"share_att_key": true,
|
| 40 |
+
"transformers_version": "4.57.1",
|
| 41 |
+
"type_vocab_size": 0,
|
| 42 |
+
"vocab_size": 128100
|
| 43 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-5/checkpoint-2000/special_tokens_map.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "[CLS]",
|
| 3 |
+
"cls_token": "[CLS]",
|
| 4 |
+
"eos_token": "[SEP]",
|
| 5 |
+
"mask_token": "[MASK]",
|
| 6 |
+
"pad_token": "[PAD]",
|
| 7 |
+
"sep_token": "[SEP]",
|
| 8 |
+
"unk_token": {
|
| 9 |
+
"content": "[UNK]",
|
| 10 |
+
"lstrip": false,
|
| 11 |
+
"normalized": true,
|
| 12 |
+
"rstrip": false,
|
| 13 |
+
"single_word": false
|
| 14 |
+
}
|
| 15 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-5/checkpoint-2000/tokenizer_config.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "[CLS]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "[SEP]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "[UNK]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": true,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"128000": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"bos_token": "[CLS]",
|
| 45 |
+
"clean_up_tokenization_spaces": false,
|
| 46 |
+
"cls_token": "[CLS]",
|
| 47 |
+
"do_lower_case": false,
|
| 48 |
+
"eos_token": "[SEP]",
|
| 49 |
+
"extra_special_tokens": {},
|
| 50 |
+
"mask_token": "[MASK]",
|
| 51 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 52 |
+
"pad_token": "[PAD]",
|
| 53 |
+
"sep_token": "[SEP]",
|
| 54 |
+
"sp_model_kwargs": {},
|
| 55 |
+
"split_by_punct": false,
|
| 56 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
| 57 |
+
"unk_token": "[UNK]",
|
| 58 |
+
"vocab_type": "spm"
|
| 59 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-5/checkpoint-2000/trainer_state.json
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 2000,
|
| 3 |
+
"best_metric": 0.5761183261183261,
|
| 4 |
+
"best_model_checkpoint": "./results_hyper_search_DEBERTA/run-5/checkpoint-2000",
|
| 5 |
+
"epoch": 3.2,
|
| 6 |
+
"eval_steps": 1000,
|
| 7 |
+
"global_step": 2000,
|
| 8 |
+
"is_hyper_param_search": true,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 1.6,
|
| 14 |
+
"grad_norm": 6.222267150878906,
|
| 15 |
+
"learning_rate": 1.909543747689655e-06,
|
| 16 |
+
"loss": 0.6227,
|
| 17 |
+
"step": 1000
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 1.6,
|
| 21 |
+
"eval_accuracy": 0.6684,
|
| 22 |
+
"eval_f1_macro": 0.5699836334422224,
|
| 23 |
+
"eval_loss": 0.60748291015625,
|
| 24 |
+
"eval_precision_macro": 0.5887745763033124,
|
| 25 |
+
"eval_recall_macro": 0.6759611101271461,
|
| 26 |
+
"eval_runtime": 23.9742,
|
| 27 |
+
"eval_samples_per_second": 208.557,
|
| 28 |
+
"eval_steps_per_second": 3.295,
|
| 29 |
+
"step": 1000
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"epoch": 3.2,
|
| 33 |
+
"grad_norm": 5.623478889465332,
|
| 34 |
+
"learning_rate": 1.2154166129424158e-06,
|
| 35 |
+
"loss": 0.5704,
|
| 36 |
+
"step": 2000
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 3.2,
|
| 40 |
+
"eval_accuracy": 0.6804,
|
| 41 |
+
"eval_f1_macro": 0.5761183261183261,
|
| 42 |
+
"eval_loss": 0.6052666902542114,
|
| 43 |
+
"eval_precision_macro": 0.5896817337700503,
|
| 44 |
+
"eval_recall_macro": 0.6744341754284273,
|
| 45 |
+
"eval_runtime": 23.7946,
|
| 46 |
+
"eval_samples_per_second": 210.132,
|
| 47 |
+
"eval_steps_per_second": 3.32,
|
| 48 |
+
"step": 2000
|
| 49 |
+
}
|
| 50 |
+
],
|
| 51 |
+
"logging_steps": 1000,
|
| 52 |
+
"max_steps": 3750,
|
| 53 |
+
"num_input_tokens_seen": 0,
|
| 54 |
+
"num_train_epochs": 6,
|
| 55 |
+
"save_steps": 1000,
|
| 56 |
+
"stateful_callbacks": {
|
| 57 |
+
"TrainerControl": {
|
| 58 |
+
"args": {
|
| 59 |
+
"should_epoch_stop": false,
|
| 60 |
+
"should_evaluate": false,
|
| 61 |
+
"should_log": false,
|
| 62 |
+
"should_save": true,
|
| 63 |
+
"should_training_stop": false
|
| 64 |
+
},
|
| 65 |
+
"attributes": {}
|
| 66 |
+
}
|
| 67 |
+
},
|
| 68 |
+
"total_flos": 3.3678819065856e+16,
|
| 69 |
+
"train_batch_size": 64,
|
| 70 |
+
"trial_name": null,
|
| 71 |
+
"trial_params": {
|
| 72 |
+
"learning_rate": 2.4877516509341053e-06,
|
| 73 |
+
"num_train_epochs": 6,
|
| 74 |
+
"per_device_train_batch_size": 16,
|
| 75 |
+
"seed": 18,
|
| 76 |
+
"warmup_ratio": 0.044159519908610584,
|
| 77 |
+
"weight_decay": 0.07842897475620166
|
| 78 |
+
}
|
| 79 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-6/checkpoint-1000/added_tokens.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"[MASK]": 128000
|
| 3 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-6/checkpoint-1000/config.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"DebertaV2ForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"dtype": "float32",
|
| 7 |
+
"hidden_act": "gelu",
|
| 8 |
+
"hidden_dropout_prob": 0.1,
|
| 9 |
+
"hidden_size": 768,
|
| 10 |
+
"id2label": {
|
| 11 |
+
"0": "real",
|
| 12 |
+
"1": "fake"
|
| 13 |
+
},
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 3072,
|
| 16 |
+
"label2id": {
|
| 17 |
+
"fake": 1,
|
| 18 |
+
"real": 0
|
| 19 |
+
},
|
| 20 |
+
"layer_norm_eps": 1e-07,
|
| 21 |
+
"legacy": true,
|
| 22 |
+
"max_position_embeddings": 512,
|
| 23 |
+
"max_relative_positions": -1,
|
| 24 |
+
"model_type": "deberta-v2",
|
| 25 |
+
"norm_rel_ebd": "layer_norm",
|
| 26 |
+
"num_attention_heads": 12,
|
| 27 |
+
"num_hidden_layers": 12,
|
| 28 |
+
"pad_token_id": 0,
|
| 29 |
+
"pooler_dropout": 0,
|
| 30 |
+
"pooler_hidden_act": "gelu",
|
| 31 |
+
"pooler_hidden_size": 768,
|
| 32 |
+
"pos_att_type": [
|
| 33 |
+
"p2c",
|
| 34 |
+
"c2p"
|
| 35 |
+
],
|
| 36 |
+
"position_biased_input": false,
|
| 37 |
+
"position_buckets": 256,
|
| 38 |
+
"relative_attention": true,
|
| 39 |
+
"share_att_key": true,
|
| 40 |
+
"transformers_version": "4.57.1",
|
| 41 |
+
"type_vocab_size": 0,
|
| 42 |
+
"vocab_size": 128100
|
| 43 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-6/checkpoint-1000/tokenizer_config.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "[CLS]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "[SEP]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "[UNK]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": true,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"128000": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"bos_token": "[CLS]",
|
| 45 |
+
"clean_up_tokenization_spaces": false,
|
| 46 |
+
"cls_token": "[CLS]",
|
| 47 |
+
"do_lower_case": false,
|
| 48 |
+
"eos_token": "[SEP]",
|
| 49 |
+
"extra_special_tokens": {},
|
| 50 |
+
"mask_token": "[MASK]",
|
| 51 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 52 |
+
"pad_token": "[PAD]",
|
| 53 |
+
"sep_token": "[SEP]",
|
| 54 |
+
"sp_model_kwargs": {},
|
| 55 |
+
"split_by_punct": false,
|
| 56 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
| 57 |
+
"unk_token": "[UNK]",
|
| 58 |
+
"vocab_type": "spm"
|
| 59 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-6/checkpoint-1000/trainer_state.json
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 1000,
|
| 3 |
+
"best_metric": 0.5778107857758332,
|
| 4 |
+
"best_model_checkpoint": "./results_hyper_search_DEBERTA/run-6/checkpoint-1000",
|
| 5 |
+
"epoch": 1.6,
|
| 6 |
+
"eval_steps": 1000,
|
| 7 |
+
"global_step": 1000,
|
| 8 |
+
"is_hyper_param_search": true,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 1.6,
|
| 14 |
+
"grad_norm": 1.95713472366333,
|
| 15 |
+
"learning_rate": 1.0883753415508279e-05,
|
| 16 |
+
"loss": 0.5984,
|
| 17 |
+
"step": 1000
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 1.6,
|
| 21 |
+
"eval_accuracy": 0.6786,
|
| 22 |
+
"eval_f1_macro": 0.5778107857758332,
|
| 23 |
+
"eval_loss": 0.5918202996253967,
|
| 24 |
+
"eval_precision_macro": 0.5927365941052973,
|
| 25 |
+
"eval_recall_macro": 0.6818803301828564,
|
| 26 |
+
"eval_runtime": 23.9576,
|
| 27 |
+
"eval_samples_per_second": 208.702,
|
| 28 |
+
"eval_steps_per_second": 3.297,
|
| 29 |
+
"step": 1000
|
| 30 |
+
}
|
| 31 |
+
],
|
| 32 |
+
"logging_steps": 1000,
|
| 33 |
+
"max_steps": 2500,
|
| 34 |
+
"num_input_tokens_seen": 0,
|
| 35 |
+
"num_train_epochs": 4,
|
| 36 |
+
"save_steps": 1000,
|
| 37 |
+
"stateful_callbacks": {
|
| 38 |
+
"TrainerControl": {
|
| 39 |
+
"args": {
|
| 40 |
+
"should_epoch_stop": false,
|
| 41 |
+
"should_evaluate": false,
|
| 42 |
+
"should_log": false,
|
| 43 |
+
"should_save": true,
|
| 44 |
+
"should_training_stop": false
|
| 45 |
+
},
|
| 46 |
+
"attributes": {}
|
| 47 |
+
}
|
| 48 |
+
},
|
| 49 |
+
"total_flos": 1.6839409532928e+16,
|
| 50 |
+
"train_batch_size": 64,
|
| 51 |
+
"trial_name": null,
|
| 52 |
+
"trial_params": {
|
| 53 |
+
"learning_rate": 1.720662681878824e-05,
|
| 54 |
+
"num_train_epochs": 4,
|
| 55 |
+
"per_device_train_batch_size": 16,
|
| 56 |
+
"seed": 38,
|
| 57 |
+
"warmup_ratio": 0.05046021852881977,
|
| 58 |
+
"weight_decay": 0.07407011018822314
|
| 59 |
+
}
|
| 60 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-8/checkpoint-1000/added_tokens.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"[MASK]": 128000
|
| 3 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-8/checkpoint-1000/config.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"DebertaV2ForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"dtype": "float32",
|
| 7 |
+
"hidden_act": "gelu",
|
| 8 |
+
"hidden_dropout_prob": 0.1,
|
| 9 |
+
"hidden_size": 768,
|
| 10 |
+
"id2label": {
|
| 11 |
+
"0": "real",
|
| 12 |
+
"1": "fake"
|
| 13 |
+
},
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 3072,
|
| 16 |
+
"label2id": {
|
| 17 |
+
"fake": 1,
|
| 18 |
+
"real": 0
|
| 19 |
+
},
|
| 20 |
+
"layer_norm_eps": 1e-07,
|
| 21 |
+
"legacy": true,
|
| 22 |
+
"max_position_embeddings": 512,
|
| 23 |
+
"max_relative_positions": -1,
|
| 24 |
+
"model_type": "deberta-v2",
|
| 25 |
+
"norm_rel_ebd": "layer_norm",
|
| 26 |
+
"num_attention_heads": 12,
|
| 27 |
+
"num_hidden_layers": 12,
|
| 28 |
+
"pad_token_id": 0,
|
| 29 |
+
"pooler_dropout": 0,
|
| 30 |
+
"pooler_hidden_act": "gelu",
|
| 31 |
+
"pooler_hidden_size": 768,
|
| 32 |
+
"pos_att_type": [
|
| 33 |
+
"p2c",
|
| 34 |
+
"c2p"
|
| 35 |
+
],
|
| 36 |
+
"position_biased_input": false,
|
| 37 |
+
"position_buckets": 256,
|
| 38 |
+
"relative_attention": true,
|
| 39 |
+
"share_att_key": true,
|
| 40 |
+
"transformers_version": "4.57.1",
|
| 41 |
+
"type_vocab_size": 0,
|
| 42 |
+
"vocab_size": 128100
|
| 43 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-8/checkpoint-1000/special_tokens_map.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "[CLS]",
|
| 3 |
+
"cls_token": "[CLS]",
|
| 4 |
+
"eos_token": "[SEP]",
|
| 5 |
+
"mask_token": "[MASK]",
|
| 6 |
+
"pad_token": "[PAD]",
|
| 7 |
+
"sep_token": "[SEP]",
|
| 8 |
+
"unk_token": {
|
| 9 |
+
"content": "[UNK]",
|
| 10 |
+
"lstrip": false,
|
| 11 |
+
"normalized": true,
|
| 12 |
+
"rstrip": false,
|
| 13 |
+
"single_word": false
|
| 14 |
+
}
|
| 15 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-8/checkpoint-1000/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
final_new/results_hyper_search_DEBERTA/run-8/checkpoint-1000/tokenizer_config.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "[CLS]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "[SEP]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "[UNK]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": true,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"128000": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"bos_token": "[CLS]",
|
| 45 |
+
"clean_up_tokenization_spaces": false,
|
| 46 |
+
"cls_token": "[CLS]",
|
| 47 |
+
"do_lower_case": false,
|
| 48 |
+
"eos_token": "[SEP]",
|
| 49 |
+
"extra_special_tokens": {},
|
| 50 |
+
"mask_token": "[MASK]",
|
| 51 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 52 |
+
"pad_token": "[PAD]",
|
| 53 |
+
"sep_token": "[SEP]",
|
| 54 |
+
"sp_model_kwargs": {},
|
| 55 |
+
"split_by_punct": false,
|
| 56 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
| 57 |
+
"unk_token": "[UNK]",
|
| 58 |
+
"vocab_type": "spm"
|
| 59 |
+
}
|
final_new/results_hyper_search_DEBERTA/run-8/checkpoint-1000/trainer_state.json
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 1000,
|
| 3 |
+
"best_metric": 0.5841124806304239,
|
| 4 |
+
"best_model_checkpoint": "./results_hyper_search_DEBERTA/run-8/checkpoint-1000",
|
| 5 |
+
"epoch": 1.6,
|
| 6 |
+
"eval_steps": 1000,
|
| 7 |
+
"global_step": 1000,
|
| 8 |
+
"is_hyper_param_search": true,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 1.6,
|
| 14 |
+
"grad_norm": 2.356539249420166,
|
| 15 |
+
"learning_rate": 1.7019489204504472e-05,
|
| 16 |
+
"loss": 0.6009,
|
| 17 |
+
"step": 1000
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 1.6,
|
| 21 |
+
"eval_accuracy": 0.6922,
|
| 22 |
+
"eval_f1_macro": 0.5841124806304239,
|
| 23 |
+
"eval_loss": 0.6143302917480469,
|
| 24 |
+
"eval_precision_macro": 0.5932105858722077,
|
| 25 |
+
"eval_recall_macro": 0.6782494995196462,
|
| 26 |
+
"eval_runtime": 23.954,
|
| 27 |
+
"eval_samples_per_second": 208.733,
|
| 28 |
+
"eval_steps_per_second": 3.298,
|
| 29 |
+
"step": 1000
|
| 30 |
+
}
|
| 31 |
+
],
|
| 32 |
+
"logging_steps": 1000,
|
| 33 |
+
"max_steps": 1875,
|
| 34 |
+
"num_input_tokens_seen": 0,
|
| 35 |
+
"num_train_epochs": 3,
|
| 36 |
+
"save_steps": 1000,
|
| 37 |
+
"stateful_callbacks": {
|
| 38 |
+
"TrainerControl": {
|
| 39 |
+
"args": {
|
| 40 |
+
"should_epoch_stop": false,
|
| 41 |
+
"should_evaluate": false,
|
| 42 |
+
"should_log": false,
|
| 43 |
+
"should_save": true,
|
| 44 |
+
"should_training_stop": false
|
| 45 |
+
},
|
| 46 |
+
"attributes": {}
|
| 47 |
+
}
|
| 48 |
+
},
|
| 49 |
+
"total_flos": 1.6839409532928e+16,
|
| 50 |
+
"train_batch_size": 64,
|
| 51 |
+
"trial_name": null,
|
| 52 |
+
"trial_params": {
|
| 53 |
+
"learning_rate": 3.5107553644451574e-05,
|
| 54 |
+
"num_train_epochs": 3,
|
| 55 |
+
"per_device_train_batch_size": 16,
|
| 56 |
+
"seed": 13,
|
| 57 |
+
"warmup_ratio": 0.03608456495612949,
|
| 58 |
+
"weight_decay": 0.0003410574096915697
|
| 59 |
+
}
|
| 60 |
+
}
|
final_new/train.py
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ==============================================================================
|
| 2 |
+
# 最终决战版-v9: 虚假评论识别
|
| 3 |
+
# Core strategy: DeBERTa-v3 + class weights + automatic hyperparameter search + per-device batch size 16
|
| 4 |
+
# ==============================================================================
|
| 5 |
+
import os

# --- 1. Network configuration (online mode, downloading through a mirror) ---
# These variables are set BEFORE the third-party imports below on purpose:
# huggingface_hub reads HF_ENDPOINT / HF_HUB_CACHE once, when it is first
# imported (which happens as a side effect of importing `datasets` and
# `transformers`). Assigning them afterwards would silently have no effect.
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
os.environ['HF_HUB_CACHE'] = '/root/autodl-tmp/huggingface_cache'
# NOTE: HF_HUB_OFFLINE=1 is deliberately not set, because the DeBERTa
# checkpoint has to be downloaded over the network.

import pandas as pd  # noqa: E402
import numpy as np  # noqa: E402
import torch  # noqa: E402
from torch import nn  # noqa: E402
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score  # noqa: E402
from sklearn.utils.class_weight import compute_class_weight  # noqa: E402
from datasets import Dataset  # noqa: E402
from transformers import (  # noqa: E402
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    EvalPrediction
)
import optuna  # noqa: E402

# Base checkpoint that is fine-tuned by this script.
MODEL_NAME_OR_PATH = "microsoft/deberta-v3-base"

# --- 2. Input file paths ---
TRAIN_FILE_PATH = "/tmp/home/wzh/file/train_data.csv"
VALID_FILE_PATH = "/tmp/home/wzh/file/val_data.csv"
|
| 33 |
+
|
| 34 |
+
# --- 3. Load data ---
print(f"加载训练集: {TRAIN_FILE_PATH}")
train_df = pd.read_csv(TRAIN_FILE_PATH)
print(f"加载验证集: {VALID_FILE_PATH}")
eval_df = pd.read_csv(VALID_FILE_PATH)

# Encode the string labels as integer class ids.
label_map = {"real": 0, "fake": 1}
for split_name, frame in (("train", train_df), ("validation", eval_df)):
    encoded = frame["label"].map(label_map)
    # Series.map yields NaN for every value missing from label_map. Fail
    # loudly here instead of letting NaN labels reach the class-weight
    # computation and the loss function.
    unmapped = encoded.isna()
    if unmapped.any():
        unknown = sorted(frame.loc[unmapped, "label"].astype(str).unique())
        raise ValueError(
            f"Unexpected label value(s) in the {split_name} split: {unknown}; "
            f"expected one of {sorted(label_map)}"
        )
    frame["label"] = encoded.astype("int64")

# --- 4. Compute class weights ---
print("\n正在计算类别权重...")
train_labels = np.array(train_df["label"])
# 'balanced' weights are inversely proportional to class frequency in the
# training labels.
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(train_labels), y=train_labels)
device = "cuda" if torch.cuda.is_available() else "cpu"
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float).to(device)
print(f"计算出的类别权重: {class_weights}")

# --- 5. Build the datasets and load the tokenizer ---
train_dataset = Dataset.from_pandas(train_df)
eval_dataset = Dataset.from_pandas(eval_df)

print(f"\n正在下载/加载模型: {MODEL_NAME_OR_PATH} ...")
# The DeBERTa tokenizer needs the `sentencepiece` package to be installed.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME_OR_PATH)
|
| 59 |
+
|
| 60 |
+
def tokenize_function(examples):
    """Tokenize the "text" field of a batch with the module-level tokenizer.

    Every sequence is truncated and padded to exactly 512 tokens, so all
    encoded examples have the same length.
    """
    encoding_options = {"padding": "max_length", "truncation": True, "max_length": 512}
    return tokenizer(examples["text"], **encoding_options)
|
| 62 |
+
|
| 63 |
+
tokenized_train_dataset = train_dataset.map(tokenize_function, batched=True)
tokenized_eval_dataset = eval_dataset.map(tokenize_function, batched=True)

# Drop the raw columns the model does not consume. "__index_level_0__" is only
# present when Dataset.from_pandas had to preserve a DataFrame index, so it is
# looked up on each *dataset* (it never appears in the DataFrame's own columns)
# and separately for each split.
columns_to_remove = ["id", "text"]
if "__index_level_0__" in tokenized_train_dataset.column_names:
    columns_to_remove.append("__index_level_0__")

eval_columns_to_remove = ["id", "text"]
if "__index_level_0__" in tokenized_eval_dataset.column_names:
    eval_columns_to_remove.append("__index_level_0__")

tokenized_train_dataset = tokenized_train_dataset.remove_columns(columns_to_remove)
tokenized_eval_dataset = tokenized_eval_dataset.remove_columns(eval_columns_to_remove)

# The target column is renamed to "labels", the key compute_loss pops from
# each batch.
tokenized_train_dataset = tokenized_train_dataset.rename_column("label", "labels")
tokenized_eval_dataset = tokenized_eval_dataset.rename_column("label", "labels")
|
| 74 |
+
|
| 75 |
+
# --- 6. Custom Trainer (applies the class weights) ---
class CustomTrainer(Trainer):
    """Trainer whose loss is class-weighted cross-entropy."""

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Compute weighted cross-entropy between the model logits and the labels.

        Args:
            model: The model being trained; called as ``model(**inputs)``.
            inputs: Batch mapping. Its "labels" entry is popped (the mapping
                is mutated) so the model does not compute its own loss.
            return_outputs: When true, return ``(loss, outputs)`` instead of
                only the loss.
            **kwargs: Extra keyword arguments from the caller; accepted and
                ignored.

        Returns:
            The scalar loss tensor, or ``(loss, outputs)`` if
            ``return_outputs`` is true.
        """
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # class_weights_tensor was placed on a device chosen at import time;
        # move it next to the logits so the loss never mixes devices.
        loss_fct = nn.CrossEntropyLoss(weight=class_weights_tensor.to(logits.device))
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss
|
| 84 |
+
|
| 85 |
+
# --- 7. Hyperparameter-search configuration ---
# Class-id <-> class-name mappings that are stored in the model config.
id2label = dict(enumerate(("real", "fake")))
label2id = {name: index for index, name in id2label.items()}
|
| 88 |
+
|
| 89 |
+
def model_init(trial):
    """Build a fresh two-class sequence-classification model from the base checkpoint.

    Args:
        trial: Accepted for the Trainer's ``model_init`` hook; not used here.

    Returns:
        A newly loaded model configured with the real/fake label mappings.
    """
    label_config = {"num_labels": 2, "id2label": id2label, "label2id": label2id}
    return AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME_OR_PATH,
        **label_config,
    )
|
| 96 |
+
|
| 97 |
+
def compute_metrics_macro(p: EvalPrediction):
    """Compute accuracy and macro-averaged F1 / precision / recall.

    Args:
        p: Evaluation output; ``p.predictions`` is arg-maxed over axis 1 to
            obtain predicted class ids and ``p.label_ids`` holds the targets.

    Returns:
        Dict with keys "accuracy", "f1_macro", "precision_macro" and
        "recall_macro".
    """
    y_true = p.label_ids
    y_pred = np.argmax(p.predictions, axis=1)

    # Shared options: macro averaging; undefined ratios count as 0.
    macro = {"average": "macro", "zero_division": 0}

    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "f1_macro": f1_score(y_true, y_pred, **macro),
        "precision_macro": precision_score(y_true, y_pred, **macro),
        "recall_macro": recall_score(y_true, y_pred, **macro),
    }
|
| 112 |
+
|
| 113 |
+
def compute_objective(metrics):
    """Return the value the hyperparameter search optimizes: validation macro-F1.

    Args:
        metrics: Evaluation metrics mapping; must contain "eval_f1_macro".

    Raises:
        KeyError: If "eval_f1_macro" is missing from ``metrics``.
    """
    objective = metrics["eval_f1_macro"]
    return objective
|
| 115 |
+
def my_hp_space(trial):
    """Sample one hyperparameter configuration from the search space.

    Args:
        trial: Object providing ``suggest_float``, ``suggest_int`` and
            ``suggest_categorical`` (an Optuna trial in this script).

    Returns:
        Dict mapping training-argument names to the sampled values.
    """
    # 1. Learning rate: wide range from 1e-6 to 5e-5, sampled on a log scale.
    learning_rate = trial.suggest_float("learning_rate", 1e-6, 5e-5, log=True)

    # 2. Number of epochs: 3 to 8, allowing longer training against under-fitting.
    num_train_epochs = trial.suggest_int("num_train_epochs", 3, 8)

    # 3. Random seed: varied so results do not hinge on one lucky seed.
    seed = trial.suggest_int("seed", 1, 40)

    # 4. Per-device train batch size: currently 16 is the only candidate
    #    (add 32 to the list if GPU memory allows it).
    per_device_train_batch_size = trial.suggest_categorical("per_device_train_batch_size", [16])

    # 5. Weight decay: regularization strength against over-fitting.
    weight_decay = trial.suggest_float("weight_decay", 0.0, 0.3)

    # 6. Warm-up ratio: fraction of steps used for learning-rate warm-up.
    warmup_ratio = trial.suggest_float("warmup_ratio", 0.0, 0.2)

    return {
        "learning_rate": learning_rate,
        "num_train_epochs": num_train_epochs,
        "seed": seed,
        "per_device_train_batch_size": per_device_train_batch_size,
        "weight_decay": weight_decay,
        "warmup_ratio": warmup_ratio,
    }
|
| 136 |
+
# Baseline training arguments shared by every search trial. The values sampled
# in my_hp_space (learning rate, epochs, seed, train batch size, weight decay,
# warm-up ratio) are applied on top of these for each trial.
training_args = TrainingArguments(
    output_dir="./results_hyper_search_DEBERTA",

    # Batch size per device is 16 for both training and evaluation.
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    weight_decay=0.01,

    # Evaluate, checkpoint and log on the same 1000-step cadence.
    eval_strategy="steps",
    save_strategy="steps",
    logging_strategy="steps",
    eval_steps=1000,
    save_steps=1000,
    logging_steps=1000,

    # Keep a single checkpoint on disk and restore the best one, ranked by
    # macro-F1 (higher is better), when training ends.
    save_total_limit=1,
    load_best_model_at_end=True,
    metric_for_best_model="f1_macro",
    greater_is_better=True,
)
|
| 155 |
+
|
| 156 |
+
# The model is left unset: model_init builds a fresh one for every trial.
trainer = CustomTrainer(
    model=None,
    model_init=model_init,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics_macro,
)

# --- 8. Run the automatic hyperparameter search ---
separator = "=" * 50
print("\n" + separator)
print("🚀 [DeBERTa-v3] 开始自动超参数搜索 (Target: Macro-F1)...")
print(separator)

# 20 Optuna trials, maximizing the value returned by compute_objective.
best_run = trainer.hyperparameter_search(
    backend="optuna",
    hp_space=my_hp_space,
    compute_objective=compute_objective,
    direction="maximize",
    n_trials=20,
)

print("\n" + separator)
print("🎉 搜索完成!")
print(separator)
print(f"最佳 Macro-F1: {best_run.objective:.4f}")
print("最佳参数组合:", best_run.hyperparameters)
|
| 184 |
+
|
| 185 |
+
# --- 9. Final training run with the best hyperparameters ---
separator = "=" * 50
print("\n" + separator)
print("🚀 [DeBERTa-v3] 使用最佳参数进行最终训练...")
print(separator)

# Copy every winning hyperparameter onto the shared TrainingArguments object,
# then redirect the output directory and log more frequently for this run.
for k, v in best_run.hyperparameters.items():
    setattr(training_args, k, v)
training_args.logging_steps = 200
training_args.output_dir = "./results_final_best_DEBERTA"

# A new trainer is created; model_init provides a freshly initialized model.
trainer = CustomTrainer(
    args=training_args,
    model_init=model_init,
    compute_metrics=compute_metrics_macro,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_eval_dataset,
)

trainer.train()
print("\n" + separator)
print("🎉 最终训练完成!")
print(separator)
|
| 207 |
+
|
| 208 |
+
# --- 10. Save the model and tokenizer ---
final_model_path = "./final_model_deberta_macro"
trainer.save_model(final_model_path)
tokenizer.save_pretrained(final_model_path)
print(f"\nDeBERTa 最优模型已保存至: {final_model_path}")

# Final report on the validation set.
print("\n--- 最终成绩单 (验证集) ---")
final_metrics = trainer.evaluate()

for key, value in final_metrics.items():
    # Strip the "eval_" prefix for display.
    shown_key = key[len("eval_"):] if key.startswith("eval_") else key
    # Floats are shown with four decimals; everything else is printed as is.
    shown_value = f"{value:.4f}" if isinstance(value, float) else f"{value}"
    print(f" - {shown_key}: {shown_value}")
|
test.py
ADDED
|
File without changes
|