Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- checkpoint-114/config.json +33 -0
- checkpoint-114/model.safetensors +3 -0
- checkpoint-114/optimizer.pt +3 -0
- checkpoint-114/rng_state.pth +3 -0
- checkpoint-114/scheduler.pt +3 -0
- checkpoint-114/trainer_state.json +95 -0
- checkpoint-114/training_args.bin +3 -0
- checkpoint-171/config.json +33 -0
- checkpoint-171/model.safetensors +3 -0
- checkpoint-171/optimizer.pt +3 -0
- checkpoint-171/rng_state.pth +3 -0
- checkpoint-171/scheduler.pt +3 -0
- checkpoint-171/trainer_state.json +121 -0
- checkpoint-171/training_args.bin +3 -0
- checkpoint-228/config.json +33 -0
- checkpoint-228/model.safetensors +3 -0
- checkpoint-228/optimizer.pt +3 -0
- checkpoint-228/rng_state.pth +3 -0
- checkpoint-228/scheduler.pt +3 -0
- checkpoint-228/trainer_state.json +154 -0
- checkpoint-228/training_args.bin +3 -0
- checkpoint-285/config.json +33 -0
- checkpoint-285/model.safetensors +3 -0
- checkpoint-285/optimizer.pt +3 -0
- checkpoint-285/rng_state.pth +3 -0
- checkpoint-285/scheduler.pt +3 -0
- checkpoint-285/trainer_state.json +180 -0
- checkpoint-285/training_args.bin +3 -0
- checkpoint-342/config.json +33 -0
- checkpoint-342/model.safetensors +3 -0
- checkpoint-342/optimizer.pt +3 -0
- checkpoint-342/rng_state.pth +3 -0
- checkpoint-342/scheduler.pt +3 -0
- checkpoint-342/trainer_state.json +206 -0
- checkpoint-342/training_args.bin +3 -0
- checkpoint-399/config.json +33 -0
- checkpoint-399/model.safetensors +3 -0
- checkpoint-399/optimizer.pt +3 -0
- checkpoint-399/rng_state.pth +3 -0
- checkpoint-399/scheduler.pt +3 -0
- checkpoint-399/trainer_state.json +232 -0
- checkpoint-399/training_args.bin +3 -0
- checkpoint-456/config.json +33 -0
- checkpoint-456/model.safetensors +3 -0
- checkpoint-456/optimizer.pt +3 -0
- checkpoint-456/rng_state.pth +3 -0
- checkpoint-456/scheduler.pt +3 -0
- checkpoint-456/trainer_state.json +265 -0
- checkpoint-456/training_args.bin +3 -0
- checkpoint-513/config.json +33 -0
checkpoint-114/config.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"BertForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"classifier_dropout": null,
|
| 7 |
+
"dtype": "float32",
|
| 8 |
+
"gradient_checkpointing": false,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"id2label": {
|
| 13 |
+
"0": "Human",
|
| 14 |
+
"1": "AI"
|
| 15 |
+
},
|
| 16 |
+
"initializer_range": 0.02,
|
| 17 |
+
"intermediate_size": 3072,
|
| 18 |
+
"label2id": {
|
| 19 |
+
"AI": 1,
|
| 20 |
+
"Human": 0
|
| 21 |
+
},
|
| 22 |
+
"layer_norm_eps": 1e-12,
|
| 23 |
+
"max_position_embeddings": 512,
|
| 24 |
+
"model_type": "bert",
|
| 25 |
+
"num_attention_heads": 12,
|
| 26 |
+
"num_hidden_layers": 12,
|
| 27 |
+
"pad_token_id": 0,
|
| 28 |
+
"position_embedding_type": "absolute",
|
| 29 |
+
"transformers_version": "4.57.3",
|
| 30 |
+
"type_vocab_size": 2,
|
| 31 |
+
"use_cache": true,
|
| 32 |
+
"vocab_size": 30522
|
| 33 |
+
}
|
checkpoint-114/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ba07e64572fb03035e6ad53dc30c7db64d902eb4157cb66935426945358712f1
|
| 3 |
+
size 437958648
|
checkpoint-114/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b118ab24d14452e0fcb395f1e9e76053c0ac34053f040c46497c931b9ff5b04
|
| 3 |
+
size 876033163
|
checkpoint-114/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9f1e0d31acc437fbbd16411d0d11d500c5f4dbbc7561671dab7dbf23eb0f2c43
|
| 3 |
+
size 14455
|
checkpoint-114/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c25199463fa046ddd18b57985caa892648a50605fcbefcaff1fd9e9d82c8e75
|
| 3 |
+
size 1465
|
checkpoint-114/trainer_state.json
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 114,
|
| 3 |
+
"best_metric": 0.4482758620689655,
|
| 4 |
+
"best_model_checkpoint": "./ai_detector_model_v10/checkpoint-114",
|
| 5 |
+
"epoch": 2.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 114,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.43859649122807015,
|
| 14 |
+
"grad_norm": 2.53117299079895,
|
| 15 |
+
"learning_rate": 1.2000000000000002e-07,
|
| 16 |
+
"loss": 0.6905,
|
| 17 |
+
"step": 25
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.8771929824561403,
|
| 21 |
+
"grad_norm": 3.241912841796875,
|
| 22 |
+
"learning_rate": 2.45e-07,
|
| 23 |
+
"loss": 0.6869,
|
| 24 |
+
"step": 50
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 1.0,
|
| 28 |
+
"eval_accuracy": 0.5,
|
| 29 |
+
"eval_f1": 0.16666666666666666,
|
| 30 |
+
"eval_loss": 0.6886580586433411,
|
| 31 |
+
"eval_precision": 0.8,
|
| 32 |
+
"eval_recall": 0.09302325581395349,
|
| 33 |
+
"eval_runtime": 4.3688,
|
| 34 |
+
"eval_samples_per_second": 18.312,
|
| 35 |
+
"eval_steps_per_second": 2.289,
|
| 36 |
+
"step": 57
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 1.3157894736842106,
|
| 40 |
+
"grad_norm": 2.8000667095184326,
|
| 41 |
+
"learning_rate": 3.7e-07,
|
| 42 |
+
"loss": 0.6872,
|
| 43 |
+
"step": 75
|
| 44 |
+
},
|
| 45 |
+
{
|
| 46 |
+
"epoch": 1.7543859649122808,
|
| 47 |
+
"grad_norm": 3.0732340812683105,
|
| 48 |
+
"learning_rate": 4.95e-07,
|
| 49 |
+
"loss": 0.6721,
|
| 50 |
+
"step": 100
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"epoch": 2.0,
|
| 54 |
+
"eval_accuracy": 0.6,
|
| 55 |
+
"eval_f1": 0.4482758620689655,
|
| 56 |
+
"eval_loss": 0.6631786227226257,
|
| 57 |
+
"eval_precision": 0.8666666666666667,
|
| 58 |
+
"eval_recall": 0.3023255813953488,
|
| 59 |
+
"eval_runtime": 3.4954,
|
| 60 |
+
"eval_samples_per_second": 22.887,
|
| 61 |
+
"eval_steps_per_second": 2.861,
|
| 62 |
+
"step": 114
|
| 63 |
+
}
|
| 64 |
+
],
|
| 65 |
+
"logging_steps": 25,
|
| 66 |
+
"max_steps": 1140,
|
| 67 |
+
"num_input_tokens_seen": 0,
|
| 68 |
+
"num_train_epochs": 20,
|
| 69 |
+
"save_steps": 500,
|
| 70 |
+
"stateful_callbacks": {
|
| 71 |
+
"EarlyStoppingCallback": {
|
| 72 |
+
"args": {
|
| 73 |
+
"early_stopping_patience": 5,
|
| 74 |
+
"early_stopping_threshold": 0.0
|
| 75 |
+
},
|
| 76 |
+
"attributes": {
|
| 77 |
+
"early_stopping_patience_counter": 0
|
| 78 |
+
}
|
| 79 |
+
},
|
| 80 |
+
"TrainerControl": {
|
| 81 |
+
"args": {
|
| 82 |
+
"should_epoch_stop": false,
|
| 83 |
+
"should_evaluate": false,
|
| 84 |
+
"should_log": false,
|
| 85 |
+
"should_save": true,
|
| 86 |
+
"should_training_stop": false
|
| 87 |
+
},
|
| 88 |
+
"attributes": {}
|
| 89 |
+
}
|
| 90 |
+
},
|
| 91 |
+
"total_flos": 236799949824000.0,
|
| 92 |
+
"train_batch_size": 8,
|
| 93 |
+
"trial_name": null,
|
| 94 |
+
"trial_params": null
|
| 95 |
+
}
|
checkpoint-114/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63faf219838a9e56f129bb219f11f7d21de055610c1d54c74fe871a46d5bf77a
|
| 3 |
+
size 5777
|
checkpoint-171/config.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"BertForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"classifier_dropout": null,
|
| 7 |
+
"dtype": "float32",
|
| 8 |
+
"gradient_checkpointing": false,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"id2label": {
|
| 13 |
+
"0": "Human",
|
| 14 |
+
"1": "AI"
|
| 15 |
+
},
|
| 16 |
+
"initializer_range": 0.02,
|
| 17 |
+
"intermediate_size": 3072,
|
| 18 |
+
"label2id": {
|
| 19 |
+
"AI": 1,
|
| 20 |
+
"Human": 0
|
| 21 |
+
},
|
| 22 |
+
"layer_norm_eps": 1e-12,
|
| 23 |
+
"max_position_embeddings": 512,
|
| 24 |
+
"model_type": "bert",
|
| 25 |
+
"num_attention_heads": 12,
|
| 26 |
+
"num_hidden_layers": 12,
|
| 27 |
+
"pad_token_id": 0,
|
| 28 |
+
"position_embedding_type": "absolute",
|
| 29 |
+
"transformers_version": "4.57.3",
|
| 30 |
+
"type_vocab_size": 2,
|
| 31 |
+
"use_cache": true,
|
| 32 |
+
"vocab_size": 30522
|
| 33 |
+
}
|
checkpoint-171/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55009821690e917db1114dbc848baec4d7739ac9286431f21472af667a8eddf7
|
| 3 |
+
size 437958648
|
checkpoint-171/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09c521d9bd88b4ab3f2cf6491df2b5355f289b80dda6f61e2e3c9f6e4ba347c9
|
| 3 |
+
size 876033163
|
checkpoint-171/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:391d01d3aeb4a35151817d446e4ba0b9c8a04084ae1b1b66eda188a30729da0a
|
| 3 |
+
size 14455
|
checkpoint-171/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:891b886d78f6365df6b5329e3942b03490ef41cb2ce6ea4d05691c57a4531bac
|
| 3 |
+
size 1465
|
checkpoint-171/trainer_state.json
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 171,
|
| 3 |
+
"best_metric": 0.9195402298850575,
|
| 4 |
+
"best_model_checkpoint": "./ai_detector_model_v10/checkpoint-171",
|
| 5 |
+
"epoch": 3.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 171,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.43859649122807015,
|
| 14 |
+
"grad_norm": 2.53117299079895,
|
| 15 |
+
"learning_rate": 1.2000000000000002e-07,
|
| 16 |
+
"loss": 0.6905,
|
| 17 |
+
"step": 25
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.8771929824561403,
|
| 21 |
+
"grad_norm": 3.241912841796875,
|
| 22 |
+
"learning_rate": 2.45e-07,
|
| 23 |
+
"loss": 0.6869,
|
| 24 |
+
"step": 50
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 1.0,
|
| 28 |
+
"eval_accuracy": 0.5,
|
| 29 |
+
"eval_f1": 0.16666666666666666,
|
| 30 |
+
"eval_loss": 0.6886580586433411,
|
| 31 |
+
"eval_precision": 0.8,
|
| 32 |
+
"eval_recall": 0.09302325581395349,
|
| 33 |
+
"eval_runtime": 4.3688,
|
| 34 |
+
"eval_samples_per_second": 18.312,
|
| 35 |
+
"eval_steps_per_second": 2.289,
|
| 36 |
+
"step": 57
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 1.3157894736842106,
|
| 40 |
+
"grad_norm": 2.8000667095184326,
|
| 41 |
+
"learning_rate": 3.7e-07,
|
| 42 |
+
"loss": 0.6872,
|
| 43 |
+
"step": 75
|
| 44 |
+
},
|
| 45 |
+
{
|
| 46 |
+
"epoch": 1.7543859649122808,
|
| 47 |
+
"grad_norm": 3.0732340812683105,
|
| 48 |
+
"learning_rate": 4.95e-07,
|
| 49 |
+
"loss": 0.6721,
|
| 50 |
+
"step": 100
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"epoch": 2.0,
|
| 54 |
+
"eval_accuracy": 0.6,
|
| 55 |
+
"eval_f1": 0.4482758620689655,
|
| 56 |
+
"eval_loss": 0.6631786227226257,
|
| 57 |
+
"eval_precision": 0.8666666666666667,
|
| 58 |
+
"eval_recall": 0.3023255813953488,
|
| 59 |
+
"eval_runtime": 3.4954,
|
| 60 |
+
"eval_samples_per_second": 22.887,
|
| 61 |
+
"eval_steps_per_second": 2.861,
|
| 62 |
+
"step": 114
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"epoch": 2.192982456140351,
|
| 66 |
+
"grad_norm": 5.148472309112549,
|
| 67 |
+
"learning_rate": 6.2e-07,
|
| 68 |
+
"loss": 0.6639,
|
| 69 |
+
"step": 125
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"epoch": 2.6315789473684212,
|
| 73 |
+
"grad_norm": 4.3272223472595215,
|
| 74 |
+
"learning_rate": 7.45e-07,
|
| 75 |
+
"loss": 0.6423,
|
| 76 |
+
"step": 150
|
| 77 |
+
},
|
| 78 |
+
{
|
| 79 |
+
"epoch": 3.0,
|
| 80 |
+
"eval_accuracy": 0.9125,
|
| 81 |
+
"eval_f1": 0.9195402298850575,
|
| 82 |
+
"eval_loss": 0.6100292801856995,
|
| 83 |
+
"eval_precision": 0.9090909090909091,
|
| 84 |
+
"eval_recall": 0.9302325581395349,
|
| 85 |
+
"eval_runtime": 3.5042,
|
| 86 |
+
"eval_samples_per_second": 22.829,
|
| 87 |
+
"eval_steps_per_second": 2.854,
|
| 88 |
+
"step": 171
|
| 89 |
+
}
|
| 90 |
+
],
|
| 91 |
+
"logging_steps": 25,
|
| 92 |
+
"max_steps": 1140,
|
| 93 |
+
"num_input_tokens_seen": 0,
|
| 94 |
+
"num_train_epochs": 20,
|
| 95 |
+
"save_steps": 500,
|
| 96 |
+
"stateful_callbacks": {
|
| 97 |
+
"EarlyStoppingCallback": {
|
| 98 |
+
"args": {
|
| 99 |
+
"early_stopping_patience": 5,
|
| 100 |
+
"early_stopping_threshold": 0.0
|
| 101 |
+
},
|
| 102 |
+
"attributes": {
|
| 103 |
+
"early_stopping_patience_counter": 0
|
| 104 |
+
}
|
| 105 |
+
},
|
| 106 |
+
"TrainerControl": {
|
| 107 |
+
"args": {
|
| 108 |
+
"should_epoch_stop": false,
|
| 109 |
+
"should_evaluate": false,
|
| 110 |
+
"should_log": false,
|
| 111 |
+
"should_save": true,
|
| 112 |
+
"should_training_stop": false
|
| 113 |
+
},
|
| 114 |
+
"attributes": {}
|
| 115 |
+
}
|
| 116 |
+
},
|
| 117 |
+
"total_flos": 355199924736000.0,
|
| 118 |
+
"train_batch_size": 8,
|
| 119 |
+
"trial_name": null,
|
| 120 |
+
"trial_params": null
|
| 121 |
+
}
|
checkpoint-171/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63faf219838a9e56f129bb219f11f7d21de055610c1d54c74fe871a46d5bf77a
|
| 3 |
+
size 5777
|
checkpoint-228/config.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"BertForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"classifier_dropout": null,
|
| 7 |
+
"dtype": "float32",
|
| 8 |
+
"gradient_checkpointing": false,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"id2label": {
|
| 13 |
+
"0": "Human",
|
| 14 |
+
"1": "AI"
|
| 15 |
+
},
|
| 16 |
+
"initializer_range": 0.02,
|
| 17 |
+
"intermediate_size": 3072,
|
| 18 |
+
"label2id": {
|
| 19 |
+
"AI": 1,
|
| 20 |
+
"Human": 0
|
| 21 |
+
},
|
| 22 |
+
"layer_norm_eps": 1e-12,
|
| 23 |
+
"max_position_embeddings": 512,
|
| 24 |
+
"model_type": "bert",
|
| 25 |
+
"num_attention_heads": 12,
|
| 26 |
+
"num_hidden_layers": 12,
|
| 27 |
+
"pad_token_id": 0,
|
| 28 |
+
"position_embedding_type": "absolute",
|
| 29 |
+
"transformers_version": "4.57.3",
|
| 30 |
+
"type_vocab_size": 2,
|
| 31 |
+
"use_cache": true,
|
| 32 |
+
"vocab_size": 30522
|
| 33 |
+
}
|
checkpoint-228/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dff874543e53030922925ea8f8650cb4f2e2a46217f2fa2a688bef7c055f9cd9
|
| 3 |
+
size 437958648
|
checkpoint-228/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8eee46e5fb474bc5a87dd72d542443661ffebd3bcd8f64daef357d98ba3b03a8
|
| 3 |
+
size 876033163
|
checkpoint-228/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67627e3b026c4c5d776980914bd7f99f2f9814ae6ac5a3bd1d93ee8d2ff6784f
|
| 3 |
+
size 14455
|
checkpoint-228/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:977b9b2e7012dd8b09d3a15f4d4c2a57908497bc25ffbbd86cffb63b01ef4771
|
| 3 |
+
size 1465
|
checkpoint-228/trainer_state.json
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 228,
|
| 3 |
+
"best_metric": 0.945054945054945,
|
| 4 |
+
"best_model_checkpoint": "./ai_detector_model_v10/checkpoint-228",
|
| 5 |
+
"epoch": 4.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 228,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.43859649122807015,
|
| 14 |
+
"grad_norm": 2.53117299079895,
|
| 15 |
+
"learning_rate": 1.2000000000000002e-07,
|
| 16 |
+
"loss": 0.6905,
|
| 17 |
+
"step": 25
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.8771929824561403,
|
| 21 |
+
"grad_norm": 3.241912841796875,
|
| 22 |
+
"learning_rate": 2.45e-07,
|
| 23 |
+
"loss": 0.6869,
|
| 24 |
+
"step": 50
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 1.0,
|
| 28 |
+
"eval_accuracy": 0.5,
|
| 29 |
+
"eval_f1": 0.16666666666666666,
|
| 30 |
+
"eval_loss": 0.6886580586433411,
|
| 31 |
+
"eval_precision": 0.8,
|
| 32 |
+
"eval_recall": 0.09302325581395349,
|
| 33 |
+
"eval_runtime": 4.3688,
|
| 34 |
+
"eval_samples_per_second": 18.312,
|
| 35 |
+
"eval_steps_per_second": 2.289,
|
| 36 |
+
"step": 57
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 1.3157894736842106,
|
| 40 |
+
"grad_norm": 2.8000667095184326,
|
| 41 |
+
"learning_rate": 3.7e-07,
|
| 42 |
+
"loss": 0.6872,
|
| 43 |
+
"step": 75
|
| 44 |
+
},
|
| 45 |
+
{
|
| 46 |
+
"epoch": 1.7543859649122808,
|
| 47 |
+
"grad_norm": 3.0732340812683105,
|
| 48 |
+
"learning_rate": 4.95e-07,
|
| 49 |
+
"loss": 0.6721,
|
| 50 |
+
"step": 100
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"epoch": 2.0,
|
| 54 |
+
"eval_accuracy": 0.6,
|
| 55 |
+
"eval_f1": 0.4482758620689655,
|
| 56 |
+
"eval_loss": 0.6631786227226257,
|
| 57 |
+
"eval_precision": 0.8666666666666667,
|
| 58 |
+
"eval_recall": 0.3023255813953488,
|
| 59 |
+
"eval_runtime": 3.4954,
|
| 60 |
+
"eval_samples_per_second": 22.887,
|
| 61 |
+
"eval_steps_per_second": 2.861,
|
| 62 |
+
"step": 114
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"epoch": 2.192982456140351,
|
| 66 |
+
"grad_norm": 5.148472309112549,
|
| 67 |
+
"learning_rate": 6.2e-07,
|
| 68 |
+
"loss": 0.6639,
|
| 69 |
+
"step": 125
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"epoch": 2.6315789473684212,
|
| 73 |
+
"grad_norm": 4.3272223472595215,
|
| 74 |
+
"learning_rate": 7.45e-07,
|
| 75 |
+
"loss": 0.6423,
|
| 76 |
+
"step": 150
|
| 77 |
+
},
|
| 78 |
+
{
|
| 79 |
+
"epoch": 3.0,
|
| 80 |
+
"eval_accuracy": 0.9125,
|
| 81 |
+
"eval_f1": 0.9195402298850575,
|
| 82 |
+
"eval_loss": 0.6100292801856995,
|
| 83 |
+
"eval_precision": 0.9090909090909091,
|
| 84 |
+
"eval_recall": 0.9302325581395349,
|
| 85 |
+
"eval_runtime": 3.5042,
|
| 86 |
+
"eval_samples_per_second": 22.829,
|
| 87 |
+
"eval_steps_per_second": 2.854,
|
| 88 |
+
"step": 171
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"epoch": 3.0701754385964914,
|
| 92 |
+
"grad_norm": 4.4310302734375,
|
| 93 |
+
"learning_rate": 8.699999999999999e-07,
|
| 94 |
+
"loss": 0.6113,
|
| 95 |
+
"step": 175
|
| 96 |
+
},
|
| 97 |
+
{
|
| 98 |
+
"epoch": 3.5087719298245617,
|
| 99 |
+
"grad_norm": 3.7883880138397217,
|
| 100 |
+
"learning_rate": 9.95e-07,
|
| 101 |
+
"loss": 0.587,
|
| 102 |
+
"step": 200
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"epoch": 3.9473684210526314,
|
| 106 |
+
"grad_norm": 4.989749908447266,
|
| 107 |
+
"learning_rate": 1.12e-06,
|
| 108 |
+
"loss": 0.5664,
|
| 109 |
+
"step": 225
|
| 110 |
+
},
|
| 111 |
+
{
|
| 112 |
+
"epoch": 4.0,
|
| 113 |
+
"eval_accuracy": 0.9375,
|
| 114 |
+
"eval_f1": 0.945054945054945,
|
| 115 |
+
"eval_loss": 0.5416288375854492,
|
| 116 |
+
"eval_precision": 0.8958333333333334,
|
| 117 |
+
"eval_recall": 1.0,
|
| 118 |
+
"eval_runtime": 3.3989,
|
| 119 |
+
"eval_samples_per_second": 23.537,
|
| 120 |
+
"eval_steps_per_second": 2.942,
|
| 121 |
+
"step": 228
|
| 122 |
+
}
|
| 123 |
+
],
|
| 124 |
+
"logging_steps": 25,
|
| 125 |
+
"max_steps": 1140,
|
| 126 |
+
"num_input_tokens_seen": 0,
|
| 127 |
+
"num_train_epochs": 20,
|
| 128 |
+
"save_steps": 500,
|
| 129 |
+
"stateful_callbacks": {
|
| 130 |
+
"EarlyStoppingCallback": {
|
| 131 |
+
"args": {
|
| 132 |
+
"early_stopping_patience": 5,
|
| 133 |
+
"early_stopping_threshold": 0.0
|
| 134 |
+
},
|
| 135 |
+
"attributes": {
|
| 136 |
+
"early_stopping_patience_counter": 0
|
| 137 |
+
}
|
| 138 |
+
},
|
| 139 |
+
"TrainerControl": {
|
| 140 |
+
"args": {
|
| 141 |
+
"should_epoch_stop": false,
|
| 142 |
+
"should_evaluate": false,
|
| 143 |
+
"should_log": false,
|
| 144 |
+
"should_save": true,
|
| 145 |
+
"should_training_stop": false
|
| 146 |
+
},
|
| 147 |
+
"attributes": {}
|
| 148 |
+
}
|
| 149 |
+
},
|
| 150 |
+
"total_flos": 473599899648000.0,
|
| 151 |
+
"train_batch_size": 8,
|
| 152 |
+
"trial_name": null,
|
| 153 |
+
"trial_params": null
|
| 154 |
+
}
|
checkpoint-228/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63faf219838a9e56f129bb219f11f7d21de055610c1d54c74fe871a46d5bf77a
|
| 3 |
+
size 5777
|
checkpoint-285/config.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"BertForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"classifier_dropout": null,
|
| 7 |
+
"dtype": "float32",
|
| 8 |
+
"gradient_checkpointing": false,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"id2label": {
|
| 13 |
+
"0": "Human",
|
| 14 |
+
"1": "AI"
|
| 15 |
+
},
|
| 16 |
+
"initializer_range": 0.02,
|
| 17 |
+
"intermediate_size": 3072,
|
| 18 |
+
"label2id": {
|
| 19 |
+
"AI": 1,
|
| 20 |
+
"Human": 0
|
| 21 |
+
},
|
| 22 |
+
"layer_norm_eps": 1e-12,
|
| 23 |
+
"max_position_embeddings": 512,
|
| 24 |
+
"model_type": "bert",
|
| 25 |
+
"num_attention_heads": 12,
|
| 26 |
+
"num_hidden_layers": 12,
|
| 27 |
+
"pad_token_id": 0,
|
| 28 |
+
"position_embedding_type": "absolute",
|
| 29 |
+
"transformers_version": "4.57.3",
|
| 30 |
+
"type_vocab_size": 2,
|
| 31 |
+
"use_cache": true,
|
| 32 |
+
"vocab_size": 30522
|
| 33 |
+
}
|
checkpoint-285/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:243655f60fd2a53b6f348a75ebbb0ac4df9a5236a4b0f72379bfa1bd3643c4f0
|
| 3 |
+
size 437958648
|
checkpoint-285/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a569ec9245736253f61c9d622a5d3ca2ff56b2f6ad0d0669c59a5cba04e6cab2
|
| 3 |
+
size 876033163
|
checkpoint-285/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b652c6269b998b96ab924b2734c0818fab436c642524e13fc6cd4d9082e62b5
|
| 3 |
+
size 14455
|
checkpoint-285/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be42d5eaf34fdb700c80731c58d1f751f473aad4e6a622406045abb719430309
|
| 3 |
+
size 1465
|
checkpoint-285/trainer_state.json
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 285,
|
| 3 |
+
"best_metric": 0.9662921348314607,
|
| 4 |
+
"best_model_checkpoint": "./ai_detector_model_v10/checkpoint-285",
|
| 5 |
+
"epoch": 5.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 285,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.43859649122807015,
|
| 14 |
+
"grad_norm": 2.53117299079895,
|
| 15 |
+
"learning_rate": 1.2000000000000002e-07,
|
| 16 |
+
"loss": 0.6905,
|
| 17 |
+
"step": 25
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.8771929824561403,
|
| 21 |
+
"grad_norm": 3.241912841796875,
|
| 22 |
+
"learning_rate": 2.45e-07,
|
| 23 |
+
"loss": 0.6869,
|
| 24 |
+
"step": 50
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 1.0,
|
| 28 |
+
"eval_accuracy": 0.5,
|
| 29 |
+
"eval_f1": 0.16666666666666666,
|
| 30 |
+
"eval_loss": 0.6886580586433411,
|
| 31 |
+
"eval_precision": 0.8,
|
| 32 |
+
"eval_recall": 0.09302325581395349,
|
| 33 |
+
"eval_runtime": 4.3688,
|
| 34 |
+
"eval_samples_per_second": 18.312,
|
| 35 |
+
"eval_steps_per_second": 2.289,
|
| 36 |
+
"step": 57
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 1.3157894736842106,
|
| 40 |
+
"grad_norm": 2.8000667095184326,
|
| 41 |
+
"learning_rate": 3.7e-07,
|
| 42 |
+
"loss": 0.6872,
|
| 43 |
+
"step": 75
|
| 44 |
+
},
|
| 45 |
+
{
|
| 46 |
+
"epoch": 1.7543859649122808,
|
| 47 |
+
"grad_norm": 3.0732340812683105,
|
| 48 |
+
"learning_rate": 4.95e-07,
|
| 49 |
+
"loss": 0.6721,
|
| 50 |
+
"step": 100
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"epoch": 2.0,
|
| 54 |
+
"eval_accuracy": 0.6,
|
| 55 |
+
"eval_f1": 0.4482758620689655,
|
| 56 |
+
"eval_loss": 0.6631786227226257,
|
| 57 |
+
"eval_precision": 0.8666666666666667,
|
| 58 |
+
"eval_recall": 0.3023255813953488,
|
| 59 |
+
"eval_runtime": 3.4954,
|
| 60 |
+
"eval_samples_per_second": 22.887,
|
| 61 |
+
"eval_steps_per_second": 2.861,
|
| 62 |
+
"step": 114
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"epoch": 2.192982456140351,
|
| 66 |
+
"grad_norm": 5.148472309112549,
|
| 67 |
+
"learning_rate": 6.2e-07,
|
| 68 |
+
"loss": 0.6639,
|
| 69 |
+
"step": 125
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"epoch": 2.6315789473684212,
|
| 73 |
+
"grad_norm": 4.3272223472595215,
|
| 74 |
+
"learning_rate": 7.45e-07,
|
| 75 |
+
"loss": 0.6423,
|
| 76 |
+
"step": 150
|
| 77 |
+
},
|
| 78 |
+
{
|
| 79 |
+
"epoch": 3.0,
|
| 80 |
+
"eval_accuracy": 0.9125,
|
| 81 |
+
"eval_f1": 0.9195402298850575,
|
| 82 |
+
"eval_loss": 0.6100292801856995,
|
| 83 |
+
"eval_precision": 0.9090909090909091,
|
| 84 |
+
"eval_recall": 0.9302325581395349,
|
| 85 |
+
"eval_runtime": 3.5042,
|
| 86 |
+
"eval_samples_per_second": 22.829,
|
| 87 |
+
"eval_steps_per_second": 2.854,
|
| 88 |
+
"step": 171
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"epoch": 3.0701754385964914,
|
| 92 |
+
"grad_norm": 4.4310302734375,
|
| 93 |
+
"learning_rate": 8.699999999999999e-07,
|
| 94 |
+
"loss": 0.6113,
|
| 95 |
+
"step": 175
|
| 96 |
+
},
|
| 97 |
+
{
|
| 98 |
+
"epoch": 3.5087719298245617,
|
| 99 |
+
"grad_norm": 3.7883880138397217,
|
| 100 |
+
"learning_rate": 9.95e-07,
|
| 101 |
+
"loss": 0.587,
|
| 102 |
+
"step": 200
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"epoch": 3.9473684210526314,
|
| 106 |
+
"grad_norm": 4.989749908447266,
|
| 107 |
+
"learning_rate": 1.12e-06,
|
| 108 |
+
"loss": 0.5664,
|
| 109 |
+
"step": 225
|
| 110 |
+
},
|
| 111 |
+
{
|
| 112 |
+
"epoch": 4.0,
|
| 113 |
+
"eval_accuracy": 0.9375,
|
| 114 |
+
"eval_f1": 0.945054945054945,
|
| 115 |
+
"eval_loss": 0.5416288375854492,
|
| 116 |
+
"eval_precision": 0.8958333333333334,
|
| 117 |
+
"eval_recall": 1.0,
|
| 118 |
+
"eval_runtime": 3.3989,
|
| 119 |
+
"eval_samples_per_second": 23.537,
|
| 120 |
+
"eval_steps_per_second": 2.942,
|
| 121 |
+
"step": 228
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 4.385964912280702,
|
| 125 |
+
"grad_norm": 4.706058979034424,
|
| 126 |
+
"learning_rate": 1.245e-06,
|
| 127 |
+
"loss": 0.533,
|
| 128 |
+
"step": 250
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 4.824561403508772,
|
| 132 |
+
"grad_norm": 4.749231338500977,
|
| 133 |
+
"learning_rate": 1.37e-06,
|
| 134 |
+
"loss": 0.5036,
|
| 135 |
+
"step": 275
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 5.0,
|
| 139 |
+
"eval_accuracy": 0.9625,
|
| 140 |
+
"eval_f1": 0.9662921348314607,
|
| 141 |
+
"eval_loss": 0.45674929022789,
|
| 142 |
+
"eval_precision": 0.9347826086956522,
|
| 143 |
+
"eval_recall": 1.0,
|
| 144 |
+
"eval_runtime": 3.3705,
|
| 145 |
+
"eval_samples_per_second": 23.735,
|
| 146 |
+
"eval_steps_per_second": 2.967,
|
| 147 |
+
"step": 285
|
| 148 |
+
}
|
| 149 |
+
],
|
| 150 |
+
"logging_steps": 25,
|
| 151 |
+
"max_steps": 1140,
|
| 152 |
+
"num_input_tokens_seen": 0,
|
| 153 |
+
"num_train_epochs": 20,
|
| 154 |
+
"save_steps": 500,
|
| 155 |
+
"stateful_callbacks": {
|
| 156 |
+
"EarlyStoppingCallback": {
|
| 157 |
+
"args": {
|
| 158 |
+
"early_stopping_patience": 5,
|
| 159 |
+
"early_stopping_threshold": 0.0
|
| 160 |
+
},
|
| 161 |
+
"attributes": {
|
| 162 |
+
"early_stopping_patience_counter": 0
|
| 163 |
+
}
|
| 164 |
+
},
|
| 165 |
+
"TrainerControl": {
|
| 166 |
+
"args": {
|
| 167 |
+
"should_epoch_stop": false,
|
| 168 |
+
"should_evaluate": false,
|
| 169 |
+
"should_log": false,
|
| 170 |
+
"should_save": true,
|
| 171 |
+
"should_training_stop": false
|
| 172 |
+
},
|
| 173 |
+
"attributes": {}
|
| 174 |
+
}
|
| 175 |
+
},
|
| 176 |
+
"total_flos": 591999874560000.0,
|
| 177 |
+
"train_batch_size": 8,
|
| 178 |
+
"trial_name": null,
|
| 179 |
+
"trial_params": null
|
| 180 |
+
}
|
checkpoint-285/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63faf219838a9e56f129bb219f11f7d21de055610c1d54c74fe871a46d5bf77a
|
| 3 |
+
size 5777
|
checkpoint-342/config.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"BertForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"classifier_dropout": null,
|
| 7 |
+
"dtype": "float32",
|
| 8 |
+
"gradient_checkpointing": false,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"id2label": {
|
| 13 |
+
"0": "Human",
|
| 14 |
+
"1": "AI"
|
| 15 |
+
},
|
| 16 |
+
"initializer_range": 0.02,
|
| 17 |
+
"intermediate_size": 3072,
|
| 18 |
+
"label2id": {
|
| 19 |
+
"AI": 1,
|
| 20 |
+
"Human": 0
|
| 21 |
+
},
|
| 22 |
+
"layer_norm_eps": 1e-12,
|
| 23 |
+
"max_position_embeddings": 512,
|
| 24 |
+
"model_type": "bert",
|
| 25 |
+
"num_attention_heads": 12,
|
| 26 |
+
"num_hidden_layers": 12,
|
| 27 |
+
"pad_token_id": 0,
|
| 28 |
+
"position_embedding_type": "absolute",
|
| 29 |
+
"transformers_version": "4.57.3",
|
| 30 |
+
"type_vocab_size": 2,
|
| 31 |
+
"use_cache": true,
|
| 32 |
+
"vocab_size": 30522
|
| 33 |
+
}
|
checkpoint-342/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9f72b00d2d7a1ae64b7adf1bbaf6c729c1965c99bd5f45fa684140d8892009ab
|
| 3 |
+
size 437958648
|
checkpoint-342/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:da84ddd3ff2865d4c34a9d078b89e224d1cc0d5f04eeed698cfce024979210b5
|
| 3 |
+
size 876033163
|
checkpoint-342/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e38e4bded6fa666a78b369223010c84f8eafdd4ce4069224aa6f2854b4222440
|
| 3 |
+
size 14455
|
checkpoint-342/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fbb7e7c352d53a21fe634efbd24e6a75fe26234a1854ab93ea6f0ab92429f7e8
|
| 3 |
+
size 1465
|
checkpoint-342/trainer_state.json
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 342,
|
| 3 |
+
"best_metric": 0.9772727272727273,
|
| 4 |
+
"best_model_checkpoint": "./ai_detector_model_v10/checkpoint-342",
|
| 5 |
+
"epoch": 6.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 342,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.43859649122807015,
|
| 14 |
+
"grad_norm": 2.53117299079895,
|
| 15 |
+
"learning_rate": 1.2000000000000002e-07,
|
| 16 |
+
"loss": 0.6905,
|
| 17 |
+
"step": 25
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.8771929824561403,
|
| 21 |
+
"grad_norm": 3.241912841796875,
|
| 22 |
+
"learning_rate": 2.45e-07,
|
| 23 |
+
"loss": 0.6869,
|
| 24 |
+
"step": 50
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 1.0,
|
| 28 |
+
"eval_accuracy": 0.5,
|
| 29 |
+
"eval_f1": 0.16666666666666666,
|
| 30 |
+
"eval_loss": 0.6886580586433411,
|
| 31 |
+
"eval_precision": 0.8,
|
| 32 |
+
"eval_recall": 0.09302325581395349,
|
| 33 |
+
"eval_runtime": 4.3688,
|
| 34 |
+
"eval_samples_per_second": 18.312,
|
| 35 |
+
"eval_steps_per_second": 2.289,
|
| 36 |
+
"step": 57
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 1.3157894736842106,
|
| 40 |
+
"grad_norm": 2.8000667095184326,
|
| 41 |
+
"learning_rate": 3.7e-07,
|
| 42 |
+
"loss": 0.6872,
|
| 43 |
+
"step": 75
|
| 44 |
+
},
|
| 45 |
+
{
|
| 46 |
+
"epoch": 1.7543859649122808,
|
| 47 |
+
"grad_norm": 3.0732340812683105,
|
| 48 |
+
"learning_rate": 4.95e-07,
|
| 49 |
+
"loss": 0.6721,
|
| 50 |
+
"step": 100
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"epoch": 2.0,
|
| 54 |
+
"eval_accuracy": 0.6,
|
| 55 |
+
"eval_f1": 0.4482758620689655,
|
| 56 |
+
"eval_loss": 0.6631786227226257,
|
| 57 |
+
"eval_precision": 0.8666666666666667,
|
| 58 |
+
"eval_recall": 0.3023255813953488,
|
| 59 |
+
"eval_runtime": 3.4954,
|
| 60 |
+
"eval_samples_per_second": 22.887,
|
| 61 |
+
"eval_steps_per_second": 2.861,
|
| 62 |
+
"step": 114
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"epoch": 2.192982456140351,
|
| 66 |
+
"grad_norm": 5.148472309112549,
|
| 67 |
+
"learning_rate": 6.2e-07,
|
| 68 |
+
"loss": 0.6639,
|
| 69 |
+
"step": 125
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"epoch": 2.6315789473684212,
|
| 73 |
+
"grad_norm": 4.3272223472595215,
|
| 74 |
+
"learning_rate": 7.45e-07,
|
| 75 |
+
"loss": 0.6423,
|
| 76 |
+
"step": 150
|
| 77 |
+
},
|
| 78 |
+
{
|
| 79 |
+
"epoch": 3.0,
|
| 80 |
+
"eval_accuracy": 0.9125,
|
| 81 |
+
"eval_f1": 0.9195402298850575,
|
| 82 |
+
"eval_loss": 0.6100292801856995,
|
| 83 |
+
"eval_precision": 0.9090909090909091,
|
| 84 |
+
"eval_recall": 0.9302325581395349,
|
| 85 |
+
"eval_runtime": 3.5042,
|
| 86 |
+
"eval_samples_per_second": 22.829,
|
| 87 |
+
"eval_steps_per_second": 2.854,
|
| 88 |
+
"step": 171
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"epoch": 3.0701754385964914,
|
| 92 |
+
"grad_norm": 4.4310302734375,
|
| 93 |
+
"learning_rate": 8.699999999999999e-07,
|
| 94 |
+
"loss": 0.6113,
|
| 95 |
+
"step": 175
|
| 96 |
+
},
|
| 97 |
+
{
|
| 98 |
+
"epoch": 3.5087719298245617,
|
| 99 |
+
"grad_norm": 3.7883880138397217,
|
| 100 |
+
"learning_rate": 9.95e-07,
|
| 101 |
+
"loss": 0.587,
|
| 102 |
+
"step": 200
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"epoch": 3.9473684210526314,
|
| 106 |
+
"grad_norm": 4.989749908447266,
|
| 107 |
+
"learning_rate": 1.12e-06,
|
| 108 |
+
"loss": 0.5664,
|
| 109 |
+
"step": 225
|
| 110 |
+
},
|
| 111 |
+
{
|
| 112 |
+
"epoch": 4.0,
|
| 113 |
+
"eval_accuracy": 0.9375,
|
| 114 |
+
"eval_f1": 0.945054945054945,
|
| 115 |
+
"eval_loss": 0.5416288375854492,
|
| 116 |
+
"eval_precision": 0.8958333333333334,
|
| 117 |
+
"eval_recall": 1.0,
|
| 118 |
+
"eval_runtime": 3.3989,
|
| 119 |
+
"eval_samples_per_second": 23.537,
|
| 120 |
+
"eval_steps_per_second": 2.942,
|
| 121 |
+
"step": 228
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 4.385964912280702,
|
| 125 |
+
"grad_norm": 4.706058979034424,
|
| 126 |
+
"learning_rate": 1.245e-06,
|
| 127 |
+
"loss": 0.533,
|
| 128 |
+
"step": 250
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 4.824561403508772,
|
| 132 |
+
"grad_norm": 4.749231338500977,
|
| 133 |
+
"learning_rate": 1.37e-06,
|
| 134 |
+
"loss": 0.5036,
|
| 135 |
+
"step": 275
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 5.0,
|
| 139 |
+
"eval_accuracy": 0.9625,
|
| 140 |
+
"eval_f1": 0.9662921348314607,
|
| 141 |
+
"eval_loss": 0.45674929022789,
|
| 142 |
+
"eval_precision": 0.9347826086956522,
|
| 143 |
+
"eval_recall": 1.0,
|
| 144 |
+
"eval_runtime": 3.3705,
|
| 145 |
+
"eval_samples_per_second": 23.735,
|
| 146 |
+
"eval_steps_per_second": 2.967,
|
| 147 |
+
"step": 285
|
| 148 |
+
},
|
| 149 |
+
{
|
| 150 |
+
"epoch": 5.2631578947368425,
|
| 151 |
+
"grad_norm": 5.614340782165527,
|
| 152 |
+
"learning_rate": 1.495e-06,
|
| 153 |
+
"loss": 0.451,
|
| 154 |
+
"step": 300
|
| 155 |
+
},
|
| 156 |
+
{
|
| 157 |
+
"epoch": 5.701754385964913,
|
| 158 |
+
"grad_norm": 3.3072502613067627,
|
| 159 |
+
"learning_rate": 1.6200000000000002e-06,
|
| 160 |
+
"loss": 0.4289,
|
| 161 |
+
"step": 325
|
| 162 |
+
},
|
| 163 |
+
{
|
| 164 |
+
"epoch": 6.0,
|
| 165 |
+
"eval_accuracy": 0.975,
|
| 166 |
+
"eval_f1": 0.9772727272727273,
|
| 167 |
+
"eval_loss": 0.3828332722187042,
|
| 168 |
+
"eval_precision": 0.9555555555555556,
|
| 169 |
+
"eval_recall": 1.0,
|
| 170 |
+
"eval_runtime": 3.3562,
|
| 171 |
+
"eval_samples_per_second": 23.836,
|
| 172 |
+
"eval_steps_per_second": 2.98,
|
| 173 |
+
"step": 342
|
| 174 |
+
}
|
| 175 |
+
],
|
| 176 |
+
"logging_steps": 25,
|
| 177 |
+
"max_steps": 1140,
|
| 178 |
+
"num_input_tokens_seen": 0,
|
| 179 |
+
"num_train_epochs": 20,
|
| 180 |
+
"save_steps": 500,
|
| 181 |
+
"stateful_callbacks": {
|
| 182 |
+
"EarlyStoppingCallback": {
|
| 183 |
+
"args": {
|
| 184 |
+
"early_stopping_patience": 5,
|
| 185 |
+
"early_stopping_threshold": 0.0
|
| 186 |
+
},
|
| 187 |
+
"attributes": {
|
| 188 |
+
"early_stopping_patience_counter": 0
|
| 189 |
+
}
|
| 190 |
+
},
|
| 191 |
+
"TrainerControl": {
|
| 192 |
+
"args": {
|
| 193 |
+
"should_epoch_stop": false,
|
| 194 |
+
"should_evaluate": false,
|
| 195 |
+
"should_log": false,
|
| 196 |
+
"should_save": true,
|
| 197 |
+
"should_training_stop": false
|
| 198 |
+
},
|
| 199 |
+
"attributes": {}
|
| 200 |
+
}
|
| 201 |
+
},
|
| 202 |
+
"total_flos": 710399849472000.0,
|
| 203 |
+
"train_batch_size": 8,
|
| 204 |
+
"trial_name": null,
|
| 205 |
+
"trial_params": null
|
| 206 |
+
}
|
checkpoint-342/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63faf219838a9e56f129bb219f11f7d21de055610c1d54c74fe871a46d5bf77a
|
| 3 |
+
size 5777
|
checkpoint-399/config.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"BertForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"classifier_dropout": null,
|
| 7 |
+
"dtype": "float32",
|
| 8 |
+
"gradient_checkpointing": false,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"id2label": {
|
| 13 |
+
"0": "Human",
|
| 14 |
+
"1": "AI"
|
| 15 |
+
},
|
| 16 |
+
"initializer_range": 0.02,
|
| 17 |
+
"intermediate_size": 3072,
|
| 18 |
+
"label2id": {
|
| 19 |
+
"AI": 1,
|
| 20 |
+
"Human": 0
|
| 21 |
+
},
|
| 22 |
+
"layer_norm_eps": 1e-12,
|
| 23 |
+
"max_position_embeddings": 512,
|
| 24 |
+
"model_type": "bert",
|
| 25 |
+
"num_attention_heads": 12,
|
| 26 |
+
"num_hidden_layers": 12,
|
| 27 |
+
"pad_token_id": 0,
|
| 28 |
+
"position_embedding_type": "absolute",
|
| 29 |
+
"transformers_version": "4.57.3",
|
| 30 |
+
"type_vocab_size": 2,
|
| 31 |
+
"use_cache": true,
|
| 32 |
+
"vocab_size": 30522
|
| 33 |
+
}
|
checkpoint-399/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c6be057716b0865c0670b59edfdfba482333cdf657324e9764ca2b22f71596d
|
| 3 |
+
size 437958648
|
checkpoint-399/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc94edb2f9de087f036df04a8f8e77a07959e3b8de6476bc1d82ec96638399a5
|
| 3 |
+
size 876033163
|
checkpoint-399/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f13fa096c82c4698939c19f78b0766bd5c2a2142c49acdf650aabfa34b99d7f1
|
| 3 |
+
size 14455
|
checkpoint-399/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0a608ccd4279d7f75d5dec35e3fdfbc4a49180b3b225e3a554b45081a544a1e5
|
| 3 |
+
size 1465
|
checkpoint-399/trainer_state.json
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 399,
|
| 3 |
+
"best_metric": 1.0,
|
| 4 |
+
"best_model_checkpoint": "./ai_detector_model_v10/checkpoint-399",
|
| 5 |
+
"epoch": 7.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 399,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.43859649122807015,
|
| 14 |
+
"grad_norm": 2.53117299079895,
|
| 15 |
+
"learning_rate": 1.2000000000000002e-07,
|
| 16 |
+
"loss": 0.6905,
|
| 17 |
+
"step": 25
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.8771929824561403,
|
| 21 |
+
"grad_norm": 3.241912841796875,
|
| 22 |
+
"learning_rate": 2.45e-07,
|
| 23 |
+
"loss": 0.6869,
|
| 24 |
+
"step": 50
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 1.0,
|
| 28 |
+
"eval_accuracy": 0.5,
|
| 29 |
+
"eval_f1": 0.16666666666666666,
|
| 30 |
+
"eval_loss": 0.6886580586433411,
|
| 31 |
+
"eval_precision": 0.8,
|
| 32 |
+
"eval_recall": 0.09302325581395349,
|
| 33 |
+
"eval_runtime": 4.3688,
|
| 34 |
+
"eval_samples_per_second": 18.312,
|
| 35 |
+
"eval_steps_per_second": 2.289,
|
| 36 |
+
"step": 57
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 1.3157894736842106,
|
| 40 |
+
"grad_norm": 2.8000667095184326,
|
| 41 |
+
"learning_rate": 3.7e-07,
|
| 42 |
+
"loss": 0.6872,
|
| 43 |
+
"step": 75
|
| 44 |
+
},
|
| 45 |
+
{
|
| 46 |
+
"epoch": 1.7543859649122808,
|
| 47 |
+
"grad_norm": 3.0732340812683105,
|
| 48 |
+
"learning_rate": 4.95e-07,
|
| 49 |
+
"loss": 0.6721,
|
| 50 |
+
"step": 100
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"epoch": 2.0,
|
| 54 |
+
"eval_accuracy": 0.6,
|
| 55 |
+
"eval_f1": 0.4482758620689655,
|
| 56 |
+
"eval_loss": 0.6631786227226257,
|
| 57 |
+
"eval_precision": 0.8666666666666667,
|
| 58 |
+
"eval_recall": 0.3023255813953488,
|
| 59 |
+
"eval_runtime": 3.4954,
|
| 60 |
+
"eval_samples_per_second": 22.887,
|
| 61 |
+
"eval_steps_per_second": 2.861,
|
| 62 |
+
"step": 114
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"epoch": 2.192982456140351,
|
| 66 |
+
"grad_norm": 5.148472309112549,
|
| 67 |
+
"learning_rate": 6.2e-07,
|
| 68 |
+
"loss": 0.6639,
|
| 69 |
+
"step": 125
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"epoch": 2.6315789473684212,
|
| 73 |
+
"grad_norm": 4.3272223472595215,
|
| 74 |
+
"learning_rate": 7.45e-07,
|
| 75 |
+
"loss": 0.6423,
|
| 76 |
+
"step": 150
|
| 77 |
+
},
|
| 78 |
+
{
|
| 79 |
+
"epoch": 3.0,
|
| 80 |
+
"eval_accuracy": 0.9125,
|
| 81 |
+
"eval_f1": 0.9195402298850575,
|
| 82 |
+
"eval_loss": 0.6100292801856995,
|
| 83 |
+
"eval_precision": 0.9090909090909091,
|
| 84 |
+
"eval_recall": 0.9302325581395349,
|
| 85 |
+
"eval_runtime": 3.5042,
|
| 86 |
+
"eval_samples_per_second": 22.829,
|
| 87 |
+
"eval_steps_per_second": 2.854,
|
| 88 |
+
"step": 171
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"epoch": 3.0701754385964914,
|
| 92 |
+
"grad_norm": 4.4310302734375,
|
| 93 |
+
"learning_rate": 8.699999999999999e-07,
|
| 94 |
+
"loss": 0.6113,
|
| 95 |
+
"step": 175
|
| 96 |
+
},
|
| 97 |
+
{
|
| 98 |
+
"epoch": 3.5087719298245617,
|
| 99 |
+
"grad_norm": 3.7883880138397217,
|
| 100 |
+
"learning_rate": 9.95e-07,
|
| 101 |
+
"loss": 0.587,
|
| 102 |
+
"step": 200
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"epoch": 3.9473684210526314,
|
| 106 |
+
"grad_norm": 4.989749908447266,
|
| 107 |
+
"learning_rate": 1.12e-06,
|
| 108 |
+
"loss": 0.5664,
|
| 109 |
+
"step": 225
|
| 110 |
+
},
|
| 111 |
+
{
|
| 112 |
+
"epoch": 4.0,
|
| 113 |
+
"eval_accuracy": 0.9375,
|
| 114 |
+
"eval_f1": 0.945054945054945,
|
| 115 |
+
"eval_loss": 0.5416288375854492,
|
| 116 |
+
"eval_precision": 0.8958333333333334,
|
| 117 |
+
"eval_recall": 1.0,
|
| 118 |
+
"eval_runtime": 3.3989,
|
| 119 |
+
"eval_samples_per_second": 23.537,
|
| 120 |
+
"eval_steps_per_second": 2.942,
|
| 121 |
+
"step": 228
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 4.385964912280702,
|
| 125 |
+
"grad_norm": 4.706058979034424,
|
| 126 |
+
"learning_rate": 1.245e-06,
|
| 127 |
+
"loss": 0.533,
|
| 128 |
+
"step": 250
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 4.824561403508772,
|
| 132 |
+
"grad_norm": 4.749231338500977,
|
| 133 |
+
"learning_rate": 1.37e-06,
|
| 134 |
+
"loss": 0.5036,
|
| 135 |
+
"step": 275
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 5.0,
|
| 139 |
+
"eval_accuracy": 0.9625,
|
| 140 |
+
"eval_f1": 0.9662921348314607,
|
| 141 |
+
"eval_loss": 0.45674929022789,
|
| 142 |
+
"eval_precision": 0.9347826086956522,
|
| 143 |
+
"eval_recall": 1.0,
|
| 144 |
+
"eval_runtime": 3.3705,
|
| 145 |
+
"eval_samples_per_second": 23.735,
|
| 146 |
+
"eval_steps_per_second": 2.967,
|
| 147 |
+
"step": 285
|
| 148 |
+
},
|
| 149 |
+
{
|
| 150 |
+
"epoch": 5.2631578947368425,
|
| 151 |
+
"grad_norm": 5.614340782165527,
|
| 152 |
+
"learning_rate": 1.495e-06,
|
| 153 |
+
"loss": 0.451,
|
| 154 |
+
"step": 300
|
| 155 |
+
},
|
| 156 |
+
{
|
| 157 |
+
"epoch": 5.701754385964913,
|
| 158 |
+
"grad_norm": 3.3072502613067627,
|
| 159 |
+
"learning_rate": 1.6200000000000002e-06,
|
| 160 |
+
"loss": 0.4289,
|
| 161 |
+
"step": 325
|
| 162 |
+
},
|
| 163 |
+
{
|
| 164 |
+
"epoch": 6.0,
|
| 165 |
+
"eval_accuracy": 0.975,
|
| 166 |
+
"eval_f1": 0.9772727272727273,
|
| 167 |
+
"eval_loss": 0.3828332722187042,
|
| 168 |
+
"eval_precision": 0.9555555555555556,
|
| 169 |
+
"eval_recall": 1.0,
|
| 170 |
+
"eval_runtime": 3.3562,
|
| 171 |
+
"eval_samples_per_second": 23.836,
|
| 172 |
+
"eval_steps_per_second": 2.98,
|
| 173 |
+
"step": 342
|
| 174 |
+
},
|
| 175 |
+
{
|
| 176 |
+
"epoch": 6.140350877192983,
|
| 177 |
+
"grad_norm": 4.262056827545166,
|
| 178 |
+
"learning_rate": 1.745e-06,
|
| 179 |
+
"loss": 0.3868,
|
| 180 |
+
"step": 350
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"epoch": 6.578947368421053,
|
| 184 |
+
"grad_norm": 2.7170867919921875,
|
| 185 |
+
"learning_rate": 1.8699999999999999e-06,
|
| 186 |
+
"loss": 0.3527,
|
| 187 |
+
"step": 375
|
| 188 |
+
},
|
| 189 |
+
{
|
| 190 |
+
"epoch": 7.0,
|
| 191 |
+
"eval_accuracy": 1.0,
|
| 192 |
+
"eval_f1": 1.0,
|
| 193 |
+
"eval_loss": 0.3189076781272888,
|
| 194 |
+
"eval_precision": 1.0,
|
| 195 |
+
"eval_recall": 1.0,
|
| 196 |
+
"eval_runtime": 3.349,
|
| 197 |
+
"eval_samples_per_second": 23.888,
|
| 198 |
+
"eval_steps_per_second": 2.986,
|
| 199 |
+
"step": 399
|
| 200 |
+
}
|
| 201 |
+
],
|
| 202 |
+
"logging_steps": 25,
|
| 203 |
+
"max_steps": 1140,
|
| 204 |
+
"num_input_tokens_seen": 0,
|
| 205 |
+
"num_train_epochs": 20,
|
| 206 |
+
"save_steps": 500,
|
| 207 |
+
"stateful_callbacks": {
|
| 208 |
+
"EarlyStoppingCallback": {
|
| 209 |
+
"args": {
|
| 210 |
+
"early_stopping_patience": 5,
|
| 211 |
+
"early_stopping_threshold": 0.0
|
| 212 |
+
},
|
| 213 |
+
"attributes": {
|
| 214 |
+
"early_stopping_patience_counter": 0
|
| 215 |
+
}
|
| 216 |
+
},
|
| 217 |
+
"TrainerControl": {
|
| 218 |
+
"args": {
|
| 219 |
+
"should_epoch_stop": false,
|
| 220 |
+
"should_evaluate": false,
|
| 221 |
+
"should_log": false,
|
| 222 |
+
"should_save": true,
|
| 223 |
+
"should_training_stop": false
|
| 224 |
+
},
|
| 225 |
+
"attributes": {}
|
| 226 |
+
}
|
| 227 |
+
},
|
| 228 |
+
"total_flos": 828799824384000.0,
|
| 229 |
+
"train_batch_size": 8,
|
| 230 |
+
"trial_name": null,
|
| 231 |
+
"trial_params": null
|
| 232 |
+
}
|
checkpoint-399/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63faf219838a9e56f129bb219f11f7d21de055610c1d54c74fe871a46d5bf77a
|
| 3 |
+
size 5777
|
checkpoint-456/config.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"BertForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"classifier_dropout": null,
|
| 7 |
+
"dtype": "float32",
|
| 8 |
+
"gradient_checkpointing": false,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"id2label": {
|
| 13 |
+
"0": "Human",
|
| 14 |
+
"1": "AI"
|
| 15 |
+
},
|
| 16 |
+
"initializer_range": 0.02,
|
| 17 |
+
"intermediate_size": 3072,
|
| 18 |
+
"label2id": {
|
| 19 |
+
"AI": 1,
|
| 20 |
+
"Human": 0
|
| 21 |
+
},
|
| 22 |
+
"layer_norm_eps": 1e-12,
|
| 23 |
+
"max_position_embeddings": 512,
|
| 24 |
+
"model_type": "bert",
|
| 25 |
+
"num_attention_heads": 12,
|
| 26 |
+
"num_hidden_layers": 12,
|
| 27 |
+
"pad_token_id": 0,
|
| 28 |
+
"position_embedding_type": "absolute",
|
| 29 |
+
"transformers_version": "4.57.3",
|
| 30 |
+
"type_vocab_size": 2,
|
| 31 |
+
"use_cache": true,
|
| 32 |
+
"vocab_size": 30522
|
| 33 |
+
}
|
checkpoint-456/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b2e953bb67f488c7a1107df218ca62baef18561db4745cb5444166fb8b41d55
|
| 3 |
+
size 437958648
|
checkpoint-456/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:567989590572f110b4143eddfe2b7e09da0392fa8d4223d8d622fd32c7945f55
|
| 3 |
+
size 876033163
|
checkpoint-456/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b46fdb450f07235e10b0a3cbfc22c5b992fc3bc85e2c60f4f801ecd647891c56
|
| 3 |
+
size 14455
|
checkpoint-456/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d6bbc54a1bfa58ad2f93c0f3c37d1fed1cefc747af9e48c028e8caf5b9c0a05
|
| 3 |
+
size 1465
|
checkpoint-456/trainer_state.json
ADDED
|
@@ -0,0 +1,265 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 399,
|
| 3 |
+
"best_metric": 1.0,
|
| 4 |
+
"best_model_checkpoint": "./ai_detector_model_v10/checkpoint-399",
|
| 5 |
+
"epoch": 8.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 456,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.43859649122807015,
|
| 14 |
+
"grad_norm": 2.53117299079895,
|
| 15 |
+
"learning_rate": 1.2000000000000002e-07,
|
| 16 |
+
"loss": 0.6905,
|
| 17 |
+
"step": 25
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.8771929824561403,
|
| 21 |
+
"grad_norm": 3.241912841796875,
|
| 22 |
+
"learning_rate": 2.45e-07,
|
| 23 |
+
"loss": 0.6869,
|
| 24 |
+
"step": 50
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 1.0,
|
| 28 |
+
"eval_accuracy": 0.5,
|
| 29 |
+
"eval_f1": 0.16666666666666666,
|
| 30 |
+
"eval_loss": 0.6886580586433411,
|
| 31 |
+
"eval_precision": 0.8,
|
| 32 |
+
"eval_recall": 0.09302325581395349,
|
| 33 |
+
"eval_runtime": 4.3688,
|
| 34 |
+
"eval_samples_per_second": 18.312,
|
| 35 |
+
"eval_steps_per_second": 2.289,
|
| 36 |
+
"step": 57
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 1.3157894736842106,
|
| 40 |
+
"grad_norm": 2.8000667095184326,
|
| 41 |
+
"learning_rate": 3.7e-07,
|
| 42 |
+
"loss": 0.6872,
|
| 43 |
+
"step": 75
|
| 44 |
+
},
|
| 45 |
+
{
|
| 46 |
+
"epoch": 1.7543859649122808,
|
| 47 |
+
"grad_norm": 3.0732340812683105,
|
| 48 |
+
"learning_rate": 4.95e-07,
|
| 49 |
+
"loss": 0.6721,
|
| 50 |
+
"step": 100
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"epoch": 2.0,
|
| 54 |
+
"eval_accuracy": 0.6,
|
| 55 |
+
"eval_f1": 0.4482758620689655,
|
| 56 |
+
"eval_loss": 0.6631786227226257,
|
| 57 |
+
"eval_precision": 0.8666666666666667,
|
| 58 |
+
"eval_recall": 0.3023255813953488,
|
| 59 |
+
"eval_runtime": 3.4954,
|
| 60 |
+
"eval_samples_per_second": 22.887,
|
| 61 |
+
"eval_steps_per_second": 2.861,
|
| 62 |
+
"step": 114
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"epoch": 2.192982456140351,
|
| 66 |
+
"grad_norm": 5.148472309112549,
|
| 67 |
+
"learning_rate": 6.2e-07,
|
| 68 |
+
"loss": 0.6639,
|
| 69 |
+
"step": 125
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"epoch": 2.6315789473684212,
|
| 73 |
+
"grad_norm": 4.3272223472595215,
|
| 74 |
+
"learning_rate": 7.45e-07,
|
| 75 |
+
"loss": 0.6423,
|
| 76 |
+
"step": 150
|
| 77 |
+
},
|
| 78 |
+
{
|
| 79 |
+
"epoch": 3.0,
|
| 80 |
+
"eval_accuracy": 0.9125,
|
| 81 |
+
"eval_f1": 0.9195402298850575,
|
| 82 |
+
"eval_loss": 0.6100292801856995,
|
| 83 |
+
"eval_precision": 0.9090909090909091,
|
| 84 |
+
"eval_recall": 0.9302325581395349,
|
| 85 |
+
"eval_runtime": 3.5042,
|
| 86 |
+
"eval_samples_per_second": 22.829,
|
| 87 |
+
"eval_steps_per_second": 2.854,
|
| 88 |
+
"step": 171
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"epoch": 3.0701754385964914,
|
| 92 |
+
"grad_norm": 4.4310302734375,
|
| 93 |
+
"learning_rate": 8.699999999999999e-07,
|
| 94 |
+
"loss": 0.6113,
|
| 95 |
+
"step": 175
|
| 96 |
+
},
|
| 97 |
+
{
|
| 98 |
+
"epoch": 3.5087719298245617,
|
| 99 |
+
"grad_norm": 3.7883880138397217,
|
| 100 |
+
"learning_rate": 9.95e-07,
|
| 101 |
+
"loss": 0.587,
|
| 102 |
+
"step": 200
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"epoch": 3.9473684210526314,
|
| 106 |
+
"grad_norm": 4.989749908447266,
|
| 107 |
+
"learning_rate": 1.12e-06,
|
| 108 |
+
"loss": 0.5664,
|
| 109 |
+
"step": 225
|
| 110 |
+
},
|
| 111 |
+
{
|
| 112 |
+
"epoch": 4.0,
|
| 113 |
+
"eval_accuracy": 0.9375,
|
| 114 |
+
"eval_f1": 0.945054945054945,
|
| 115 |
+
"eval_loss": 0.5416288375854492,
|
| 116 |
+
"eval_precision": 0.8958333333333334,
|
| 117 |
+
"eval_recall": 1.0,
|
| 118 |
+
"eval_runtime": 3.3989,
|
| 119 |
+
"eval_samples_per_second": 23.537,
|
| 120 |
+
"eval_steps_per_second": 2.942,
|
| 121 |
+
"step": 228
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 4.385964912280702,
|
| 125 |
+
"grad_norm": 4.706058979034424,
|
| 126 |
+
"learning_rate": 1.245e-06,
|
| 127 |
+
"loss": 0.533,
|
| 128 |
+
"step": 250
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 4.824561403508772,
|
| 132 |
+
"grad_norm": 4.749231338500977,
|
| 133 |
+
"learning_rate": 1.37e-06,
|
| 134 |
+
"loss": 0.5036,
|
| 135 |
+
"step": 275
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 5.0,
|
| 139 |
+
"eval_accuracy": 0.9625,
|
| 140 |
+
"eval_f1": 0.9662921348314607,
|
| 141 |
+
"eval_loss": 0.45674929022789,
|
| 142 |
+
"eval_precision": 0.9347826086956522,
|
| 143 |
+
"eval_recall": 1.0,
|
| 144 |
+
"eval_runtime": 3.3705,
|
| 145 |
+
"eval_samples_per_second": 23.735,
|
| 146 |
+
"eval_steps_per_second": 2.967,
|
| 147 |
+
"step": 285
|
| 148 |
+
},
|
| 149 |
+
{
|
| 150 |
+
"epoch": 5.2631578947368425,
|
| 151 |
+
"grad_norm": 5.614340782165527,
|
| 152 |
+
"learning_rate": 1.495e-06,
|
| 153 |
+
"loss": 0.451,
|
| 154 |
+
"step": 300
|
| 155 |
+
},
|
| 156 |
+
{
|
| 157 |
+
"epoch": 5.701754385964913,
|
| 158 |
+
"grad_norm": 3.3072502613067627,
|
| 159 |
+
"learning_rate": 1.6200000000000002e-06,
|
| 160 |
+
"loss": 0.4289,
|
| 161 |
+
"step": 325
|
| 162 |
+
},
|
| 163 |
+
{
|
| 164 |
+
"epoch": 6.0,
|
| 165 |
+
"eval_accuracy": 0.975,
|
| 166 |
+
"eval_f1": 0.9772727272727273,
|
| 167 |
+
"eval_loss": 0.3828332722187042,
|
| 168 |
+
"eval_precision": 0.9555555555555556,
|
| 169 |
+
"eval_recall": 1.0,
|
| 170 |
+
"eval_runtime": 3.3562,
|
| 171 |
+
"eval_samples_per_second": 23.836,
|
| 172 |
+
"eval_steps_per_second": 2.98,
|
| 173 |
+
"step": 342
|
| 174 |
+
},
|
| 175 |
+
{
|
| 176 |
+
"epoch": 6.140350877192983,
|
| 177 |
+
"grad_norm": 4.262056827545166,
|
| 178 |
+
"learning_rate": 1.745e-06,
|
| 179 |
+
"loss": 0.3868,
|
| 180 |
+
"step": 350
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"epoch": 6.578947368421053,
|
| 184 |
+
"grad_norm": 2.7170867919921875,
|
| 185 |
+
"learning_rate": 1.8699999999999999e-06,
|
| 186 |
+
"loss": 0.3527,
|
| 187 |
+
"step": 375
|
| 188 |
+
},
|
| 189 |
+
{
|
| 190 |
+
"epoch": 7.0,
|
| 191 |
+
"eval_accuracy": 1.0,
|
| 192 |
+
"eval_f1": 1.0,
|
| 193 |
+
"eval_loss": 0.3189076781272888,
|
| 194 |
+
"eval_precision": 1.0,
|
| 195 |
+
"eval_recall": 1.0,
|
| 196 |
+
"eval_runtime": 3.349,
|
| 197 |
+
"eval_samples_per_second": 23.888,
|
| 198 |
+
"eval_steps_per_second": 2.986,
|
| 199 |
+
"step": 399
|
| 200 |
+
},
|
| 201 |
+
{
|
| 202 |
+
"epoch": 7.017543859649122,
|
| 203 |
+
"grad_norm": 1.8840763568878174,
|
| 204 |
+
"learning_rate": 1.995e-06,
|
| 205 |
+
"loss": 0.3292,
|
| 206 |
+
"step": 400
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"epoch": 7.456140350877193,
|
| 210 |
+
"grad_norm": 2.5557754039764404,
|
| 211 |
+
"learning_rate": 2.12e-06,
|
| 212 |
+
"loss": 0.3073,
|
| 213 |
+
"step": 425
|
| 214 |
+
},
|
| 215 |
+
{
|
| 216 |
+
"epoch": 7.894736842105263,
|
| 217 |
+
"grad_norm": 2.818211555480957,
|
| 218 |
+
"learning_rate": 2.245e-06,
|
| 219 |
+
"loss": 0.2884,
|
| 220 |
+
"step": 450
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"epoch": 8.0,
|
| 224 |
+
"eval_accuracy": 1.0,
|
| 225 |
+
"eval_f1": 1.0,
|
| 226 |
+
"eval_loss": 0.2968425154685974,
|
| 227 |
+
"eval_precision": 1.0,
|
| 228 |
+
"eval_recall": 1.0,
|
| 229 |
+
"eval_runtime": 3.4168,
|
| 230 |
+
"eval_samples_per_second": 23.414,
|
| 231 |
+
"eval_steps_per_second": 2.927,
|
| 232 |
+
"step": 456
|
| 233 |
+
}
|
| 234 |
+
],
|
| 235 |
+
"logging_steps": 25,
|
| 236 |
+
"max_steps": 1140,
|
| 237 |
+
"num_input_tokens_seen": 0,
|
| 238 |
+
"num_train_epochs": 20,
|
| 239 |
+
"save_steps": 500,
|
| 240 |
+
"stateful_callbacks": {
|
| 241 |
+
"EarlyStoppingCallback": {
|
| 242 |
+
"args": {
|
| 243 |
+
"early_stopping_patience": 5,
|
| 244 |
+
"early_stopping_threshold": 0.0
|
| 245 |
+
},
|
| 246 |
+
"attributes": {
|
| 247 |
+
"early_stopping_patience_counter": 1
|
| 248 |
+
}
|
| 249 |
+
},
|
| 250 |
+
"TrainerControl": {
|
| 251 |
+
"args": {
|
| 252 |
+
"should_epoch_stop": false,
|
| 253 |
+
"should_evaluate": false,
|
| 254 |
+
"should_log": false,
|
| 255 |
+
"should_save": true,
|
| 256 |
+
"should_training_stop": false
|
| 257 |
+
},
|
| 258 |
+
"attributes": {}
|
| 259 |
+
}
|
| 260 |
+
},
|
| 261 |
+
"total_flos": 947199799296000.0,
|
| 262 |
+
"train_batch_size": 8,
|
| 263 |
+
"trial_name": null,
|
| 264 |
+
"trial_params": null
|
| 265 |
+
}
|
checkpoint-456/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63faf219838a9e56f129bb219f11f7d21de055610c1d54c74fe871a46d5bf77a
|
| 3 |
+
size 5777
|
checkpoint-513/config.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"BertForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"classifier_dropout": null,
|
| 7 |
+
"dtype": "float32",
|
| 8 |
+
"gradient_checkpointing": false,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"id2label": {
|
| 13 |
+
"0": "Human",
|
| 14 |
+
"1": "AI"
|
| 15 |
+
},
|
| 16 |
+
"initializer_range": 0.02,
|
| 17 |
+
"intermediate_size": 3072,
|
| 18 |
+
"label2id": {
|
| 19 |
+
"AI": 1,
|
| 20 |
+
"Human": 0
|
| 21 |
+
},
|
| 22 |
+
"layer_norm_eps": 1e-12,
|
| 23 |
+
"max_position_embeddings": 512,
|
| 24 |
+
"model_type": "bert",
|
| 25 |
+
"num_attention_heads": 12,
|
| 26 |
+
"num_hidden_layers": 12,
|
| 27 |
+
"pad_token_id": 0,
|
| 28 |
+
"position_embedding_type": "absolute",
|
| 29 |
+
"transformers_version": "4.57.3",
|
| 30 |
+
"type_vocab_size": 2,
|
| 31 |
+
"use_cache": true,
|
| 32 |
+
"vocab_size": 30522
|
| 33 |
+
}
|