textsightai commited on
Commit
caaa9d2
·
verified ·
1 Parent(s): f650e2f

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. checkpoint-114/config.json +33 -0
  2. checkpoint-114/model.safetensors +3 -0
  3. checkpoint-114/optimizer.pt +3 -0
  4. checkpoint-114/rng_state.pth +3 -0
  5. checkpoint-114/scheduler.pt +3 -0
  6. checkpoint-114/trainer_state.json +95 -0
  7. checkpoint-114/training_args.bin +3 -0
  8. checkpoint-171/config.json +33 -0
  9. checkpoint-171/model.safetensors +3 -0
  10. checkpoint-171/optimizer.pt +3 -0
  11. checkpoint-171/rng_state.pth +3 -0
  12. checkpoint-171/scheduler.pt +3 -0
  13. checkpoint-171/trainer_state.json +121 -0
  14. checkpoint-171/training_args.bin +3 -0
  15. checkpoint-228/config.json +33 -0
  16. checkpoint-228/model.safetensors +3 -0
  17. checkpoint-228/optimizer.pt +3 -0
  18. checkpoint-228/rng_state.pth +3 -0
  19. checkpoint-228/scheduler.pt +3 -0
  20. checkpoint-228/trainer_state.json +154 -0
  21. checkpoint-228/training_args.bin +3 -0
  22. checkpoint-285/config.json +33 -0
  23. checkpoint-285/model.safetensors +3 -0
  24. checkpoint-285/optimizer.pt +3 -0
  25. checkpoint-285/rng_state.pth +3 -0
  26. checkpoint-285/scheduler.pt +3 -0
  27. checkpoint-285/trainer_state.json +180 -0
  28. checkpoint-285/training_args.bin +3 -0
  29. checkpoint-342/config.json +33 -0
  30. checkpoint-342/model.safetensors +3 -0
  31. checkpoint-342/optimizer.pt +3 -0
  32. checkpoint-342/rng_state.pth +3 -0
  33. checkpoint-342/scheduler.pt +3 -0
  34. checkpoint-342/trainer_state.json +206 -0
  35. checkpoint-342/training_args.bin +3 -0
  36. checkpoint-399/config.json +33 -0
  37. checkpoint-399/model.safetensors +3 -0
  38. checkpoint-399/optimizer.pt +3 -0
  39. checkpoint-399/rng_state.pth +3 -0
  40. checkpoint-399/scheduler.pt +3 -0
  41. checkpoint-399/trainer_state.json +232 -0
  42. checkpoint-399/training_args.bin +3 -0
  43. checkpoint-456/config.json +33 -0
  44. checkpoint-456/model.safetensors +3 -0
  45. checkpoint-456/optimizer.pt +3 -0
  46. checkpoint-456/rng_state.pth +3 -0
  47. checkpoint-456/scheduler.pt +3 -0
  48. checkpoint-456/trainer_state.json +265 -0
  49. checkpoint-456/training_args.bin +3 -0
  50. checkpoint-513/config.json +33 -0
checkpoint-114/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "float32",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "Human",
14
+ "1": "AI"
15
+ },
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 3072,
18
+ "label2id": {
19
+ "AI": 1,
20
+ "Human": 0
21
+ },
22
+ "layer_norm_eps": 1e-12,
23
+ "max_position_embeddings": 512,
24
+ "model_type": "bert",
25
+ "num_attention_heads": 12,
26
+ "num_hidden_layers": 12,
27
+ "pad_token_id": 0,
28
+ "position_embedding_type": "absolute",
29
+ "transformers_version": "4.57.3",
30
+ "type_vocab_size": 2,
31
+ "use_cache": true,
32
+ "vocab_size": 30522
33
+ }
checkpoint-114/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba07e64572fb03035e6ad53dc30c7db64d902eb4157cb66935426945358712f1
3
+ size 437958648
checkpoint-114/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b118ab24d14452e0fcb395f1e9e76053c0ac34053f040c46497c931b9ff5b04
3
+ size 876033163
checkpoint-114/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f1e0d31acc437fbbd16411d0d11d500c5f4dbbc7561671dab7dbf23eb0f2c43
3
+ size 14455
checkpoint-114/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c25199463fa046ddd18b57985caa892648a50605fcbefcaff1fd9e9d82c8e75
3
+ size 1465
checkpoint-114/trainer_state.json ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 114,
3
+ "best_metric": 0.4482758620689655,
4
+ "best_model_checkpoint": "./ai_detector_model_v10/checkpoint-114",
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 114,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.43859649122807015,
14
+ "grad_norm": 2.53117299079895,
15
+ "learning_rate": 1.2000000000000002e-07,
16
+ "loss": 0.6905,
17
+ "step": 25
18
+ },
19
+ {
20
+ "epoch": 0.8771929824561403,
21
+ "grad_norm": 3.241912841796875,
22
+ "learning_rate": 2.45e-07,
23
+ "loss": 0.6869,
24
+ "step": 50
25
+ },
26
+ {
27
+ "epoch": 1.0,
28
+ "eval_accuracy": 0.5,
29
+ "eval_f1": 0.16666666666666666,
30
+ "eval_loss": 0.6886580586433411,
31
+ "eval_precision": 0.8,
32
+ "eval_recall": 0.09302325581395349,
33
+ "eval_runtime": 4.3688,
34
+ "eval_samples_per_second": 18.312,
35
+ "eval_steps_per_second": 2.289,
36
+ "step": 57
37
+ },
38
+ {
39
+ "epoch": 1.3157894736842106,
40
+ "grad_norm": 2.8000667095184326,
41
+ "learning_rate": 3.7e-07,
42
+ "loss": 0.6872,
43
+ "step": 75
44
+ },
45
+ {
46
+ "epoch": 1.7543859649122808,
47
+ "grad_norm": 3.0732340812683105,
48
+ "learning_rate": 4.95e-07,
49
+ "loss": 0.6721,
50
+ "step": 100
51
+ },
52
+ {
53
+ "epoch": 2.0,
54
+ "eval_accuracy": 0.6,
55
+ "eval_f1": 0.4482758620689655,
56
+ "eval_loss": 0.6631786227226257,
57
+ "eval_precision": 0.8666666666666667,
58
+ "eval_recall": 0.3023255813953488,
59
+ "eval_runtime": 3.4954,
60
+ "eval_samples_per_second": 22.887,
61
+ "eval_steps_per_second": 2.861,
62
+ "step": 114
63
+ }
64
+ ],
65
+ "logging_steps": 25,
66
+ "max_steps": 1140,
67
+ "num_input_tokens_seen": 0,
68
+ "num_train_epochs": 20,
69
+ "save_steps": 500,
70
+ "stateful_callbacks": {
71
+ "EarlyStoppingCallback": {
72
+ "args": {
73
+ "early_stopping_patience": 5,
74
+ "early_stopping_threshold": 0.0
75
+ },
76
+ "attributes": {
77
+ "early_stopping_patience_counter": 0
78
+ }
79
+ },
80
+ "TrainerControl": {
81
+ "args": {
82
+ "should_epoch_stop": false,
83
+ "should_evaluate": false,
84
+ "should_log": false,
85
+ "should_save": true,
86
+ "should_training_stop": false
87
+ },
88
+ "attributes": {}
89
+ }
90
+ },
91
+ "total_flos": 236799949824000.0,
92
+ "train_batch_size": 8,
93
+ "trial_name": null,
94
+ "trial_params": null
95
+ }
checkpoint-114/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63faf219838a9e56f129bb219f11f7d21de055610c1d54c74fe871a46d5bf77a
3
+ size 5777
checkpoint-171/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "float32",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "Human",
14
+ "1": "AI"
15
+ },
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 3072,
18
+ "label2id": {
19
+ "AI": 1,
20
+ "Human": 0
21
+ },
22
+ "layer_norm_eps": 1e-12,
23
+ "max_position_embeddings": 512,
24
+ "model_type": "bert",
25
+ "num_attention_heads": 12,
26
+ "num_hidden_layers": 12,
27
+ "pad_token_id": 0,
28
+ "position_embedding_type": "absolute",
29
+ "transformers_version": "4.57.3",
30
+ "type_vocab_size": 2,
31
+ "use_cache": true,
32
+ "vocab_size": 30522
33
+ }
checkpoint-171/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55009821690e917db1114dbc848baec4d7739ac9286431f21472af667a8eddf7
3
+ size 437958648
checkpoint-171/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09c521d9bd88b4ab3f2cf6491df2b5355f289b80dda6f61e2e3c9f6e4ba347c9
3
+ size 876033163
checkpoint-171/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:391d01d3aeb4a35151817d446e4ba0b9c8a04084ae1b1b66eda188a30729da0a
3
+ size 14455
checkpoint-171/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:891b886d78f6365df6b5329e3942b03490ef41cb2ce6ea4d05691c57a4531bac
3
+ size 1465
checkpoint-171/trainer_state.json ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 171,
3
+ "best_metric": 0.9195402298850575,
4
+ "best_model_checkpoint": "./ai_detector_model_v10/checkpoint-171",
5
+ "epoch": 3.0,
6
+ "eval_steps": 500,
7
+ "global_step": 171,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.43859649122807015,
14
+ "grad_norm": 2.53117299079895,
15
+ "learning_rate": 1.2000000000000002e-07,
16
+ "loss": 0.6905,
17
+ "step": 25
18
+ },
19
+ {
20
+ "epoch": 0.8771929824561403,
21
+ "grad_norm": 3.241912841796875,
22
+ "learning_rate": 2.45e-07,
23
+ "loss": 0.6869,
24
+ "step": 50
25
+ },
26
+ {
27
+ "epoch": 1.0,
28
+ "eval_accuracy": 0.5,
29
+ "eval_f1": 0.16666666666666666,
30
+ "eval_loss": 0.6886580586433411,
31
+ "eval_precision": 0.8,
32
+ "eval_recall": 0.09302325581395349,
33
+ "eval_runtime": 4.3688,
34
+ "eval_samples_per_second": 18.312,
35
+ "eval_steps_per_second": 2.289,
36
+ "step": 57
37
+ },
38
+ {
39
+ "epoch": 1.3157894736842106,
40
+ "grad_norm": 2.8000667095184326,
41
+ "learning_rate": 3.7e-07,
42
+ "loss": 0.6872,
43
+ "step": 75
44
+ },
45
+ {
46
+ "epoch": 1.7543859649122808,
47
+ "grad_norm": 3.0732340812683105,
48
+ "learning_rate": 4.95e-07,
49
+ "loss": 0.6721,
50
+ "step": 100
51
+ },
52
+ {
53
+ "epoch": 2.0,
54
+ "eval_accuracy": 0.6,
55
+ "eval_f1": 0.4482758620689655,
56
+ "eval_loss": 0.6631786227226257,
57
+ "eval_precision": 0.8666666666666667,
58
+ "eval_recall": 0.3023255813953488,
59
+ "eval_runtime": 3.4954,
60
+ "eval_samples_per_second": 22.887,
61
+ "eval_steps_per_second": 2.861,
62
+ "step": 114
63
+ },
64
+ {
65
+ "epoch": 2.192982456140351,
66
+ "grad_norm": 5.148472309112549,
67
+ "learning_rate": 6.2e-07,
68
+ "loss": 0.6639,
69
+ "step": 125
70
+ },
71
+ {
72
+ "epoch": 2.6315789473684212,
73
+ "grad_norm": 4.3272223472595215,
74
+ "learning_rate": 7.45e-07,
75
+ "loss": 0.6423,
76
+ "step": 150
77
+ },
78
+ {
79
+ "epoch": 3.0,
80
+ "eval_accuracy": 0.9125,
81
+ "eval_f1": 0.9195402298850575,
82
+ "eval_loss": 0.6100292801856995,
83
+ "eval_precision": 0.9090909090909091,
84
+ "eval_recall": 0.9302325581395349,
85
+ "eval_runtime": 3.5042,
86
+ "eval_samples_per_second": 22.829,
87
+ "eval_steps_per_second": 2.854,
88
+ "step": 171
89
+ }
90
+ ],
91
+ "logging_steps": 25,
92
+ "max_steps": 1140,
93
+ "num_input_tokens_seen": 0,
94
+ "num_train_epochs": 20,
95
+ "save_steps": 500,
96
+ "stateful_callbacks": {
97
+ "EarlyStoppingCallback": {
98
+ "args": {
99
+ "early_stopping_patience": 5,
100
+ "early_stopping_threshold": 0.0
101
+ },
102
+ "attributes": {
103
+ "early_stopping_patience_counter": 0
104
+ }
105
+ },
106
+ "TrainerControl": {
107
+ "args": {
108
+ "should_epoch_stop": false,
109
+ "should_evaluate": false,
110
+ "should_log": false,
111
+ "should_save": true,
112
+ "should_training_stop": false
113
+ },
114
+ "attributes": {}
115
+ }
116
+ },
117
+ "total_flos": 355199924736000.0,
118
+ "train_batch_size": 8,
119
+ "trial_name": null,
120
+ "trial_params": null
121
+ }
checkpoint-171/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63faf219838a9e56f129bb219f11f7d21de055610c1d54c74fe871a46d5bf77a
3
+ size 5777
checkpoint-228/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "float32",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "Human",
14
+ "1": "AI"
15
+ },
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 3072,
18
+ "label2id": {
19
+ "AI": 1,
20
+ "Human": 0
21
+ },
22
+ "layer_norm_eps": 1e-12,
23
+ "max_position_embeddings": 512,
24
+ "model_type": "bert",
25
+ "num_attention_heads": 12,
26
+ "num_hidden_layers": 12,
27
+ "pad_token_id": 0,
28
+ "position_embedding_type": "absolute",
29
+ "transformers_version": "4.57.3",
30
+ "type_vocab_size": 2,
31
+ "use_cache": true,
32
+ "vocab_size": 30522
33
+ }
checkpoint-228/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dff874543e53030922925ea8f8650cb4f2e2a46217f2fa2a688bef7c055f9cd9
3
+ size 437958648
checkpoint-228/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8eee46e5fb474bc5a87dd72d542443661ffebd3bcd8f64daef357d98ba3b03a8
3
+ size 876033163
checkpoint-228/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67627e3b026c4c5d776980914bd7f99f2f9814ae6ac5a3bd1d93ee8d2ff6784f
3
+ size 14455
checkpoint-228/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:977b9b2e7012dd8b09d3a15f4d4c2a57908497bc25ffbbd86cffb63b01ef4771
3
+ size 1465
checkpoint-228/trainer_state.json ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 228,
3
+ "best_metric": 0.945054945054945,
4
+ "best_model_checkpoint": "./ai_detector_model_v10/checkpoint-228",
5
+ "epoch": 4.0,
6
+ "eval_steps": 500,
7
+ "global_step": 228,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.43859649122807015,
14
+ "grad_norm": 2.53117299079895,
15
+ "learning_rate": 1.2000000000000002e-07,
16
+ "loss": 0.6905,
17
+ "step": 25
18
+ },
19
+ {
20
+ "epoch": 0.8771929824561403,
21
+ "grad_norm": 3.241912841796875,
22
+ "learning_rate": 2.45e-07,
23
+ "loss": 0.6869,
24
+ "step": 50
25
+ },
26
+ {
27
+ "epoch": 1.0,
28
+ "eval_accuracy": 0.5,
29
+ "eval_f1": 0.16666666666666666,
30
+ "eval_loss": 0.6886580586433411,
31
+ "eval_precision": 0.8,
32
+ "eval_recall": 0.09302325581395349,
33
+ "eval_runtime": 4.3688,
34
+ "eval_samples_per_second": 18.312,
35
+ "eval_steps_per_second": 2.289,
36
+ "step": 57
37
+ },
38
+ {
39
+ "epoch": 1.3157894736842106,
40
+ "grad_norm": 2.8000667095184326,
41
+ "learning_rate": 3.7e-07,
42
+ "loss": 0.6872,
43
+ "step": 75
44
+ },
45
+ {
46
+ "epoch": 1.7543859649122808,
47
+ "grad_norm": 3.0732340812683105,
48
+ "learning_rate": 4.95e-07,
49
+ "loss": 0.6721,
50
+ "step": 100
51
+ },
52
+ {
53
+ "epoch": 2.0,
54
+ "eval_accuracy": 0.6,
55
+ "eval_f1": 0.4482758620689655,
56
+ "eval_loss": 0.6631786227226257,
57
+ "eval_precision": 0.8666666666666667,
58
+ "eval_recall": 0.3023255813953488,
59
+ "eval_runtime": 3.4954,
60
+ "eval_samples_per_second": 22.887,
61
+ "eval_steps_per_second": 2.861,
62
+ "step": 114
63
+ },
64
+ {
65
+ "epoch": 2.192982456140351,
66
+ "grad_norm": 5.148472309112549,
67
+ "learning_rate": 6.2e-07,
68
+ "loss": 0.6639,
69
+ "step": 125
70
+ },
71
+ {
72
+ "epoch": 2.6315789473684212,
73
+ "grad_norm": 4.3272223472595215,
74
+ "learning_rate": 7.45e-07,
75
+ "loss": 0.6423,
76
+ "step": 150
77
+ },
78
+ {
79
+ "epoch": 3.0,
80
+ "eval_accuracy": 0.9125,
81
+ "eval_f1": 0.9195402298850575,
82
+ "eval_loss": 0.6100292801856995,
83
+ "eval_precision": 0.9090909090909091,
84
+ "eval_recall": 0.9302325581395349,
85
+ "eval_runtime": 3.5042,
86
+ "eval_samples_per_second": 22.829,
87
+ "eval_steps_per_second": 2.854,
88
+ "step": 171
89
+ },
90
+ {
91
+ "epoch": 3.0701754385964914,
92
+ "grad_norm": 4.4310302734375,
93
+ "learning_rate": 8.699999999999999e-07,
94
+ "loss": 0.6113,
95
+ "step": 175
96
+ },
97
+ {
98
+ "epoch": 3.5087719298245617,
99
+ "grad_norm": 3.7883880138397217,
100
+ "learning_rate": 9.95e-07,
101
+ "loss": 0.587,
102
+ "step": 200
103
+ },
104
+ {
105
+ "epoch": 3.9473684210526314,
106
+ "grad_norm": 4.989749908447266,
107
+ "learning_rate": 1.12e-06,
108
+ "loss": 0.5664,
109
+ "step": 225
110
+ },
111
+ {
112
+ "epoch": 4.0,
113
+ "eval_accuracy": 0.9375,
114
+ "eval_f1": 0.945054945054945,
115
+ "eval_loss": 0.5416288375854492,
116
+ "eval_precision": 0.8958333333333334,
117
+ "eval_recall": 1.0,
118
+ "eval_runtime": 3.3989,
119
+ "eval_samples_per_second": 23.537,
120
+ "eval_steps_per_second": 2.942,
121
+ "step": 228
122
+ }
123
+ ],
124
+ "logging_steps": 25,
125
+ "max_steps": 1140,
126
+ "num_input_tokens_seen": 0,
127
+ "num_train_epochs": 20,
128
+ "save_steps": 500,
129
+ "stateful_callbacks": {
130
+ "EarlyStoppingCallback": {
131
+ "args": {
132
+ "early_stopping_patience": 5,
133
+ "early_stopping_threshold": 0.0
134
+ },
135
+ "attributes": {
136
+ "early_stopping_patience_counter": 0
137
+ }
138
+ },
139
+ "TrainerControl": {
140
+ "args": {
141
+ "should_epoch_stop": false,
142
+ "should_evaluate": false,
143
+ "should_log": false,
144
+ "should_save": true,
145
+ "should_training_stop": false
146
+ },
147
+ "attributes": {}
148
+ }
149
+ },
150
+ "total_flos": 473599899648000.0,
151
+ "train_batch_size": 8,
152
+ "trial_name": null,
153
+ "trial_params": null
154
+ }
checkpoint-228/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63faf219838a9e56f129bb219f11f7d21de055610c1d54c74fe871a46d5bf77a
3
+ size 5777
checkpoint-285/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "float32",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "Human",
14
+ "1": "AI"
15
+ },
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 3072,
18
+ "label2id": {
19
+ "AI": 1,
20
+ "Human": 0
21
+ },
22
+ "layer_norm_eps": 1e-12,
23
+ "max_position_embeddings": 512,
24
+ "model_type": "bert",
25
+ "num_attention_heads": 12,
26
+ "num_hidden_layers": 12,
27
+ "pad_token_id": 0,
28
+ "position_embedding_type": "absolute",
29
+ "transformers_version": "4.57.3",
30
+ "type_vocab_size": 2,
31
+ "use_cache": true,
32
+ "vocab_size": 30522
33
+ }
checkpoint-285/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:243655f60fd2a53b6f348a75ebbb0ac4df9a5236a4b0f72379bfa1bd3643c4f0
3
+ size 437958648
checkpoint-285/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a569ec9245736253f61c9d622a5d3ca2ff56b2f6ad0d0669c59a5cba04e6cab2
3
+ size 876033163
checkpoint-285/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b652c6269b998b96ab924b2734c0818fab436c642524e13fc6cd4d9082e62b5
3
+ size 14455
checkpoint-285/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be42d5eaf34fdb700c80731c58d1f751f473aad4e6a622406045abb719430309
3
+ size 1465
checkpoint-285/trainer_state.json ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 285,
3
+ "best_metric": 0.9662921348314607,
4
+ "best_model_checkpoint": "./ai_detector_model_v10/checkpoint-285",
5
+ "epoch": 5.0,
6
+ "eval_steps": 500,
7
+ "global_step": 285,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.43859649122807015,
14
+ "grad_norm": 2.53117299079895,
15
+ "learning_rate": 1.2000000000000002e-07,
16
+ "loss": 0.6905,
17
+ "step": 25
18
+ },
19
+ {
20
+ "epoch": 0.8771929824561403,
21
+ "grad_norm": 3.241912841796875,
22
+ "learning_rate": 2.45e-07,
23
+ "loss": 0.6869,
24
+ "step": 50
25
+ },
26
+ {
27
+ "epoch": 1.0,
28
+ "eval_accuracy": 0.5,
29
+ "eval_f1": 0.16666666666666666,
30
+ "eval_loss": 0.6886580586433411,
31
+ "eval_precision": 0.8,
32
+ "eval_recall": 0.09302325581395349,
33
+ "eval_runtime": 4.3688,
34
+ "eval_samples_per_second": 18.312,
35
+ "eval_steps_per_second": 2.289,
36
+ "step": 57
37
+ },
38
+ {
39
+ "epoch": 1.3157894736842106,
40
+ "grad_norm": 2.8000667095184326,
41
+ "learning_rate": 3.7e-07,
42
+ "loss": 0.6872,
43
+ "step": 75
44
+ },
45
+ {
46
+ "epoch": 1.7543859649122808,
47
+ "grad_norm": 3.0732340812683105,
48
+ "learning_rate": 4.95e-07,
49
+ "loss": 0.6721,
50
+ "step": 100
51
+ },
52
+ {
53
+ "epoch": 2.0,
54
+ "eval_accuracy": 0.6,
55
+ "eval_f1": 0.4482758620689655,
56
+ "eval_loss": 0.6631786227226257,
57
+ "eval_precision": 0.8666666666666667,
58
+ "eval_recall": 0.3023255813953488,
59
+ "eval_runtime": 3.4954,
60
+ "eval_samples_per_second": 22.887,
61
+ "eval_steps_per_second": 2.861,
62
+ "step": 114
63
+ },
64
+ {
65
+ "epoch": 2.192982456140351,
66
+ "grad_norm": 5.148472309112549,
67
+ "learning_rate": 6.2e-07,
68
+ "loss": 0.6639,
69
+ "step": 125
70
+ },
71
+ {
72
+ "epoch": 2.6315789473684212,
73
+ "grad_norm": 4.3272223472595215,
74
+ "learning_rate": 7.45e-07,
75
+ "loss": 0.6423,
76
+ "step": 150
77
+ },
78
+ {
79
+ "epoch": 3.0,
80
+ "eval_accuracy": 0.9125,
81
+ "eval_f1": 0.9195402298850575,
82
+ "eval_loss": 0.6100292801856995,
83
+ "eval_precision": 0.9090909090909091,
84
+ "eval_recall": 0.9302325581395349,
85
+ "eval_runtime": 3.5042,
86
+ "eval_samples_per_second": 22.829,
87
+ "eval_steps_per_second": 2.854,
88
+ "step": 171
89
+ },
90
+ {
91
+ "epoch": 3.0701754385964914,
92
+ "grad_norm": 4.4310302734375,
93
+ "learning_rate": 8.699999999999999e-07,
94
+ "loss": 0.6113,
95
+ "step": 175
96
+ },
97
+ {
98
+ "epoch": 3.5087719298245617,
99
+ "grad_norm": 3.7883880138397217,
100
+ "learning_rate": 9.95e-07,
101
+ "loss": 0.587,
102
+ "step": 200
103
+ },
104
+ {
105
+ "epoch": 3.9473684210526314,
106
+ "grad_norm": 4.989749908447266,
107
+ "learning_rate": 1.12e-06,
108
+ "loss": 0.5664,
109
+ "step": 225
110
+ },
111
+ {
112
+ "epoch": 4.0,
113
+ "eval_accuracy": 0.9375,
114
+ "eval_f1": 0.945054945054945,
115
+ "eval_loss": 0.5416288375854492,
116
+ "eval_precision": 0.8958333333333334,
117
+ "eval_recall": 1.0,
118
+ "eval_runtime": 3.3989,
119
+ "eval_samples_per_second": 23.537,
120
+ "eval_steps_per_second": 2.942,
121
+ "step": 228
122
+ },
123
+ {
124
+ "epoch": 4.385964912280702,
125
+ "grad_norm": 4.706058979034424,
126
+ "learning_rate": 1.245e-06,
127
+ "loss": 0.533,
128
+ "step": 250
129
+ },
130
+ {
131
+ "epoch": 4.824561403508772,
132
+ "grad_norm": 4.749231338500977,
133
+ "learning_rate": 1.37e-06,
134
+ "loss": 0.5036,
135
+ "step": 275
136
+ },
137
+ {
138
+ "epoch": 5.0,
139
+ "eval_accuracy": 0.9625,
140
+ "eval_f1": 0.9662921348314607,
141
+ "eval_loss": 0.45674929022789,
142
+ "eval_precision": 0.9347826086956522,
143
+ "eval_recall": 1.0,
144
+ "eval_runtime": 3.3705,
145
+ "eval_samples_per_second": 23.735,
146
+ "eval_steps_per_second": 2.967,
147
+ "step": 285
148
+ }
149
+ ],
150
+ "logging_steps": 25,
151
+ "max_steps": 1140,
152
+ "num_input_tokens_seen": 0,
153
+ "num_train_epochs": 20,
154
+ "save_steps": 500,
155
+ "stateful_callbacks": {
156
+ "EarlyStoppingCallback": {
157
+ "args": {
158
+ "early_stopping_patience": 5,
159
+ "early_stopping_threshold": 0.0
160
+ },
161
+ "attributes": {
162
+ "early_stopping_patience_counter": 0
163
+ }
164
+ },
165
+ "TrainerControl": {
166
+ "args": {
167
+ "should_epoch_stop": false,
168
+ "should_evaluate": false,
169
+ "should_log": false,
170
+ "should_save": true,
171
+ "should_training_stop": false
172
+ },
173
+ "attributes": {}
174
+ }
175
+ },
176
+ "total_flos": 591999874560000.0,
177
+ "train_batch_size": 8,
178
+ "trial_name": null,
179
+ "trial_params": null
180
+ }
checkpoint-285/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63faf219838a9e56f129bb219f11f7d21de055610c1d54c74fe871a46d5bf77a
3
+ size 5777
checkpoint-342/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "float32",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "Human",
14
+ "1": "AI"
15
+ },
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 3072,
18
+ "label2id": {
19
+ "AI": 1,
20
+ "Human": 0
21
+ },
22
+ "layer_norm_eps": 1e-12,
23
+ "max_position_embeddings": 512,
24
+ "model_type": "bert",
25
+ "num_attention_heads": 12,
26
+ "num_hidden_layers": 12,
27
+ "pad_token_id": 0,
28
+ "position_embedding_type": "absolute",
29
+ "transformers_version": "4.57.3",
30
+ "type_vocab_size": 2,
31
+ "use_cache": true,
32
+ "vocab_size": 30522
33
+ }
checkpoint-342/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f72b00d2d7a1ae64b7adf1bbaf6c729c1965c99bd5f45fa684140d8892009ab
3
+ size 437958648
checkpoint-342/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da84ddd3ff2865d4c34a9d078b89e224d1cc0d5f04eeed698cfce024979210b5
3
+ size 876033163
checkpoint-342/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e38e4bded6fa666a78b369223010c84f8eafdd4ce4069224aa6f2854b4222440
3
+ size 14455
checkpoint-342/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbb7e7c352d53a21fe634efbd24e6a75fe26234a1854ab93ea6f0ab92429f7e8
3
+ size 1465
checkpoint-342/trainer_state.json ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 342,
3
+ "best_metric": 0.9772727272727273,
4
+ "best_model_checkpoint": "./ai_detector_model_v10/checkpoint-342",
5
+ "epoch": 6.0,
6
+ "eval_steps": 500,
7
+ "global_step": 342,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.43859649122807015,
14
+ "grad_norm": 2.53117299079895,
15
+ "learning_rate": 1.2000000000000002e-07,
16
+ "loss": 0.6905,
17
+ "step": 25
18
+ },
19
+ {
20
+ "epoch": 0.8771929824561403,
21
+ "grad_norm": 3.241912841796875,
22
+ "learning_rate": 2.45e-07,
23
+ "loss": 0.6869,
24
+ "step": 50
25
+ },
26
+ {
27
+ "epoch": 1.0,
28
+ "eval_accuracy": 0.5,
29
+ "eval_f1": 0.16666666666666666,
30
+ "eval_loss": 0.6886580586433411,
31
+ "eval_precision": 0.8,
32
+ "eval_recall": 0.09302325581395349,
33
+ "eval_runtime": 4.3688,
34
+ "eval_samples_per_second": 18.312,
35
+ "eval_steps_per_second": 2.289,
36
+ "step": 57
37
+ },
38
+ {
39
+ "epoch": 1.3157894736842106,
40
+ "grad_norm": 2.8000667095184326,
41
+ "learning_rate": 3.7e-07,
42
+ "loss": 0.6872,
43
+ "step": 75
44
+ },
45
+ {
46
+ "epoch": 1.7543859649122808,
47
+ "grad_norm": 3.0732340812683105,
48
+ "learning_rate": 4.95e-07,
49
+ "loss": 0.6721,
50
+ "step": 100
51
+ },
52
+ {
53
+ "epoch": 2.0,
54
+ "eval_accuracy": 0.6,
55
+ "eval_f1": 0.4482758620689655,
56
+ "eval_loss": 0.6631786227226257,
57
+ "eval_precision": 0.8666666666666667,
58
+ "eval_recall": 0.3023255813953488,
59
+ "eval_runtime": 3.4954,
60
+ "eval_samples_per_second": 22.887,
61
+ "eval_steps_per_second": 2.861,
62
+ "step": 114
63
+ },
64
+ {
65
+ "epoch": 2.192982456140351,
66
+ "grad_norm": 5.148472309112549,
67
+ "learning_rate": 6.2e-07,
68
+ "loss": 0.6639,
69
+ "step": 125
70
+ },
71
+ {
72
+ "epoch": 2.6315789473684212,
73
+ "grad_norm": 4.3272223472595215,
74
+ "learning_rate": 7.45e-07,
75
+ "loss": 0.6423,
76
+ "step": 150
77
+ },
78
+ {
79
+ "epoch": 3.0,
80
+ "eval_accuracy": 0.9125,
81
+ "eval_f1": 0.9195402298850575,
82
+ "eval_loss": 0.6100292801856995,
83
+ "eval_precision": 0.9090909090909091,
84
+ "eval_recall": 0.9302325581395349,
85
+ "eval_runtime": 3.5042,
86
+ "eval_samples_per_second": 22.829,
87
+ "eval_steps_per_second": 2.854,
88
+ "step": 171
89
+ },
90
+ {
91
+ "epoch": 3.0701754385964914,
92
+ "grad_norm": 4.4310302734375,
93
+ "learning_rate": 8.699999999999999e-07,
94
+ "loss": 0.6113,
95
+ "step": 175
96
+ },
97
+ {
98
+ "epoch": 3.5087719298245617,
99
+ "grad_norm": 3.7883880138397217,
100
+ "learning_rate": 9.95e-07,
101
+ "loss": 0.587,
102
+ "step": 200
103
+ },
104
+ {
105
+ "epoch": 3.9473684210526314,
106
+ "grad_norm": 4.989749908447266,
107
+ "learning_rate": 1.12e-06,
108
+ "loss": 0.5664,
109
+ "step": 225
110
+ },
111
+ {
112
+ "epoch": 4.0,
113
+ "eval_accuracy": 0.9375,
114
+ "eval_f1": 0.945054945054945,
115
+ "eval_loss": 0.5416288375854492,
116
+ "eval_precision": 0.8958333333333334,
117
+ "eval_recall": 1.0,
118
+ "eval_runtime": 3.3989,
119
+ "eval_samples_per_second": 23.537,
120
+ "eval_steps_per_second": 2.942,
121
+ "step": 228
122
+ },
123
+ {
124
+ "epoch": 4.385964912280702,
125
+ "grad_norm": 4.706058979034424,
126
+ "learning_rate": 1.245e-06,
127
+ "loss": 0.533,
128
+ "step": 250
129
+ },
130
+ {
131
+ "epoch": 4.824561403508772,
132
+ "grad_norm": 4.749231338500977,
133
+ "learning_rate": 1.37e-06,
134
+ "loss": 0.5036,
135
+ "step": 275
136
+ },
137
+ {
138
+ "epoch": 5.0,
139
+ "eval_accuracy": 0.9625,
140
+ "eval_f1": 0.9662921348314607,
141
+ "eval_loss": 0.45674929022789,
142
+ "eval_precision": 0.9347826086956522,
143
+ "eval_recall": 1.0,
144
+ "eval_runtime": 3.3705,
145
+ "eval_samples_per_second": 23.735,
146
+ "eval_steps_per_second": 2.967,
147
+ "step": 285
148
+ },
149
+ {
150
+ "epoch": 5.2631578947368425,
151
+ "grad_norm": 5.614340782165527,
152
+ "learning_rate": 1.495e-06,
153
+ "loss": 0.451,
154
+ "step": 300
155
+ },
156
+ {
157
+ "epoch": 5.701754385964913,
158
+ "grad_norm": 3.3072502613067627,
159
+ "learning_rate": 1.6200000000000002e-06,
160
+ "loss": 0.4289,
161
+ "step": 325
162
+ },
163
+ {
164
+ "epoch": 6.0,
165
+ "eval_accuracy": 0.975,
166
+ "eval_f1": 0.9772727272727273,
167
+ "eval_loss": 0.3828332722187042,
168
+ "eval_precision": 0.9555555555555556,
169
+ "eval_recall": 1.0,
170
+ "eval_runtime": 3.3562,
171
+ "eval_samples_per_second": 23.836,
172
+ "eval_steps_per_second": 2.98,
173
+ "step": 342
174
+ }
175
+ ],
176
+ "logging_steps": 25,
177
+ "max_steps": 1140,
178
+ "num_input_tokens_seen": 0,
179
+ "num_train_epochs": 20,
180
+ "save_steps": 500,
181
+ "stateful_callbacks": {
182
+ "EarlyStoppingCallback": {
183
+ "args": {
184
+ "early_stopping_patience": 5,
185
+ "early_stopping_threshold": 0.0
186
+ },
187
+ "attributes": {
188
+ "early_stopping_patience_counter": 0
189
+ }
190
+ },
191
+ "TrainerControl": {
192
+ "args": {
193
+ "should_epoch_stop": false,
194
+ "should_evaluate": false,
195
+ "should_log": false,
196
+ "should_save": true,
197
+ "should_training_stop": false
198
+ },
199
+ "attributes": {}
200
+ }
201
+ },
202
+ "total_flos": 710399849472000.0,
203
+ "train_batch_size": 8,
204
+ "trial_name": null,
205
+ "trial_params": null
206
+ }
checkpoint-342/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63faf219838a9e56f129bb219f11f7d21de055610c1d54c74fe871a46d5bf77a
3
+ size 5777
checkpoint-399/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "float32",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "Human",
14
+ "1": "AI"
15
+ },
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 3072,
18
+ "label2id": {
19
+ "AI": 1,
20
+ "Human": 0
21
+ },
22
+ "layer_norm_eps": 1e-12,
23
+ "max_position_embeddings": 512,
24
+ "model_type": "bert",
25
+ "num_attention_heads": 12,
26
+ "num_hidden_layers": 12,
27
+ "pad_token_id": 0,
28
+ "position_embedding_type": "absolute",
29
+ "transformers_version": "4.57.3",
30
+ "type_vocab_size": 2,
31
+ "use_cache": true,
32
+ "vocab_size": 30522
33
+ }
checkpoint-399/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c6be057716b0865c0670b59edfdfba482333cdf657324e9764ca2b22f71596d
3
+ size 437958648
checkpoint-399/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc94edb2f9de087f036df04a8f8e77a07959e3b8de6476bc1d82ec96638399a5
3
+ size 876033163
checkpoint-399/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f13fa096c82c4698939c19f78b0766bd5c2a2142c49acdf650aabfa34b99d7f1
3
+ size 14455
checkpoint-399/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a608ccd4279d7f75d5dec35e3fdfbc4a49180b3b225e3a554b45081a544a1e5
3
+ size 1465
checkpoint-399/trainer_state.json ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 399,
3
+ "best_metric": 1.0,
4
+ "best_model_checkpoint": "./ai_detector_model_v10/checkpoint-399",
5
+ "epoch": 7.0,
6
+ "eval_steps": 500,
7
+ "global_step": 399,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.43859649122807015,
14
+ "grad_norm": 2.53117299079895,
15
+ "learning_rate": 1.2000000000000002e-07,
16
+ "loss": 0.6905,
17
+ "step": 25
18
+ },
19
+ {
20
+ "epoch": 0.8771929824561403,
21
+ "grad_norm": 3.241912841796875,
22
+ "learning_rate": 2.45e-07,
23
+ "loss": 0.6869,
24
+ "step": 50
25
+ },
26
+ {
27
+ "epoch": 1.0,
28
+ "eval_accuracy": 0.5,
29
+ "eval_f1": 0.16666666666666666,
30
+ "eval_loss": 0.6886580586433411,
31
+ "eval_precision": 0.8,
32
+ "eval_recall": 0.09302325581395349,
33
+ "eval_runtime": 4.3688,
34
+ "eval_samples_per_second": 18.312,
35
+ "eval_steps_per_second": 2.289,
36
+ "step": 57
37
+ },
38
+ {
39
+ "epoch": 1.3157894736842106,
40
+ "grad_norm": 2.8000667095184326,
41
+ "learning_rate": 3.7e-07,
42
+ "loss": 0.6872,
43
+ "step": 75
44
+ },
45
+ {
46
+ "epoch": 1.7543859649122808,
47
+ "grad_norm": 3.0732340812683105,
48
+ "learning_rate": 4.95e-07,
49
+ "loss": 0.6721,
50
+ "step": 100
51
+ },
52
+ {
53
+ "epoch": 2.0,
54
+ "eval_accuracy": 0.6,
55
+ "eval_f1": 0.4482758620689655,
56
+ "eval_loss": 0.6631786227226257,
57
+ "eval_precision": 0.8666666666666667,
58
+ "eval_recall": 0.3023255813953488,
59
+ "eval_runtime": 3.4954,
60
+ "eval_samples_per_second": 22.887,
61
+ "eval_steps_per_second": 2.861,
62
+ "step": 114
63
+ },
64
+ {
65
+ "epoch": 2.192982456140351,
66
+ "grad_norm": 5.148472309112549,
67
+ "learning_rate": 6.2e-07,
68
+ "loss": 0.6639,
69
+ "step": 125
70
+ },
71
+ {
72
+ "epoch": 2.6315789473684212,
73
+ "grad_norm": 4.3272223472595215,
74
+ "learning_rate": 7.45e-07,
75
+ "loss": 0.6423,
76
+ "step": 150
77
+ },
78
+ {
79
+ "epoch": 3.0,
80
+ "eval_accuracy": 0.9125,
81
+ "eval_f1": 0.9195402298850575,
82
+ "eval_loss": 0.6100292801856995,
83
+ "eval_precision": 0.9090909090909091,
84
+ "eval_recall": 0.9302325581395349,
85
+ "eval_runtime": 3.5042,
86
+ "eval_samples_per_second": 22.829,
87
+ "eval_steps_per_second": 2.854,
88
+ "step": 171
89
+ },
90
+ {
91
+ "epoch": 3.0701754385964914,
92
+ "grad_norm": 4.4310302734375,
93
+ "learning_rate": 8.699999999999999e-07,
94
+ "loss": 0.6113,
95
+ "step": 175
96
+ },
97
+ {
98
+ "epoch": 3.5087719298245617,
99
+ "grad_norm": 3.7883880138397217,
100
+ "learning_rate": 9.95e-07,
101
+ "loss": 0.587,
102
+ "step": 200
103
+ },
104
+ {
105
+ "epoch": 3.9473684210526314,
106
+ "grad_norm": 4.989749908447266,
107
+ "learning_rate": 1.12e-06,
108
+ "loss": 0.5664,
109
+ "step": 225
110
+ },
111
+ {
112
+ "epoch": 4.0,
113
+ "eval_accuracy": 0.9375,
114
+ "eval_f1": 0.945054945054945,
115
+ "eval_loss": 0.5416288375854492,
116
+ "eval_precision": 0.8958333333333334,
117
+ "eval_recall": 1.0,
118
+ "eval_runtime": 3.3989,
119
+ "eval_samples_per_second": 23.537,
120
+ "eval_steps_per_second": 2.942,
121
+ "step": 228
122
+ },
123
+ {
124
+ "epoch": 4.385964912280702,
125
+ "grad_norm": 4.706058979034424,
126
+ "learning_rate": 1.245e-06,
127
+ "loss": 0.533,
128
+ "step": 250
129
+ },
130
+ {
131
+ "epoch": 4.824561403508772,
132
+ "grad_norm": 4.749231338500977,
133
+ "learning_rate": 1.37e-06,
134
+ "loss": 0.5036,
135
+ "step": 275
136
+ },
137
+ {
138
+ "epoch": 5.0,
139
+ "eval_accuracy": 0.9625,
140
+ "eval_f1": 0.9662921348314607,
141
+ "eval_loss": 0.45674929022789,
142
+ "eval_precision": 0.9347826086956522,
143
+ "eval_recall": 1.0,
144
+ "eval_runtime": 3.3705,
145
+ "eval_samples_per_second": 23.735,
146
+ "eval_steps_per_second": 2.967,
147
+ "step": 285
148
+ },
149
+ {
150
+ "epoch": 5.2631578947368425,
151
+ "grad_norm": 5.614340782165527,
152
+ "learning_rate": 1.495e-06,
153
+ "loss": 0.451,
154
+ "step": 300
155
+ },
156
+ {
157
+ "epoch": 5.701754385964913,
158
+ "grad_norm": 3.3072502613067627,
159
+ "learning_rate": 1.6200000000000002e-06,
160
+ "loss": 0.4289,
161
+ "step": 325
162
+ },
163
+ {
164
+ "epoch": 6.0,
165
+ "eval_accuracy": 0.975,
166
+ "eval_f1": 0.9772727272727273,
167
+ "eval_loss": 0.3828332722187042,
168
+ "eval_precision": 0.9555555555555556,
169
+ "eval_recall": 1.0,
170
+ "eval_runtime": 3.3562,
171
+ "eval_samples_per_second": 23.836,
172
+ "eval_steps_per_second": 2.98,
173
+ "step": 342
174
+ },
175
+ {
176
+ "epoch": 6.140350877192983,
177
+ "grad_norm": 4.262056827545166,
178
+ "learning_rate": 1.745e-06,
179
+ "loss": 0.3868,
180
+ "step": 350
181
+ },
182
+ {
183
+ "epoch": 6.578947368421053,
184
+ "grad_norm": 2.7170867919921875,
185
+ "learning_rate": 1.8699999999999999e-06,
186
+ "loss": 0.3527,
187
+ "step": 375
188
+ },
189
+ {
190
+ "epoch": 7.0,
191
+ "eval_accuracy": 1.0,
192
+ "eval_f1": 1.0,
193
+ "eval_loss": 0.3189076781272888,
194
+ "eval_precision": 1.0,
195
+ "eval_recall": 1.0,
196
+ "eval_runtime": 3.349,
197
+ "eval_samples_per_second": 23.888,
198
+ "eval_steps_per_second": 2.986,
199
+ "step": 399
200
+ }
201
+ ],
202
+ "logging_steps": 25,
203
+ "max_steps": 1140,
204
+ "num_input_tokens_seen": 0,
205
+ "num_train_epochs": 20,
206
+ "save_steps": 500,
207
+ "stateful_callbacks": {
208
+ "EarlyStoppingCallback": {
209
+ "args": {
210
+ "early_stopping_patience": 5,
211
+ "early_stopping_threshold": 0.0
212
+ },
213
+ "attributes": {
214
+ "early_stopping_patience_counter": 0
215
+ }
216
+ },
217
+ "TrainerControl": {
218
+ "args": {
219
+ "should_epoch_stop": false,
220
+ "should_evaluate": false,
221
+ "should_log": false,
222
+ "should_save": true,
223
+ "should_training_stop": false
224
+ },
225
+ "attributes": {}
226
+ }
227
+ },
228
+ "total_flos": 828799824384000.0,
229
+ "train_batch_size": 8,
230
+ "trial_name": null,
231
+ "trial_params": null
232
+ }
checkpoint-399/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63faf219838a9e56f129bb219f11f7d21de055610c1d54c74fe871a46d5bf77a
3
+ size 5777
checkpoint-456/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "float32",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "Human",
14
+ "1": "AI"
15
+ },
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 3072,
18
+ "label2id": {
19
+ "AI": 1,
20
+ "Human": 0
21
+ },
22
+ "layer_norm_eps": 1e-12,
23
+ "max_position_embeddings": 512,
24
+ "model_type": "bert",
25
+ "num_attention_heads": 12,
26
+ "num_hidden_layers": 12,
27
+ "pad_token_id": 0,
28
+ "position_embedding_type": "absolute",
29
+ "transformers_version": "4.57.3",
30
+ "type_vocab_size": 2,
31
+ "use_cache": true,
32
+ "vocab_size": 30522
33
+ }
checkpoint-456/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b2e953bb67f488c7a1107df218ca62baef18561db4745cb5444166fb8b41d55
3
+ size 437958648
checkpoint-456/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:567989590572f110b4143eddfe2b7e09da0392fa8d4223d8d622fd32c7945f55
3
+ size 876033163
checkpoint-456/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b46fdb450f07235e10b0a3cbfc22c5b992fc3bc85e2c60f4f801ecd647891c56
3
+ size 14455
checkpoint-456/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d6bbc54a1bfa58ad2f93c0f3c37d1fed1cefc747af9e48c028e8caf5b9c0a05
3
+ size 1465
checkpoint-456/trainer_state.json ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 399,
3
+ "best_metric": 1.0,
4
+ "best_model_checkpoint": "./ai_detector_model_v10/checkpoint-399",
5
+ "epoch": 8.0,
6
+ "eval_steps": 500,
7
+ "global_step": 456,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.43859649122807015,
14
+ "grad_norm": 2.53117299079895,
15
+ "learning_rate": 1.2000000000000002e-07,
16
+ "loss": 0.6905,
17
+ "step": 25
18
+ },
19
+ {
20
+ "epoch": 0.8771929824561403,
21
+ "grad_norm": 3.241912841796875,
22
+ "learning_rate": 2.45e-07,
23
+ "loss": 0.6869,
24
+ "step": 50
25
+ },
26
+ {
27
+ "epoch": 1.0,
28
+ "eval_accuracy": 0.5,
29
+ "eval_f1": 0.16666666666666666,
30
+ "eval_loss": 0.6886580586433411,
31
+ "eval_precision": 0.8,
32
+ "eval_recall": 0.09302325581395349,
33
+ "eval_runtime": 4.3688,
34
+ "eval_samples_per_second": 18.312,
35
+ "eval_steps_per_second": 2.289,
36
+ "step": 57
37
+ },
38
+ {
39
+ "epoch": 1.3157894736842106,
40
+ "grad_norm": 2.8000667095184326,
41
+ "learning_rate": 3.7e-07,
42
+ "loss": 0.6872,
43
+ "step": 75
44
+ },
45
+ {
46
+ "epoch": 1.7543859649122808,
47
+ "grad_norm": 3.0732340812683105,
48
+ "learning_rate": 4.95e-07,
49
+ "loss": 0.6721,
50
+ "step": 100
51
+ },
52
+ {
53
+ "epoch": 2.0,
54
+ "eval_accuracy": 0.6,
55
+ "eval_f1": 0.4482758620689655,
56
+ "eval_loss": 0.6631786227226257,
57
+ "eval_precision": 0.8666666666666667,
58
+ "eval_recall": 0.3023255813953488,
59
+ "eval_runtime": 3.4954,
60
+ "eval_samples_per_second": 22.887,
61
+ "eval_steps_per_second": 2.861,
62
+ "step": 114
63
+ },
64
+ {
65
+ "epoch": 2.192982456140351,
66
+ "grad_norm": 5.148472309112549,
67
+ "learning_rate": 6.2e-07,
68
+ "loss": 0.6639,
69
+ "step": 125
70
+ },
71
+ {
72
+ "epoch": 2.6315789473684212,
73
+ "grad_norm": 4.3272223472595215,
74
+ "learning_rate": 7.45e-07,
75
+ "loss": 0.6423,
76
+ "step": 150
77
+ },
78
+ {
79
+ "epoch": 3.0,
80
+ "eval_accuracy": 0.9125,
81
+ "eval_f1": 0.9195402298850575,
82
+ "eval_loss": 0.6100292801856995,
83
+ "eval_precision": 0.9090909090909091,
84
+ "eval_recall": 0.9302325581395349,
85
+ "eval_runtime": 3.5042,
86
+ "eval_samples_per_second": 22.829,
87
+ "eval_steps_per_second": 2.854,
88
+ "step": 171
89
+ },
90
+ {
91
+ "epoch": 3.0701754385964914,
92
+ "grad_norm": 4.4310302734375,
93
+ "learning_rate": 8.699999999999999e-07,
94
+ "loss": 0.6113,
95
+ "step": 175
96
+ },
97
+ {
98
+ "epoch": 3.5087719298245617,
99
+ "grad_norm": 3.7883880138397217,
100
+ "learning_rate": 9.95e-07,
101
+ "loss": 0.587,
102
+ "step": 200
103
+ },
104
+ {
105
+ "epoch": 3.9473684210526314,
106
+ "grad_norm": 4.989749908447266,
107
+ "learning_rate": 1.12e-06,
108
+ "loss": 0.5664,
109
+ "step": 225
110
+ },
111
+ {
112
+ "epoch": 4.0,
113
+ "eval_accuracy": 0.9375,
114
+ "eval_f1": 0.945054945054945,
115
+ "eval_loss": 0.5416288375854492,
116
+ "eval_precision": 0.8958333333333334,
117
+ "eval_recall": 1.0,
118
+ "eval_runtime": 3.3989,
119
+ "eval_samples_per_second": 23.537,
120
+ "eval_steps_per_second": 2.942,
121
+ "step": 228
122
+ },
123
+ {
124
+ "epoch": 4.385964912280702,
125
+ "grad_norm": 4.706058979034424,
126
+ "learning_rate": 1.245e-06,
127
+ "loss": 0.533,
128
+ "step": 250
129
+ },
130
+ {
131
+ "epoch": 4.824561403508772,
132
+ "grad_norm": 4.749231338500977,
133
+ "learning_rate": 1.37e-06,
134
+ "loss": 0.5036,
135
+ "step": 275
136
+ },
137
+ {
138
+ "epoch": 5.0,
139
+ "eval_accuracy": 0.9625,
140
+ "eval_f1": 0.9662921348314607,
141
+ "eval_loss": 0.45674929022789,
142
+ "eval_precision": 0.9347826086956522,
143
+ "eval_recall": 1.0,
144
+ "eval_runtime": 3.3705,
145
+ "eval_samples_per_second": 23.735,
146
+ "eval_steps_per_second": 2.967,
147
+ "step": 285
148
+ },
149
+ {
150
+ "epoch": 5.2631578947368425,
151
+ "grad_norm": 5.614340782165527,
152
+ "learning_rate": 1.495e-06,
153
+ "loss": 0.451,
154
+ "step": 300
155
+ },
156
+ {
157
+ "epoch": 5.701754385964913,
158
+ "grad_norm": 3.3072502613067627,
159
+ "learning_rate": 1.6200000000000002e-06,
160
+ "loss": 0.4289,
161
+ "step": 325
162
+ },
163
+ {
164
+ "epoch": 6.0,
165
+ "eval_accuracy": 0.975,
166
+ "eval_f1": 0.9772727272727273,
167
+ "eval_loss": 0.3828332722187042,
168
+ "eval_precision": 0.9555555555555556,
169
+ "eval_recall": 1.0,
170
+ "eval_runtime": 3.3562,
171
+ "eval_samples_per_second": 23.836,
172
+ "eval_steps_per_second": 2.98,
173
+ "step": 342
174
+ },
175
+ {
176
+ "epoch": 6.140350877192983,
177
+ "grad_norm": 4.262056827545166,
178
+ "learning_rate": 1.745e-06,
179
+ "loss": 0.3868,
180
+ "step": 350
181
+ },
182
+ {
183
+ "epoch": 6.578947368421053,
184
+ "grad_norm": 2.7170867919921875,
185
+ "learning_rate": 1.8699999999999999e-06,
186
+ "loss": 0.3527,
187
+ "step": 375
188
+ },
189
+ {
190
+ "epoch": 7.0,
191
+ "eval_accuracy": 1.0,
192
+ "eval_f1": 1.0,
193
+ "eval_loss": 0.3189076781272888,
194
+ "eval_precision": 1.0,
195
+ "eval_recall": 1.0,
196
+ "eval_runtime": 3.349,
197
+ "eval_samples_per_second": 23.888,
198
+ "eval_steps_per_second": 2.986,
199
+ "step": 399
200
+ },
201
+ {
202
+ "epoch": 7.017543859649122,
203
+ "grad_norm": 1.8840763568878174,
204
+ "learning_rate": 1.995e-06,
205
+ "loss": 0.3292,
206
+ "step": 400
207
+ },
208
+ {
209
+ "epoch": 7.456140350877193,
210
+ "grad_norm": 2.5557754039764404,
211
+ "learning_rate": 2.12e-06,
212
+ "loss": 0.3073,
213
+ "step": 425
214
+ },
215
+ {
216
+ "epoch": 7.894736842105263,
217
+ "grad_norm": 2.818211555480957,
218
+ "learning_rate": 2.245e-06,
219
+ "loss": 0.2884,
220
+ "step": 450
221
+ },
222
+ {
223
+ "epoch": 8.0,
224
+ "eval_accuracy": 1.0,
225
+ "eval_f1": 1.0,
226
+ "eval_loss": 0.2968425154685974,
227
+ "eval_precision": 1.0,
228
+ "eval_recall": 1.0,
229
+ "eval_runtime": 3.4168,
230
+ "eval_samples_per_second": 23.414,
231
+ "eval_steps_per_second": 2.927,
232
+ "step": 456
233
+ }
234
+ ],
235
+ "logging_steps": 25,
236
+ "max_steps": 1140,
237
+ "num_input_tokens_seen": 0,
238
+ "num_train_epochs": 20,
239
+ "save_steps": 500,
240
+ "stateful_callbacks": {
241
+ "EarlyStoppingCallback": {
242
+ "args": {
243
+ "early_stopping_patience": 5,
244
+ "early_stopping_threshold": 0.0
245
+ },
246
+ "attributes": {
247
+ "early_stopping_patience_counter": 1
248
+ }
249
+ },
250
+ "TrainerControl": {
251
+ "args": {
252
+ "should_epoch_stop": false,
253
+ "should_evaluate": false,
254
+ "should_log": false,
255
+ "should_save": true,
256
+ "should_training_stop": false
257
+ },
258
+ "attributes": {}
259
+ }
260
+ },
261
+ "total_flos": 947199799296000.0,
262
+ "train_batch_size": 8,
263
+ "trial_name": null,
264
+ "trial_params": null
265
+ }
checkpoint-456/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63faf219838a9e56f129bb219f11f7d21de055610c1d54c74fe871a46d5bf77a
3
+ size 5777
checkpoint-513/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "float32",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "Human",
14
+ "1": "AI"
15
+ },
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 3072,
18
+ "label2id": {
19
+ "AI": 1,
20
+ "Human": 0
21
+ },
22
+ "layer_norm_eps": 1e-12,
23
+ "max_position_embeddings": 512,
24
+ "model_type": "bert",
25
+ "num_attention_heads": 12,
26
+ "num_hidden_layers": 12,
27
+ "pad_token_id": 0,
28
+ "position_embedding_type": "absolute",
29
+ "transformers_version": "4.57.3",
30
+ "type_vocab_size": 2,
31
+ "use_cache": true,
32
+ "vocab_size": 30522
33
+ }