seanfarrell committed on
Commit
1af7039
·
verified ·
1 Parent(s): 42777a5

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +67 -60
  6. training_args.bin +1 -1
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3db702d2e68876cab46976c836710ff38917f1da1c2ec63db4b90aea0f31bfad
3
  size 430935892
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:474b993621dcf0bddfe1abd8c429bde1a78e3cd28d23183ebdc1cfb5f798f9fb
3
  size 430935892
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c1c9ff4bb5264948e1e60bae8d14def3974dd4e7f386d9137099068800cfbc8
3
  size 861991482
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f3373cbddd58f49d7ee1bee615e1e87f0d764904fd125e2f94c5c1ca6b82737
3
  size 861991482
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2bc70652e6ef170320bcca82afe9a9f4bdf2996075a63ec8a251ac321429afb
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df8904498d0ea4278d68a668aa846964ce80df1ff9136c37871ee274669dc57f
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c7c30b78cec564dd03bf88837e697ae6ef4fcc7a8780b9e87c556a24173894f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e9de563a795946068ee2943aa801ab2757eb65d8f36f8743830ce3a1cd4b5ce
3
  size 1064
trainer_state.json CHANGED
@@ -1,93 +1,100 @@
1
  {
2
- "best_global_step": 476,
3
- "best_metric": 0.010661174543201923,
4
- "best_model_checkpoint": "projects/PetBERT_annonymisation/data/augment/checkpoint-476",
5
  "epoch": 4.0,
6
  "eval_steps": 500,
7
- "global_step": 1904,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0,
14
- "eval_f1": 0.035302169832600705,
15
- "eval_loss": 2.312753200531006,
16
- "eval_precision": 0.09090508818188361,
17
- "eval_recall": 0.06752732472754598,
18
- "eval_runtime": 12.3558,
19
- "eval_samples_per_second": 134.107,
20
- "eval_steps_per_second": 4.209,
21
  "step": 0
22
  },
 
 
 
 
 
 
 
23
  {
24
  "epoch": 1.0,
25
- "eval_f1": 0.8995538111348774,
26
- "eval_loss": 0.010661174543201923,
27
- "eval_precision": 0.8815916042183172,
28
- "eval_recall": 0.9190286419243201,
29
- "eval_runtime": 12.59,
30
- "eval_samples_per_second": 131.612,
31
- "eval_steps_per_second": 4.13,
32
- "step": 476
33
  },
34
  {
35
- "epoch": 1.050420168067227,
36
- "grad_norm": 0.16209110617637634,
37
- "learning_rate": 4.994758403361345e-05,
38
- "loss": 0.0477,
39
- "step": 500
40
  },
41
  {
42
  "epoch": 2.0,
43
- "eval_f1": 0.9230744441736807,
44
- "eval_loss": 0.010812721215188503,
45
- "eval_precision": 0.8937084772102274,
46
- "eval_recall": 0.9571896846985819,
47
- "eval_runtime": 12.587,
48
- "eval_samples_per_second": 131.644,
49
- "eval_steps_per_second": 4.131,
50
- "step": 952
51
  },
52
  {
53
- "epoch": 2.100840336134454,
54
- "grad_norm": 0.012455416843295097,
55
- "learning_rate": 4.989506302521009e-05,
56
- "loss": 0.0056,
57
- "step": 1000
58
  },
59
  {
60
  "epoch": 3.0,
61
- "eval_f1": 0.9348757835672437,
62
- "eval_loss": 0.01099320687353611,
63
- "eval_precision": 0.9214871357949422,
64
- "eval_recall": 0.9494909444630193,
65
- "eval_runtime": 12.579,
66
- "eval_samples_per_second": 131.728,
67
- "eval_steps_per_second": 4.134,
68
- "step": 1428
69
  },
70
  {
71
- "epoch": 3.1512605042016806,
72
- "grad_norm": 0.015093757770955563,
73
- "learning_rate": 4.9842542016806724e-05,
74
- "loss": 0.0029,
75
- "step": 1500
76
  },
77
  {
78
  "epoch": 4.0,
79
- "eval_f1": 0.9105180723758415,
80
- "eval_loss": 0.012333991006016731,
81
- "eval_precision": 0.8738638072175106,
82
- "eval_recall": 0.9524189376430204,
83
- "eval_runtime": 12.6079,
84
- "eval_samples_per_second": 131.426,
85
- "eval_steps_per_second": 4.124,
86
- "step": 1904
87
  }
88
  ],
89
  "logging_steps": 500,
90
- "max_steps": 476000,
91
  "num_input_tokens_seen": 0,
92
  "num_train_epochs": 1000,
93
  "save_steps": 500,
@@ -112,7 +119,7 @@
112
  "attributes": {}
113
  }
114
  },
115
- "total_flos": 1.5921584203628544e+16,
116
  "train_batch_size": 32,
117
  "trial_name": null,
118
  "trial_params": null
 
1
  {
2
+ "best_global_step": 609,
3
+ "best_metric": 0.010163484141230583,
4
+ "best_model_checkpoint": "projects/PetBERT_annonymisation/data/augment/arrow_4/checkpoint-609",
5
  "epoch": 4.0,
6
  "eval_steps": 500,
7
+ "global_step": 2436,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0,
14
+ "eval_f1": 0.0125291815966952,
15
+ "eval_loss": 2.3777594566345215,
16
+ "eval_precision": 0.09023052095482559,
17
+ "eval_recall": 0.0881670915774344,
18
+ "eval_runtime": 12.1157,
19
+ "eval_samples_per_second": 136.764,
20
+ "eval_steps_per_second": 4.292,
21
  "step": 0
22
  },
23
+ {
24
+ "epoch": 0.8210180623973727,
25
+ "grad_norm": 0.28877386450767517,
26
+ "learning_rate": 4.9959031198686376e-05,
27
+ "loss": 0.0484,
28
+ "step": 500
29
+ },
30
  {
31
  "epoch": 1.0,
32
+ "eval_f1": 0.9208551148897809,
33
+ "eval_loss": 0.010163484141230583,
34
+ "eval_precision": 0.9173087088894764,
35
+ "eval_recall": 0.9271621325936177,
36
+ "eval_runtime": 12.5384,
37
+ "eval_samples_per_second": 132.154,
38
+ "eval_steps_per_second": 4.147,
39
+ "step": 609
40
  },
41
  {
42
+ "epoch": 1.6420361247947455,
43
+ "grad_norm": 0.3570762574672699,
44
+ "learning_rate": 4.99179802955665e-05,
45
+ "loss": 0.0065,
46
+ "step": 1000
47
  },
48
  {
49
  "epoch": 2.0,
50
+ "eval_f1": 0.9295632654722351,
51
+ "eval_loss": 0.010717815719544888,
52
+ "eval_precision": 0.9179765798631134,
53
+ "eval_recall": 0.9419352898275708,
54
+ "eval_runtime": 12.5527,
55
+ "eval_samples_per_second": 132.004,
56
+ "eval_steps_per_second": 4.143,
57
+ "step": 1218
58
  },
59
  {
60
+ "epoch": 2.4630541871921183,
61
+ "grad_norm": 0.006953490898013115,
62
+ "learning_rate": 4.987692939244664e-05,
63
+ "loss": 0.0033,
64
+ "step": 1500
65
  },
66
  {
67
  "epoch": 3.0,
68
+ "eval_f1": 0.9048958394614014,
69
+ "eval_loss": 0.013671835884451866,
70
+ "eval_precision": 0.8760752650167597,
71
+ "eval_recall": 0.9419362439194514,
72
+ "eval_runtime": 12.5648,
73
+ "eval_samples_per_second": 131.877,
74
+ "eval_steps_per_second": 4.139,
75
+ "step": 1827
76
  },
77
  {
78
+ "epoch": 3.284072249589491,
79
+ "grad_norm": 0.03194739297032356,
80
+ "learning_rate": 4.983587848932677e-05,
81
+ "loss": 0.0024,
82
+ "step": 2000
83
  },
84
  {
85
  "epoch": 4.0,
86
+ "eval_f1": 0.926800098647886,
87
+ "eval_loss": 0.013943095691502094,
88
+ "eval_precision": 0.9002957711004475,
89
+ "eval_recall": 0.9554729790053276,
90
+ "eval_runtime": 12.6182,
91
+ "eval_samples_per_second": 131.319,
92
+ "eval_steps_per_second": 4.121,
93
+ "step": 2436
94
  }
95
  ],
96
  "logging_steps": 500,
97
+ "max_steps": 609000,
98
  "num_input_tokens_seen": 0,
99
  "num_train_epochs": 1000,
100
  "save_steps": 500,
 
119
  "attributes": {}
120
  }
121
  },
122
+ "total_flos": 2.035249251765043e+16,
123
  "train_batch_size": 32,
124
  "trial_name": null,
125
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b027a28b72c517e30a4a077c26526840f9f8e189cb0dc1de6469ad0645aeeb7
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10d352a09e2aecd9422bdcc1bf58522262d82cd9de793273e937f615b15344c1
3
  size 5304