iqbalpurba26 commited on
Commit
ef27c77
·
verified ·
1 Parent(s): 3de4893

Upload checkpoint-1524

Browse files
Files changed (7) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. scaler.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +45 -45
  7. training_args.bin +1 -1
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5afe85109b34d54bcdfcd043a611925a1c6d60bbb4981d1cb9762832ae057cb6
3
  size 1112205008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59f4c092752f656d7a076bd6d75f9dc1396b03c1c2310d084eb38ea6ca68e08e
3
  size 1112205008
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e37961b0010459a533447ff73c868d6c8d9ea78223e30f7039eee512ed9db92e
3
  size 2224532875
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7e1ef0dd71e8376937aebde0b6b9decdd1fff7ad00ce791495499f2c5a99965
3
  size 2224532875
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9468bea5147f89b99cb8342b2c15e9c9fd4431e9b3dcb297e5ffa0319bad9539
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:686d66298440cd74433f9965f0ac9b35e9e9cc81f06be802f4209cb45ed608f5
3
  size 14645
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de9873f44982749c625477131e8a40ba78f0297c0bc9d5f5c128d769935154aa
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aef935e098b8ac946068ed61f5c6a616a24e161d5e715824cc90693056d84481
3
  size 1383
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ee3ddce54887ddb31fdf9ce85d46f40a6644164a1ce7fc32da5986665968d40
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78a28f8cafb3c310082af0a8d650de632e1dc1d6ee3b028d80847d18180123f7
3
  size 1465
trainer_state.json CHANGED
@@ -1,74 +1,74 @@
1
  {
2
- "best_global_step": 1365,
3
- "best_metric": 0.8721528265204077,
4
- "best_model_checkpoint": "../model/checkpoint-1365",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
7
- "global_step": 1365,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "grad_norm": 12.363642692565918,
15
- "learning_rate": 1.3347985347985349e-05,
16
- "loss": 0.4356,
17
- "step": 455
18
  },
19
  {
20
  "epoch": 1.0,
21
- "eval_f1_macro": 0.8346323361157264,
22
- "eval_f1_micro": 0.8334897691008072,
23
- "eval_loss": 0.30361834168434143,
24
- "eval_precision": 0.8729846637829335,
25
- "eval_recall": 0.7974137931034483,
26
- "eval_runtime": 13.9315,
27
- "eval_samples_per_second": 261.135,
28
- "eval_steps_per_second": 4.091,
29
- "step": 455
30
  },
31
  {
32
  "epoch": 2.0,
33
- "grad_norm": 8.248332023620605,
34
- "learning_rate": 6.681318681318681e-06,
35
- "loss": 0.2774,
36
- "step": 910
37
  },
38
  {
39
  "epoch": 2.0,
40
- "eval_f1_macro": 0.8615056948788721,
41
- "eval_f1_micro": 0.8600631852815461,
42
- "eval_loss": 0.2571789026260376,
43
- "eval_precision": 0.8910281093569503,
44
- "eval_recall": 0.8311781609195402,
45
- "eval_runtime": 14.3265,
46
- "eval_samples_per_second": 253.935,
47
- "eval_steps_per_second": 3.979,
48
- "step": 910
49
  },
50
  {
51
  "epoch": 3.0,
52
- "grad_norm": 8.038055419921875,
53
- "learning_rate": 1.4652014652014653e-08,
54
- "loss": 0.221,
55
- "step": 1365
56
  },
57
  {
58
  "epoch": 3.0,
59
- "eval_f1_macro": 0.8721528265204077,
60
- "eval_f1_micro": 0.8710601719197708,
61
- "eval_loss": 0.25890466570854187,
62
- "eval_precision": 0.8685714285714285,
63
- "eval_recall": 0.8735632183908046,
64
- "eval_runtime": 13.9158,
65
- "eval_samples_per_second": 261.429,
66
- "eval_steps_per_second": 4.096,
67
- "step": 1365
68
  }
69
  ],
70
  "logging_steps": 500,
71
- "max_steps": 1365,
72
  "num_input_tokens_seen": 0,
73
  "num_train_epochs": 3,
74
  "save_steps": 500,
@@ -84,7 +84,7 @@
84
  "attributes": {}
85
  }
86
  },
87
- "total_flos": 1.148400823329792e+16,
88
  "train_batch_size": 32,
89
  "trial_name": null,
90
  "trial_params": null
 
1
  {
2
+ "best_global_step": 1524,
3
+ "best_metric": 0.8752606955526183,
4
+ "best_model_checkpoint": "../models/checkpoint-1524",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
7
+ "global_step": 1524,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "grad_norm": 6.651031970977783,
15
+ "learning_rate": 1.3346456692913388e-05,
16
+ "loss": 0.4228,
17
+ "step": 508
18
  },
19
  {
20
  "epoch": 1.0,
21
+ "eval_f1_macro": 0.843772475820936,
22
+ "eval_f1_micro": 0.8440423247968103,
23
+ "eval_loss": 0.30257782340049744,
24
+ "eval_precision": 0.810126582278481,
25
+ "eval_recall": 0.8809218950064021,
26
+ "eval_runtime": 15.8588,
27
+ "eval_samples_per_second": 256.072,
28
+ "eval_steps_per_second": 4.036,
29
+ "step": 508
30
  },
31
  {
32
  "epoch": 2.0,
33
+ "grad_norm": 7.698607444763184,
34
+ "learning_rate": 6.6797900262467195e-06,
35
+ "loss": 0.2626,
36
+ "step": 1016
37
  },
38
  {
39
  "epoch": 2.0,
40
+ "eval_f1_macro": 0.8678396468883778,
41
+ "eval_f1_micro": 0.8675043741052967,
42
+ "eval_loss": 0.2636893689632416,
43
+ "eval_precision": 0.8621561808409738,
44
+ "eval_recall": 0.8729193341869398,
45
+ "eval_runtime": 15.5885,
46
+ "eval_samples_per_second": 260.513,
47
+ "eval_steps_per_second": 4.106,
48
+ "step": 1016
49
  },
50
  {
51
  "epoch": 3.0,
52
+ "grad_norm": 13.880615234375,
53
+ "learning_rate": 1.3123359580052495e-08,
54
+ "loss": 0.2073,
55
+ "step": 1524
56
  },
57
  {
58
  "epoch": 3.0,
59
+ "eval_f1_macro": 0.8752606955526183,
60
+ "eval_f1_micro": 0.8743307086614174,
61
+ "eval_loss": 0.2604271173477173,
62
+ "eval_precision": 0.8605083694978302,
63
+ "eval_recall": 0.8886043533930857,
64
+ "eval_runtime": 15.6076,
65
+ "eval_samples_per_second": 260.194,
66
+ "eval_steps_per_second": 4.101,
67
+ "step": 1524
68
  }
69
  ],
70
  "logging_steps": 500,
71
+ "max_steps": 1524,
72
  "num_input_tokens_seen": 0,
73
  "num_train_epochs": 3,
74
  "save_steps": 500,
 
84
  "attributes": {}
85
  }
86
  },
87
+ "total_flos": 1.282113861663744e+16,
88
  "train_batch_size": 32,
89
  "trial_name": null,
90
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ece358947c1a7f05b113e7f0bdd313472cf78504cee40430b7e5a6fbfdf8fb62
3
  size 5841
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8688d63c0a526f909aa428b1b210ff6f9ee2fd5ca3a957e8eadfcf436359f3e
3
  size 5841