FiveC committed on
Commit
15306ac
·
verified ·
1 Parent(s): d6b91cc

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:14aa548ee07ab31052157712a9e0da885e6a07b960f6e223e74213cb1443e6fa
3
  size 1219380555
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:909bf20fd33dfc2259e4df422f1bda32e44a0b9bf3d95c7a7e236b3464d3eff8
3
  size 1219380555
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:550d262c1ae5d8f37d1984f58337c1c2fc94034c0ddfc220eff610ba015afb5f
3
  size 652423827
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f3f8f5541517ae2c219789435c043a408058ae7f5c6d1ac4aec5f2156a0019a
3
  size 652423827
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c90dc5c3c5d48b21df844d28ac5f4dbdc02f7f7e7446ee48fcb6f3327de872fd
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:baa26f0a90138d1c3ea988ee3460c5f3e821f7b8e23c1f705aa1f04cf0e90744
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:030ce4e896dde4e5df8ae1c595b2e24f4e2c44ea8055948beb08188b9a047014
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba889a9edbbc48a836dabf466c2ef5e898237108147ba44abda4201a588a35a8
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,49 +1,76 @@
1
  {
2
- "best_metric": 1.3624887493510058,
3
- "best_model_checkpoint": "./routed_longformer_docmt/checkpoint-36",
4
- "epoch": 2.88,
5
  "eval_steps": 500,
6
- "global_step": 36,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.96,
13
- "eval_bleu": 0.7649978858780119,
14
- "eval_chrf": 15.452986698532497,
15
- "eval_comet": 0.24576674342155458,
16
- "eval_loss": 3.4590535163879395,
17
- "eval_runtime": 31.0399,
18
- "eval_samples_per_second": 1.611,
19
- "eval_steps_per_second": 0.226,
20
- "step": 12
21
  },
22
  {
23
- "epoch": 2.0,
24
- "eval_bleu": 1.2647603736034783,
25
- "eval_chrf": 16.316051179572895,
26
- "eval_comet": 0.23885925143957137,
27
- "eval_loss": 3.3582441806793213,
28
- "eval_runtime": 31.8746,
29
- "eval_samples_per_second": 1.569,
30
- "eval_steps_per_second": 0.22,
31
- "step": 25
32
  },
33
  {
34
- "epoch": 2.88,
35
- "eval_bleu": 1.3624887493510058,
36
- "eval_chrf": 16.339390714055682,
37
- "eval_comet": 0.23527576833963393,
38
- "eval_loss": 3.3164820671081543,
39
- "eval_runtime": 31.9738,
40
- "eval_samples_per_second": 1.564,
41
- "eval_steps_per_second": 0.219,
42
- "step": 36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  }
44
  ],
45
  "logging_steps": 100,
46
- "max_steps": 36,
47
  "num_input_tokens_seen": 0,
48
  "num_train_epochs": 3,
49
  "save_steps": 500,
@@ -54,7 +81,7 @@
54
  "should_evaluate": false,
55
  "should_log": false,
56
  "should_save": true,
57
- "should_training_stop": true
58
  },
59
  "attributes": {}
60
  }
 
1
  {
2
+ "best_metric": 19.45676397235889,
3
+ "best_model_checkpoint": "./routed_longformer_docmt/checkpoint-786",
4
+ "epoch": 0.9993642720915448,
5
  "eval_steps": 500,
6
+ "global_step": 786,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.12714558169103624,
13
+ "grad_norm": 2.7216010093688965,
14
+ "learning_rate": 2.875318066157761e-05,
15
+ "loss": 3.5051,
16
+ "step": 100
 
 
 
 
17
  },
18
  {
19
+ "epoch": 0.25429116338207247,
20
+ "grad_norm": 2.2804791927337646,
21
+ "learning_rate": 2.7480916030534352e-05,
22
+ "loss": 2.6407,
23
+ "step": 200
 
 
 
 
24
  },
25
  {
26
+ "epoch": 0.3814367450731087,
27
+ "grad_norm": 2.807426691055298,
28
+ "learning_rate": 2.6208651399491097e-05,
29
+ "loss": 2.3397,
30
+ "step": 300
31
+ },
32
+ {
33
+ "epoch": 0.5085823267641449,
34
+ "grad_norm": 1.993668556213379,
35
+ "learning_rate": 2.4936386768447838e-05,
36
+ "loss": 2.1398,
37
+ "step": 400
38
+ },
39
+ {
40
+ "epoch": 0.6357279084551812,
41
+ "grad_norm": 2.3265788555145264,
42
+ "learning_rate": 2.3664122137404583e-05,
43
+ "loss": 2.0065,
44
+ "step": 500
45
+ },
46
+ {
47
+ "epoch": 0.7628734901462174,
48
+ "grad_norm": 1.7298423051834106,
49
+ "learning_rate": 2.2391857506361324e-05,
50
+ "loss": 1.8887,
51
+ "step": 600
52
+ },
53
+ {
54
+ "epoch": 0.8900190718372537,
55
+ "grad_norm": 2.18813419342041,
56
+ "learning_rate": 2.1119592875318066e-05,
57
+ "loss": 1.8264,
58
+ "step": 700
59
+ },
60
+ {
61
+ "epoch": 0.9993642720915448,
62
+ "eval_bleu": 19.45676397235889,
63
+ "eval_chrf": 53.07001767638457,
64
+ "eval_comet": 0.4521310674039932,
65
+ "eval_loss": 1.5774508714675903,
66
+ "eval_runtime": 710.5738,
67
+ "eval_samples_per_second": 1.967,
68
+ "eval_steps_per_second": 0.246,
69
+ "step": 786
70
  }
71
  ],
72
  "logging_steps": 100,
73
+ "max_steps": 2358,
74
  "num_input_tokens_seen": 0,
75
  "num_train_epochs": 3,
76
  "save_steps": 500,
 
81
  "should_evaluate": false,
82
  "should_log": false,
83
  "should_save": true,
84
+ "should_training_stop": false
85
  },
86
  "attributes": {}
87
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf242734023e796808cd7ba63988f57ad6a54ba45632e7e45726f5e59fb4ed9c
3
  size 5713
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2f0c57289e38a06b3be61d14dc67c7ac6473c96a88fcc15db5bf382d46f7a10
3
  size 5713