Mels22 committed on
Commit
07225e3
·
verified ·
1 Parent(s): 658814d

Training in progress, epoch 2, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ca94817db72d0e01e355a165a8b4e1af9fbe59e283c7d7f5c77cedf571ab407
3
  size 5687360
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:599d9f1c9b039546505a4690f92c0ca34439708128873aeefdf3c298d02a38e8
3
  size 5687360
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4055879a8af4ed670e5ae958567a5cf0cf868bbcf34153b2b96e9a8c3015f7f1
3
  size 154536948
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8ca44b66cda373206a7924cdfb22cf24b665f8aa42f8ff176882de5d0eb0462
3
  size 154536948
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b726c873ee1e0285a31b2d6e8aa7348232298bcb52b3df38302b4af9c97f2172
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:805b952d6ed276148c3b93345e8330ad707ed6e8acffefe39b19d4e93e713656
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46c9a89756eab4cc469f92a3efb736bac5da229eda0a7d5c4d203923562b5961
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf87d2f6e922b4fc58db5e9445c0aa14b6455c6deeb769b4ec03d2f176f13118
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.0,
6
  "eval_steps": 500,
7
- "global_step": 72,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -29,6 +29,48 @@
29
  "learning_rate": 0.0004400347115466442,
30
  "loss": 1.5577,
31
  "step": 66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
33
  ],
34
  "logging_steps": 22,
@@ -43,12 +85,12 @@
43
  "should_evaluate": false,
44
  "should_log": false,
45
  "should_save": true,
46
- "should_training_stop": false
47
  },
48
  "attributes": {}
49
  }
50
  },
51
- "total_flos": 1.2646681266880512e+16,
52
  "train_batch_size": 4,
53
  "trial_name": null,
54
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.961672473867596,
6
  "eval_steps": 500,
7
+ "global_step": 213,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
29
  "learning_rate": 0.0004400347115466442,
30
  "loss": 1.5577,
31
  "step": 66
32
+ },
33
+ {
34
+ "epoch": 1.2229965156794425,
35
+ "grad_norm": 0.605900228023529,
36
+ "learning_rate": 0.00037022215582942734,
37
+ "loss": 1.5324,
38
+ "step": 88
39
+ },
40
+ {
41
+ "epoch": 1.529616724738676,
42
+ "grad_norm": 0.5823822021484375,
43
+ "learning_rate": 0.00028483851926153393,
44
+ "loss": 1.4878,
45
+ "step": 110
46
+ },
47
+ {
48
+ "epoch": 1.8362369337979094,
49
+ "grad_norm": 1.2892831563949585,
50
+ "learning_rate": 0.0001949426245628773,
51
+ "loss": 1.5933,
52
+ "step": 132
53
+ },
54
+ {
55
+ "epoch": 2.1393728222996518,
56
+ "grad_norm": 0.6070840954780579,
57
+ "learning_rate": 0.00011217771864447396,
58
+ "loss": 1.3866,
59
+ "step": 154
60
+ },
61
+ {
62
+ "epoch": 2.445993031358885,
63
+ "grad_norm": 0.5739336609840393,
64
+ "learning_rate": 4.7263448253322574e-05,
65
+ "loss": 1.4519,
66
+ "step": 176
67
+ },
68
+ {
69
+ "epoch": 2.7526132404181185,
70
+ "grad_norm": 0.5276680588722229,
71
+ "learning_rate": 8.607459597809564e-06,
72
+ "loss": 1.4926,
73
+ "step": 198
74
  }
75
  ],
76
  "logging_steps": 22,
 
85
  "should_evaluate": false,
86
  "should_log": false,
87
  "should_save": true,
88
+ "should_training_stop": true
89
  },
90
  "attributes": {}
91
  }
92
  },
93
+ "total_flos": 3.746593107659981e+16,
94
  "train_batch_size": 4,
95
  "trial_name": null,
96
  "trial_params": null