ccore committed on
Commit
bd744fc
·
verified ·
1 Parent(s): c6faa49

Training in progress, epoch 4, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:180799923d60094382644c54290e59e477207941e6639fc451e41236d3e1eb4a
3
  size 1447317080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:622ceda914025a3cb6c3aea861afbd414ca14e4ada07549a490403bb60ed436e
3
  size 1447317080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d29276ef613a478783a8b31a7b2c7f90cd8b1790f90084070a56b470439ccc13
3
  size 2894813242
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e7ab6da7bbcf285ba92b6a319955403be98c63cb8e5f808e2fac2b858c00b04
3
  size 2894813242
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c062f7f375beded48b5337f5a3f3a5cb38807fa3e85dbf3e294c0ab6b627bfc2
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9899ccda7f0d8d9511991180b93aab508ce6e8489de708c88ad1188e7e1d90d6
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07e1a0159d4af8f2aff362ffe79d7838d580f34554f42bcc8b39206c91c36519
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:230edc29c7f4d0d371abd36f9f7bae2a11f65b418f970748364cb37008e3f3ef
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.3892712891101837,
3
- "best_model_checkpoint": "./opt_trained2/checkpoint-2409",
4
- "epoch": 3.0,
5
  "eval_steps": 500,
6
- "global_step": 2409,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -59,6 +59,28 @@
59
  "eval_samples_per_second": 10.439,
60
  "eval_steps_per_second": 2.61,
61
  "step": 2409
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  }
63
  ],
64
  "logging_steps": 500,
@@ -78,7 +100,7 @@
78
  "attributes": {}
79
  }
80
  },
81
- "total_flos": 9.386164249704e+16,
82
  "train_batch_size": 4,
83
  "trial_name": null,
84
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.37884894013404846,
3
+ "best_model_checkpoint": "./opt_trained2/checkpoint-3212",
4
+ "epoch": 4.0,
5
  "eval_steps": 500,
6
+ "global_step": 3212,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
59
  "eval_samples_per_second": 10.439,
60
  "eval_steps_per_second": 2.61,
61
  "step": 2409
62
+ },
63
+ {
64
+ "epoch": 3.1133250311332503,
65
+ "grad_norm": 0.4317278563976288,
66
+ "learning_rate": 0.0003378331257783313,
67
+ "loss": 3.0505,
68
+ "step": 2500
69
+ },
70
+ {
71
+ "epoch": 3.7359900373599,
72
+ "grad_norm": 0.3730609118938446,
73
+ "learning_rate": 0.00032537982565379825,
74
+ "loss": 2.9785,
75
+ "step": 3000
76
+ },
77
+ {
78
+ "epoch": 4.0,
79
+ "eval_loss": 0.37884894013404846,
80
+ "eval_runtime": 273.844,
81
+ "eval_samples_per_second": 10.429,
82
+ "eval_steps_per_second": 2.607,
83
+ "step": 3212
84
  }
85
  ],
86
  "logging_steps": 500,
 
100
  "attributes": {}
101
  }
102
  },
103
+ "total_flos": 1.2514885666272e+17,
104
  "train_batch_size": 4,
105
  "trial_name": null,
106
  "trial_params": null