SystemAdmin123 commited on
Commit
2f97fa6
·
verified ·
1 Parent(s): befb28e

Training in progress, step 40, checkpoint

Browse files
last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb99c0df34a0990c299c163035be2fb4daf7d4ddf92bd71ff12452ee00d8eed4
3
  size 4874664552
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b5c20dc59c769d88fe81b2b0843f23d2dd86b47b4591ef40bad9115cd2b4385
3
  size 4874664552
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e17d26806004f4fc8e683971c9e111440b1133e6e456169fb6d9758dec3547f8
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d8ef29282721049bf0104c9d2701bfa26a60707403dbad153e661e9df28751f
3
  size 4932751008
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db4a8a5f3541d2e9c54777c371f061e537cd129e88574f60504c65c9017a2cbd
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a175e2a7592da2781889f8d417906512e54b2c4cf1831f1316e119d4722001f4
3
  size 4330865200
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc20da2dc721c6264368aa8ab9e1eac56b8548a6af31018945509c6e24543b50
3
  size 1086998656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d149d0190b4a1e8c630935731679b382222057f720f060d7786d7b42e66238bd
3
  size 1086998656
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce81038539f57e07cbf37c2a53fbd8d8b4db7aa59bc8bd7dfaf9ead4ebe305d7
3
  size 15465450874
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbd3cf7e9f20636c638ad5a407323f188cfb4d34ff7182f544b0b533f9bad084
3
  size 15465450874
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9eb6f771276ba5237f3c355248bddcc13c4d374f8d097f5efd371e951b386413
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2667eee8d875561f6539074432e0dd0e4895799b436b03ff4f88447a79b23c54
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35e43e2ec7859286350f64460ad789f06c3de5222106d1d96c1b94c005e151a9
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f453d97040b5753e846b52c3f3f2ec936ae564e3d843c2334ebea047a9d227d
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b08017151a2dbd696b41d5d87ccd01e5a71ba305ffc7340d5c94388204955632
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73d7e7515bf9366d4128b5be03fea719bb7ed473249f57b8c5a1f2a51581ebf8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.10471204188481675,
5
  "eval_steps": 40,
6
- "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -29,6 +29,28 @@
29
  "learning_rate": 7.272727272727273e-05,
30
  "loss": 1.8451,
31
  "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
33
  ],
34
  "logging_steps": 10,
@@ -48,7 +70,7 @@
48
  "attributes": {}
49
  }
50
  },
51
- "total_flos": 6949228630769664.0,
52
  "train_batch_size": 2,
53
  "trial_name": null,
54
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.2094240837696335,
5
  "eval_steps": 40,
6
+ "global_step": 40,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
29
  "learning_rate": 7.272727272727273e-05,
30
  "loss": 1.8451,
31
  "step": 20
32
+ },
33
+ {
34
+ "epoch": 0.15706806282722513,
35
+ "grad_norm": 3.484375,
36
+ "learning_rate": 0.00010909090909090909,
37
+ "loss": 1.8315,
38
+ "step": 30
39
+ },
40
+ {
41
+ "epoch": 0.2094240837696335,
42
+ "grad_norm": 3.390625,
43
+ "learning_rate": 0.00014545454545454546,
44
+ "loss": 2.0039,
45
+ "step": 40
46
+ },
47
+ {
48
+ "epoch": 0.2094240837696335,
49
+ "eval_loss": 1.9543291330337524,
50
+ "eval_runtime": 73.5405,
51
+ "eval_samples_per_second": 20.411,
52
+ "eval_steps_per_second": 5.113,
53
+ "step": 40
54
  }
55
  ],
56
  "logging_steps": 10,
 
70
  "attributes": {}
71
  }
72
  },
73
+ "total_flos": 1.3898457261539328e+16,
74
  "train_batch_size": 2,
75
  "trial_name": null,
76
  "trial_params": null