SystemAdmin123 commited on
Commit
478d9c6
·
verified ·
1 Parent(s): 326b84b

Training in progress, step 80, checkpoint

Browse files
last-checkpoint/model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:399339e7e9729cadf83e4291e8d1f52cdf87d221e960a2bc5c0c3d73bc165c5a
3
  size 4976947640
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fad688aa4cd7c281ac69904318599eb36efc8a2d31ceff356bfb837049ac2448
3
  size 4976947640
last-checkpoint/model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38f4cc1a24d0b85ffbfae769b334b74f7fbae4b87186977fe17c5497ad180fa9
3
  size 4966790216
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dc04b843b1afea8ccda6ffe1ef6c6b87917f4f5fb6f30c2d0c708a0e20c4574
3
  size 4966790216
last-checkpoint/model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7198e05c748d588cce776633ff44d0fc7ad90dab772de10c72446c856932c08
3
  size 2158075194
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb07680d625dd45fbbddadf89625d2156bdb1038d76c129648ca78a74711cd52
3
  size 2158075194
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cfb3e4512e55f025985c0d6c5f29042f27479ab5b5a2f9637cddc12386d4925
3
  size 12291216748
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b822eaa8bd74c1deab3ca5a01acaa34c22773b0a367ce6e50ef2a34fe26ec94
3
  size 12291216748
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3586098707d2957df6ef3e483057ea3562f7346e6119a129cb2cf6003c7f89b
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4de625c1f1d2b13bc455a5c9ac91a5410dc5d8bd1f3408dde82d07d045a46f90
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0164d72205b9e51777b19957f1b47571930d2781984e660ccfae2b5af3fc748e
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:995528e674600af77a00417696d339d5e638d4bbdf33e157e2253a8583531200
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a1b22b9cc881f5982527af53e598bb5ac64ac0a314645d7d2c1a629460cd671
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69a88bb0ffea7486980c2da18fda2e0c02dd389c0203de8a960915dbc0a2cf70
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e897f16517849a06894ae8867d75800c12bffee7e87cd6c8e2f792826e904ada
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7653cea2d766b8ca1ba3fbaf4fb45a4729136b0a3eda4b34769e4708680d4d9b
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:740031d4f57d6c88f6a7fb938e78d9afddeb77e3969b2145157d2e65123b372c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed051585a93ab592517e46872815f40c0e3b41050eca4ba6345891a988101280
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.066006600660066,
5
  "eval_steps": 40,
6
- "global_step": 40,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -51,6 +51,42 @@
51
  "eval_samples_per_second": 46.566,
52
  "eval_steps_per_second": 3.882,
53
  "step": 40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  }
55
  ],
56
  "logging_steps": 10,
@@ -70,7 +106,7 @@
70
  "attributes": {}
71
  }
72
  },
73
- "total_flos": 3.4471959420993536e+16,
74
  "train_batch_size": 3,
75
  "trial_name": null,
76
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.132013201320132,
5
  "eval_steps": 40,
6
+ "global_step": 80,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
51
  "eval_samples_per_second": 46.566,
52
  "eval_steps_per_second": 3.882,
53
  "step": 40
54
+ },
55
+ {
56
+ "epoch": 0.08250825082508251,
57
+ "grad_norm": 16.5,
58
+ "learning_rate": 3.571428571428572e-05,
59
+ "loss": 4.6939,
60
+ "step": 50
61
+ },
62
+ {
63
+ "epoch": 0.09900990099009901,
64
+ "grad_norm": 16.0,
65
+ "learning_rate": 4.2857142857142856e-05,
66
+ "loss": 4.3269,
67
+ "step": 60
68
+ },
69
+ {
70
+ "epoch": 0.11551155115511551,
71
+ "grad_norm": 11.75,
72
+ "learning_rate": 5e-05,
73
+ "loss": 4.0728,
74
+ "step": 70
75
+ },
76
+ {
77
+ "epoch": 0.132013201320132,
78
+ "grad_norm": 5.5625,
79
+ "learning_rate": 5.714285714285714e-05,
80
+ "loss": 3.9329,
81
+ "step": 80
82
+ },
83
+ {
84
+ "epoch": 0.132013201320132,
85
+ "eval_loss": 3.323232412338257,
86
+ "eval_runtime": 215.3024,
87
+ "eval_samples_per_second": 46.353,
88
+ "eval_steps_per_second": 3.864,
89
+ "step": 80
90
  }
91
  ],
92
  "logging_steps": 10,
 
106
  "attributes": {}
107
  }
108
  },
109
+ "total_flos": 6.894391884198707e+16,
110
  "train_batch_size": 3,
111
  "trial_name": null,
112
  "trial_params": null