Backup-bdg committed on
Commit
33885fd
·
verified ·
1 Parent(s): 39827f9

Update model weights after training (epoch 7, loss 4.0529)

Browse files
audio_decoder.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1628ac8faba5b54287f88aae6cd7885ccfa61f06ea0a09b08f01e91463b96df6
3
  size 1458410612
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0d441ee5242446730041340f4527da087aacd6ed532fe2360272445c4a95f61
3
  size 1458410612
cross_attention.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01f2c0d12d9a882e71b37a268aa426e99b03cbc1372629eb283a28a10d05d5c6
3
  size 174191400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b80e7ffc0041a1d36ca67d251b7699f834fd719ebda58a39a2fb0b50e44db53
3
  size 174191400
llm.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dcb8a06e9fc3b7ab14df9d1e54eea7fe4732a0ece031f4af02f4bed76416c620
3
  size 1506832040
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f634772387625e35cd41da1b9426234e6eab6e1df5648597b153a1e9095fe226
3
  size 1506832040
streaming_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 98,
3
- "unique_samples": 400,
4
- "total_yields": 800,
5
  "dataset_positions": {
6
  "WebSight": 386,
7
  "ScienceQA": 364,
@@ -13,8 +13,8 @@
13
  "WildChat": 200,
14
  "Synth-ShellExecution": 200,
15
  "Midjourney-Prompts": 200,
16
- "Synth-KnowledgeCutoff": 200,
17
- "Synth-GroundedResponse": 200,
18
  "CodeParrot-Clean": 200,
19
  "ShareGPT-Clean": 200,
20
  "Synth-Issues": 200,
@@ -26,7 +26,7 @@
26
  "HumanEval-JavaScript": 164,
27
  "OpenOrca": 450,
28
  "Synth-MultiStepExecution": 200,
29
- "Synth-Citation": 200,
30
  "NoRobots": 450,
31
  "Synth-LanguageSetup": 200,
32
  "Function-Calling-ChatML": 200,
@@ -38,17 +38,17 @@
38
  "SD-Prompts": 200,
39
  "Synth-Diffs": 200,
40
  "Golang-Coder": 200,
41
- "Synth-ConfidenceLevel": 200,
42
  "Synth-RepoContext": 200,
43
  "HumanEval-Go": 164,
44
- "Synth-SelfCorrection": 200,
45
- "Synth-FactCheck": 200,
46
  "Synth-Downloads": 200,
47
- "Synth-RetrievalGrounded": 200,
48
- "Synth-IDK": 200,
49
  "Synth-APIGen": 200,
50
  "Synth-PythonScripts": 200,
51
- "Synth-Uncertainty": 200,
52
  "HumanEval-Python": 164,
53
  "Golang-QA-2k": 200,
54
  "Synth-ShellErrors": 200,
@@ -145,7 +145,16 @@
145
  "Cosmopedia-OpenStax": 600,
146
  "MedMCQA": 350,
147
  "Medical-Reasoning-SFT-Mega": 350,
148
- "Medical-O1-Reasoning-EN": 350
 
 
 
 
 
 
 
 
 
149
  },
150
  "image": {
151
  "WebSight": 386,
@@ -170,10 +179,11 @@
170
  "audio": {}
171
  },
172
  "modality_counts": {
173
- "text": 400,
174
  "image": 0,
175
  "video": 0,
176
- "audio": 0
 
177
  },
178
  "last_modality": null
179
  }
 
1
  {
2
+ "epoch": 111,
3
+ "unique_samples": 450,
4
+ "total_yields": 900,
5
  "dataset_positions": {
6
  "WebSight": 386,
7
  "ScienceQA": 364,
 
13
  "WildChat": 200,
14
  "Synth-ShellExecution": 200,
15
  "Midjourney-Prompts": 200,
16
+ "Synth-KnowledgeCutoff": 550,
17
+ "Synth-GroundedResponse": 550,
18
  "CodeParrot-Clean": 200,
19
  "ShareGPT-Clean": 200,
20
  "Synth-Issues": 200,
 
26
  "HumanEval-JavaScript": 164,
27
  "OpenOrca": 450,
28
  "Synth-MultiStepExecution": 200,
29
+ "Synth-Citation": 550,
30
  "NoRobots": 450,
31
  "Synth-LanguageSetup": 200,
32
  "Function-Calling-ChatML": 200,
 
38
  "SD-Prompts": 200,
39
  "Synth-Diffs": 200,
40
  "Golang-Coder": 200,
41
+ "Synth-ConfidenceLevel": 550,
42
  "Synth-RepoContext": 200,
43
  "HumanEval-Go": 164,
44
+ "Synth-SelfCorrection": 550,
45
+ "Synth-FactCheck": 550,
46
  "Synth-Downloads": 200,
47
+ "Synth-RetrievalGrounded": 550,
48
+ "Synth-IDK": 550,
49
  "Synth-APIGen": 200,
50
  "Synth-PythonScripts": 200,
51
+ "Synth-Uncertainty": 550,
52
  "HumanEval-Python": 164,
53
  "Golang-QA-2k": 200,
54
  "Synth-ShellErrors": 200,
 
145
  "Cosmopedia-OpenStax": 600,
146
  "MedMCQA": 350,
147
  "Medical-Reasoning-SFT-Mega": 350,
148
+ "Medical-O1-Reasoning-EN": 350,
149
+ "Synth-SelfCorrection": 550,
150
+ "Synth-GroundedResponse": 550,
151
+ "Synth-IDK": 550,
152
+ "Synth-KnowledgeCutoff": 550,
153
+ "Synth-RetrievalGrounded": 550,
154
+ "Synth-FactCheck": 550,
155
+ "Synth-ConfidenceLevel": 550,
156
+ "Synth-Citation": 550,
157
+ "Synth-Uncertainty": 550
158
  },
159
  "image": {
160
  "WebSight": 386,
 
179
  "audio": {}
180
  },
181
  "modality_counts": {
182
+ "text": 0,
183
  "image": 0,
184
  "video": 0,
185
+ "audio": 0,
186
+ "anti_hallucination": 450
187
  },
188
  "last_modality": null
189
  }
trainer_state.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "best_model_checkpoint": "/kaggle/working/xoron-final",
3
- "best_metric": 5.354270628392697,
4
  "epoch": 7,
5
  "epochs_completed": 7,
6
- "global_step": 350,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [],
10
  "logging_steps": 50,
11
- "max_steps": 350,
12
  "num_train_epochs": 7,
13
  "total_flos": 0,
14
  "train_batch_size": 1,
 
1
  {
2
  "best_model_checkpoint": "/kaggle/working/xoron-final",
3
+ "best_metric": 4.052912997868326,
4
  "epoch": 7,
5
  "epochs_completed": 7,
6
+ "global_step": 392,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [],
10
  "logging_steps": 50,
11
+ "max_steps": 392,
12
  "num_train_epochs": 7,
13
  "total_flos": 0,
14
  "train_batch_size": 1,
training_state.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:148961e9ff451a61d54b8edf577bcc3dd080efec47d5d54d58a5870199de86a1
3
  size 1514912171
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6774f59a3551cdf2b9af488e15bdec2bf75577937a4376aa12ec72a1c950e780
3
  size 1514912171