Any-to-Any
Transformers
Safetensors
English
xoron
multimodal
Mixture of Experts
text-to-image
image editing
image to video
text-to-video
video editing
text-to-speech
speech-to-text
speech-to-speech
image-to-text
video-to-text
agentic
tool-use
flow-matching
3d-rope
titok
vidtok
dual-stream-attention
zero-shot-voice-cloning
bigvgan
snake-activation
multi-receptive-field-fusion
custom_code
Update model weights after training (epoch 7, loss 4.0529)
Browse files- audio_decoder.safetensors +1 -1
- cross_attention.safetensors +1 -1
- llm.safetensors +1 -1
- streaming_state.json +25 -15
- trainer_state.json +3 -3
- training_state.pt +1 -1
audio_decoder.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1458410612
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0d441ee5242446730041340f4527da087aacd6ed532fe2360272445c4a95f61
|
| 3 |
size 1458410612
|
cross_attention.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 174191400
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b80e7ffc0041a1d36ca67d251b7699f834fd719ebda58a39a2fb0b50e44db53
|
| 3 |
size 174191400
|
llm.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1506832040
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f634772387625e35cd41da1b9426234e6eab6e1df5648597b153a1e9095fe226
|
| 3 |
size 1506832040
|
streaming_state.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
-
"epoch":
|
| 3 |
-
"unique_samples":
|
| 4 |
-
"total_yields":
|
| 5 |
"dataset_positions": {
|
| 6 |
"WebSight": 386,
|
| 7 |
"ScienceQA": 364,
|
|
@@ -13,8 +13,8 @@
|
|
| 13 |
"WildChat": 200,
|
| 14 |
"Synth-ShellExecution": 200,
|
| 15 |
"Midjourney-Prompts": 200,
|
| 16 |
-
"Synth-KnowledgeCutoff":
|
| 17 |
-
"Synth-GroundedResponse":
|
| 18 |
"CodeParrot-Clean": 200,
|
| 19 |
"ShareGPT-Clean": 200,
|
| 20 |
"Synth-Issues": 200,
|
|
@@ -26,7 +26,7 @@
|
|
| 26 |
"HumanEval-JavaScript": 164,
|
| 27 |
"OpenOrca": 450,
|
| 28 |
"Synth-MultiStepExecution": 200,
|
| 29 |
-
"Synth-Citation":
|
| 30 |
"NoRobots": 450,
|
| 31 |
"Synth-LanguageSetup": 200,
|
| 32 |
"Function-Calling-ChatML": 200,
|
|
@@ -38,17 +38,17 @@
|
|
| 38 |
"SD-Prompts": 200,
|
| 39 |
"Synth-Diffs": 200,
|
| 40 |
"Golang-Coder": 200,
|
| 41 |
-
"Synth-ConfidenceLevel":
|
| 42 |
"Synth-RepoContext": 200,
|
| 43 |
"HumanEval-Go": 164,
|
| 44 |
-
"Synth-SelfCorrection":
|
| 45 |
-
"Synth-FactCheck":
|
| 46 |
"Synth-Downloads": 200,
|
| 47 |
-
"Synth-RetrievalGrounded":
|
| 48 |
-
"Synth-IDK":
|
| 49 |
"Synth-APIGen": 200,
|
| 50 |
"Synth-PythonScripts": 200,
|
| 51 |
-
"Synth-Uncertainty":
|
| 52 |
"HumanEval-Python": 164,
|
| 53 |
"Golang-QA-2k": 200,
|
| 54 |
"Synth-ShellErrors": 200,
|
|
@@ -145,7 +145,16 @@
|
|
| 145 |
"Cosmopedia-OpenStax": 600,
|
| 146 |
"MedMCQA": 350,
|
| 147 |
"Medical-Reasoning-SFT-Mega": 350,
|
| 148 |
-
"Medical-O1-Reasoning-EN": 350
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
},
|
| 150 |
"image": {
|
| 151 |
"WebSight": 386,
|
|
@@ -170,10 +179,11 @@
|
|
| 170 |
"audio": {}
|
| 171 |
},
|
| 172 |
"modality_counts": {
|
| 173 |
-
"text":
|
| 174 |
"image": 0,
|
| 175 |
"video": 0,
|
| 176 |
-
"audio": 0
|
|
|
|
| 177 |
},
|
| 178 |
"last_modality": null
|
| 179 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"epoch": 111,
|
| 3 |
+
"unique_samples": 450,
|
| 4 |
+
"total_yields": 900,
|
| 5 |
"dataset_positions": {
|
| 6 |
"WebSight": 386,
|
| 7 |
"ScienceQA": 364,
|
|
|
|
| 13 |
"WildChat": 200,
|
| 14 |
"Synth-ShellExecution": 200,
|
| 15 |
"Midjourney-Prompts": 200,
|
| 16 |
+
"Synth-KnowledgeCutoff": 550,
|
| 17 |
+
"Synth-GroundedResponse": 550,
|
| 18 |
"CodeParrot-Clean": 200,
|
| 19 |
"ShareGPT-Clean": 200,
|
| 20 |
"Synth-Issues": 200,
|
|
|
|
| 26 |
"HumanEval-JavaScript": 164,
|
| 27 |
"OpenOrca": 450,
|
| 28 |
"Synth-MultiStepExecution": 200,
|
| 29 |
+
"Synth-Citation": 550,
|
| 30 |
"NoRobots": 450,
|
| 31 |
"Synth-LanguageSetup": 200,
|
| 32 |
"Function-Calling-ChatML": 200,
|
|
|
|
| 38 |
"SD-Prompts": 200,
|
| 39 |
"Synth-Diffs": 200,
|
| 40 |
"Golang-Coder": 200,
|
| 41 |
+
"Synth-ConfidenceLevel": 550,
|
| 42 |
"Synth-RepoContext": 200,
|
| 43 |
"HumanEval-Go": 164,
|
| 44 |
+
"Synth-SelfCorrection": 550,
|
| 45 |
+
"Synth-FactCheck": 550,
|
| 46 |
"Synth-Downloads": 200,
|
| 47 |
+
"Synth-RetrievalGrounded": 550,
|
| 48 |
+
"Synth-IDK": 550,
|
| 49 |
"Synth-APIGen": 200,
|
| 50 |
"Synth-PythonScripts": 200,
|
| 51 |
+
"Synth-Uncertainty": 550,
|
| 52 |
"HumanEval-Python": 164,
|
| 53 |
"Golang-QA-2k": 200,
|
| 54 |
"Synth-ShellErrors": 200,
|
|
|
|
| 145 |
"Cosmopedia-OpenStax": 600,
|
| 146 |
"MedMCQA": 350,
|
| 147 |
"Medical-Reasoning-SFT-Mega": 350,
|
| 148 |
+
"Medical-O1-Reasoning-EN": 350,
|
| 149 |
+
"Synth-SelfCorrection": 550,
|
| 150 |
+
"Synth-GroundedResponse": 550,
|
| 151 |
+
"Synth-IDK": 550,
|
| 152 |
+
"Synth-KnowledgeCutoff": 550,
|
| 153 |
+
"Synth-RetrievalGrounded": 550,
|
| 154 |
+
"Synth-FactCheck": 550,
|
| 155 |
+
"Synth-ConfidenceLevel": 550,
|
| 156 |
+
"Synth-Citation": 550,
|
| 157 |
+
"Synth-Uncertainty": 550
|
| 158 |
},
|
| 159 |
"image": {
|
| 160 |
"WebSight": 386,
|
|
|
|
| 179 |
"audio": {}
|
| 180 |
},
|
| 181 |
"modality_counts": {
|
| 182 |
+
"text": 0,
|
| 183 |
"image": 0,
|
| 184 |
"video": 0,
|
| 185 |
+
"audio": 0,
|
| 186 |
+
"anti_hallucination": 450
|
| 187 |
},
|
| 188 |
"last_modality": null
|
| 189 |
}
|
trainer_state.json
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
{
|
| 2 |
"best_model_checkpoint": "/kaggle/working/xoron-final",
|
| 3 |
-
"best_metric":
|
| 4 |
"epoch": 7,
|
| 5 |
"epochs_completed": 7,
|
| 6 |
-
"global_step":
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
| 9 |
"log_history": [],
|
| 10 |
"logging_steps": 50,
|
| 11 |
-
"max_steps":
|
| 12 |
"num_train_epochs": 7,
|
| 13 |
"total_flos": 0,
|
| 14 |
"train_batch_size": 1,
|
|
|
|
| 1 |
{
|
| 2 |
"best_model_checkpoint": "/kaggle/working/xoron-final",
|
| 3 |
+
"best_metric": 4.052912997868326,
|
| 4 |
"epoch": 7,
|
| 5 |
"epochs_completed": 7,
|
| 6 |
+
"global_step": 392,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
| 9 |
"log_history": [],
|
| 10 |
"logging_steps": 50,
|
| 11 |
+
"max_steps": 392,
|
| 12 |
"num_train_epochs": 7,
|
| 13 |
"total_flos": 0,
|
| 14 |
"train_batch_size": 1,
|
training_state.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1514912171
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6774f59a3551cdf2b9af488e15bdec2bf75577937a4376aa12ec72a1c950e780
|
| 3 |
size 1514912171
|