Any-to-Any
Transformers
Safetensors
English
xoron
multimodal
Mixture of Experts
text-to-image
image editing
image to video
text-to-video
video editing
text-to-speech
speech-to-text
speech-to-speech
image-to-text
video-to-text
agentic
tool-use
flow-matching
3d-rope
titok
vidtok
dual-stream-attention
zero-shot-voice-cloning
bigvgan
snake-activation
multi-receptive-field-fusion
custom_code
Update model weights after training (epoch 7, loss 6.6223)
Browse files- audio_decoder.safetensors +1 -1
- cross_attention.safetensors +1 -1
- llm.safetensors +1 -1
- streaming_state.json +18 -18
- trainer_state.json +6 -6
- training_state.pt +2 -2
audio_decoder.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1458410612
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b122e6fd48e418062afa9a2835c289fd000c1b39c3339a4a203be9976b7ac486
|
| 3 |
size 1458410612
|
cross_attention.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 174191400
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:15e348f1b98e8cc48f633f80a818a98a727f8a95e3794d3d7496c7c67d319c21
|
| 3 |
size 174191400
|
llm.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1506832040
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3888d6f2029add98a6540daf90a2fffaf8b2c0420fca1b401042a37ae56f957f
|
| 3 |
size 1506832040
|
streaming_state.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
-
"epoch":
|
| 3 |
-
"unique_samples":
|
| 4 |
-
"total_yields":
|
| 5 |
"dataset_positions": {
|
| 6 |
"WebSight": 386,
|
| 7 |
"ScienceQA": 364,
|
|
@@ -18,16 +18,16 @@
|
|
| 18 |
"CodeParrot-Clean": 200,
|
| 19 |
"ShareGPT-Clean": 200,
|
| 20 |
"Synth-Issues": 200,
|
| 21 |
-
"Dolly-15k":
|
| 22 |
-
"Conversation-Summarization":
|
| 23 |
"Synth-ShellTimeout": 200,
|
| 24 |
"Synth-Docker": 200,
|
| 25 |
"Synth-Documents": 450,
|
| 26 |
"HumanEval-JavaScript": 164,
|
| 27 |
-
"OpenOrca":
|
| 28 |
"Synth-MultiStepExecution": 200,
|
| 29 |
"Synth-Citation": 550,
|
| 30 |
-
"NoRobots":
|
| 31 |
"Synth-LanguageSetup": 200,
|
| 32 |
"Function-Calling-ChatML": 200,
|
| 33 |
"Synth-CoT": 550,
|
|
@@ -75,7 +75,7 @@
|
|
| 75 |
"Synth-Debugging": 200,
|
| 76 |
"Tool-Calls-SingleTurn": 200,
|
| 77 |
"Tool-Calls-Multiturn": 200,
|
| 78 |
-
"OpenAssistant":
|
| 79 |
"T2V-Sora-Preferences-2": 650,
|
| 80 |
"T2V-Human-Preferences": 650,
|
| 81 |
"Sora-Alignment-Likert": 198,
|
|
@@ -85,8 +85,8 @@
|
|
| 85 |
"Sora-Physics-Likert": 198,
|
| 86 |
"TIP-I2V": 650,
|
| 87 |
"Pexels-I2V-350k": 650,
|
| 88 |
-
"SmolTalk-OpenHermes":
|
| 89 |
-
"SmolTalk-All":
|
| 90 |
"Cosmopedia-AutoMath": 600,
|
| 91 |
"OpenMathInstruct-1": 600,
|
| 92 |
"NuminaMath-CoT": 600,
|
|
@@ -105,11 +105,11 @@
|
|
| 105 |
"Midjourney-Prompts": 200,
|
| 106 |
"CodeParrot-Clean": 200,
|
| 107 |
"ShareGPT-Clean": 200,
|
| 108 |
-
"Dolly-15k":
|
| 109 |
-
"Conversation-Summarization":
|
| 110 |
"HumanEval-JavaScript": 164,
|
| 111 |
-
"OpenOrca":
|
| 112 |
-
"NoRobots":
|
| 113 |
"Function-Calling-ChatML": 200,
|
| 114 |
"Python-Code-18k": 200,
|
| 115 |
"Code-Feedback": 200,
|
|
@@ -132,9 +132,9 @@
|
|
| 132 |
"HumanEval-Rust": 164,
|
| 133 |
"Tool-Calls-SingleTurn": 200,
|
| 134 |
"Tool-Calls-Multiturn": 200,
|
| 135 |
-
"OpenAssistant":
|
| 136 |
-
"SmolTalk-OpenHermes":
|
| 137 |
-
"SmolTalk-All":
|
| 138 |
"Cosmopedia-AutoMath": 600,
|
| 139 |
"OpenMathInstruct-1": 600,
|
| 140 |
"NuminaMath-CoT": 600,
|
|
@@ -179,7 +179,7 @@
|
|
| 179 |
"audio": {}
|
| 180 |
},
|
| 181 |
"modality_counts": {
|
| 182 |
-
"text":
|
| 183 |
"image": 0,
|
| 184 |
"video": 0,
|
| 185 |
"audio": 0
|
|
|
|
| 1 |
{
|
| 2 |
+
"epoch": 135,
|
| 3 |
+
"unique_samples": 350,
|
| 4 |
+
"total_yields": 700,
|
| 5 |
"dataset_positions": {
|
| 6 |
"WebSight": 386,
|
| 7 |
"ScienceQA": 364,
|
|
|
|
| 18 |
"CodeParrot-Clean": 200,
|
| 19 |
"ShareGPT-Clean": 200,
|
| 20 |
"Synth-Issues": 200,
|
| 21 |
+
"Dolly-15k": 800,
|
| 22 |
+
"Conversation-Summarization": 800,
|
| 23 |
"Synth-ShellTimeout": 200,
|
| 24 |
"Synth-Docker": 200,
|
| 25 |
"Synth-Documents": 450,
|
| 26 |
"HumanEval-JavaScript": 164,
|
| 27 |
+
"OpenOrca": 800,
|
| 28 |
"Synth-MultiStepExecution": 200,
|
| 29 |
"Synth-Citation": 550,
|
| 30 |
+
"NoRobots": 800,
|
| 31 |
"Synth-LanguageSetup": 200,
|
| 32 |
"Function-Calling-ChatML": 200,
|
| 33 |
"Synth-CoT": 550,
|
|
|
|
| 75 |
"Synth-Debugging": 200,
|
| 76 |
"Tool-Calls-SingleTurn": 200,
|
| 77 |
"Tool-Calls-Multiturn": 200,
|
| 78 |
+
"OpenAssistant": 800,
|
| 79 |
"T2V-Sora-Preferences-2": 650,
|
| 80 |
"T2V-Human-Preferences": 650,
|
| 81 |
"Sora-Alignment-Likert": 198,
|
|
|
|
| 85 |
"Sora-Physics-Likert": 198,
|
| 86 |
"TIP-I2V": 650,
|
| 87 |
"Pexels-I2V-350k": 650,
|
| 88 |
+
"SmolTalk-OpenHermes": 600,
|
| 89 |
+
"SmolTalk-All": 600,
|
| 90 |
"Cosmopedia-AutoMath": 600,
|
| 91 |
"OpenMathInstruct-1": 600,
|
| 92 |
"NuminaMath-CoT": 600,
|
|
|
|
| 105 |
"Midjourney-Prompts": 200,
|
| 106 |
"CodeParrot-Clean": 200,
|
| 107 |
"ShareGPT-Clean": 200,
|
| 108 |
+
"Dolly-15k": 800,
|
| 109 |
+
"Conversation-Summarization": 800,
|
| 110 |
"HumanEval-JavaScript": 164,
|
| 111 |
+
"OpenOrca": 800,
|
| 112 |
+
"NoRobots": 800,
|
| 113 |
"Function-Calling-ChatML": 200,
|
| 114 |
"Python-Code-18k": 200,
|
| 115 |
"Code-Feedback": 200,
|
|
|
|
| 132 |
"HumanEval-Rust": 164,
|
| 133 |
"Tool-Calls-SingleTurn": 200,
|
| 134 |
"Tool-Calls-Multiturn": 200,
|
| 135 |
+
"OpenAssistant": 800,
|
| 136 |
+
"SmolTalk-OpenHermes": 600,
|
| 137 |
+
"SmolTalk-All": 600,
|
| 138 |
"Cosmopedia-AutoMath": 600,
|
| 139 |
"OpenMathInstruct-1": 600,
|
| 140 |
"NuminaMath-CoT": 600,
|
|
|
|
| 179 |
"audio": {}
|
| 180 |
},
|
| 181 |
"modality_counts": {
|
| 182 |
+
"text": 350,
|
| 183 |
"image": 0,
|
| 184 |
"video": 0,
|
| 185 |
"audio": 0
|
trainer_state.json
CHANGED
|
@@ -1,15 +1,15 @@
|
|
| 1 |
{
|
| 2 |
"best_model_checkpoint": "/kaggle/working/xoron-final",
|
| 3 |
-
"best_metric": 6.
|
| 4 |
-
"epoch":
|
| 5 |
-
"epochs_completed":
|
| 6 |
-
"global_step":
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
| 9 |
"log_history": [],
|
| 10 |
"logging_steps": 50,
|
| 11 |
-
"max_steps":
|
| 12 |
-
"num_train_epochs":
|
| 13 |
"total_flos": 0,
|
| 14 |
"train_batch_size": 1,
|
| 15 |
"effective_batch_size": 16,
|
|
|
|
| 1 |
{
|
| 2 |
"best_model_checkpoint": "/kaggle/working/xoron-final",
|
| 3 |
+
"best_metric": 6.622317645549774,
|
| 4 |
+
"epoch": 7,
|
| 5 |
+
"epochs_completed": 7,
|
| 6 |
+
"global_step": 301,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
| 9 |
"log_history": [],
|
| 10 |
"logging_steps": 50,
|
| 11 |
+
"max_steps": 301,
|
| 12 |
+
"num_train_epochs": 7,
|
| 13 |
"total_flos": 0,
|
| 14 |
"train_batch_size": 1,
|
| 15 |
"effective_batch_size": 16,
|
training_state.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a89e9a0652c7c060ae5d2f1211f9a8ce9e301009c1282faa827cfb44a01e4db3
|
| 3 |
+
size 1514912171
|