Any-to-Any
Transformers
Safetensors
English
xoron
multimodal
Mixture of Experts
text-to-image
image editing
image to video
text-to-video
video editing
text-to-speech
speech-to-text
speech-to-speech
image-to-text
video-to-text
agentic
tool-use
flow-matching
3d-rope
titok
vidtok
dual-stream-attention
zero-shot-voice-cloning
bigvgan
snake-activation
multi-receptive-field-fusion
custom_code
Update model weights after training (epoch 2, loss 3.9784)
Browse files- audio_decoder.safetensors +1 -1
- chat_template.jinja +4 -163
- cross_attention.safetensors +1 -1
- llm.safetensors +1 -1
- streaming_state.json +15 -15
- trainer_state.json +10 -10
- training_state.pt +2 -2
- video_generator.safetensors +1 -1
audio_decoder.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1458410612
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:49e6f986a4bddecd8227e793979c365ea238167aa0d87886dc13ece8b990cd89
|
| 3 |
size 1458410612
|
chat_template.jinja
CHANGED
|
@@ -6,181 +6,22 @@
|
|
| 6 |
{%- set user_end = '<|/user|>' -%}
|
| 7 |
{%- set assistant_start = '<|assistant|>' -%}
|
| 8 |
{%- set assistant_end = '<|/assistant|>' -%}
|
| 9 |
-
{%- set image_start = '<|image|>' -%}
|
| 10 |
-
{%- set image_end = '<|/image|>' -%}
|
| 11 |
-
{%- set video_start = '<|video|>' -%}
|
| 12 |
-
{%- set video_end = '<|/video|>' -%}
|
| 13 |
-
{%- set audio_start = '<|audio|>' -%}
|
| 14 |
-
{%- set audio_end = '<|/audio|>' -%}
|
| 15 |
-
{%- set doc_start = '<|doc|>' -%}
|
| 16 |
-
{%- set doc_end = '<|/doc|>' -%}
|
| 17 |
-
{%- set tool_call_start = '<|tool_call|>' -%}
|
| 18 |
-
{%- set tool_call_end = '<|/tool_call|>' -%}
|
| 19 |
-
{%- set tool_result_start = '<|tool_result|>' -%}
|
| 20 |
-
{%- set tool_result_end = '<|/tool_result|>' -%}
|
| 21 |
-
{%- set tools_start = '<|tools|>' -%}
|
| 22 |
-
{%- set tools_end = '<|/tools|>' -%}
|
| 23 |
-
{%- set available_tools_start = '<|available_tools|>' -%}
|
| 24 |
-
{%- set available_tools_end = '<|/available_tools|>' -%}
|
| 25 |
-
{%- set function_name_start = '<|function_name|>' -%}
|
| 26 |
-
{%- set function_name_end = '<|/function_name|>' -%}
|
| 27 |
-
{%- set function_args_start = '<|function_args|>' -%}
|
| 28 |
-
{%- set function_args_end = '<|/function_args|>' -%}
|
| 29 |
-
{%- set think_start = '<|think|>' -%}
|
| 30 |
-
{%- set think_end = '<|/think|>' -%}
|
| 31 |
-
{%- set plan_start = '<|plan|>' -%}
|
| 32 |
-
{%- set plan_end = '<|/plan|>' -%}
|
| 33 |
-
{%- set critique_start = '<|critique|>' -%}
|
| 34 |
-
{%- set critique_end = '<|/critique|>' -%}
|
| 35 |
-
{%- set analysis_start = '<|analysis|>' -%}
|
| 36 |
-
{%- set analysis_end = '<|/analysis|>' -%}
|
| 37 |
-
{%- set observation_start = '<|observation|>' -%}
|
| 38 |
-
{%- set observation_end = '<|/observation|>' -%}
|
| 39 |
-
{%- set reflection_start = '<|reflection|>' -%}
|
| 40 |
-
{%- set reflection_end = '<|/reflection|>' -%}
|
| 41 |
-
{%- set conclusion_start = '<|conclusion|>' -%}
|
| 42 |
-
{%- set conclusion_end = '<|/conclusion|>' -%}
|
| 43 |
-
{%- set code_start = '<|code|>' -%}
|
| 44 |
-
{%- set code_end = '<|/code|>' -%}
|
| 45 |
-
{%- set exec_start = '<|exec|>' -%}
|
| 46 |
-
{%- set exec_end = '<|/exec|>' -%}
|
| 47 |
-
{%- set exec_result = '<|exec_result|>' -%}
|
| 48 |
-
{%- set exec_result_end = '<|/exec_result|>' -%}
|
| 49 |
-
{%- set jupyter_code = '<|jupyter_code|>' -%}
|
| 50 |
-
{%- set jupyter_code_end = '<|/jupyter_code|>' -%}
|
| 51 |
-
{%- set jupyter_output = '<|jupyter_output|>' -%}
|
| 52 |
-
{%- set jupyter_output_end = '<|/jupyter_output|>' -%}
|
| 53 |
-
{%- set gen_image_start = '<|gen_image|>' -%}
|
| 54 |
-
{%- set gen_image_end = '<|/gen_image|>' -%}
|
| 55 |
-
{%- set gen_video_start = '<|gen_video|>' -%}
|
| 56 |
-
{%- set gen_video_end = '<|/gen_video|>' -%}
|
| 57 |
-
{%- set speak_start = '<|speak|>' -%}
|
| 58 |
-
{%- set speak_end = '<|/speak|>' -%}
|
| 59 |
-
{%- set listen_start = '<|listen|>' -%}
|
| 60 |
-
{%- set listen_end = '<|/listen|>' -%}
|
| 61 |
-
{%- set memory_start = '<|memory|>' -%}
|
| 62 |
-
{%- set memory_end = '<|/memory|>' -%}
|
| 63 |
-
{%- set context_start = '<|context|>' -%}
|
| 64 |
-
{%- set context_end = '<|/context|>' -%}
|
| 65 |
-
{%- set uncertain_start = '<|uncertain|>' -%}
|
| 66 |
-
{%- set uncertain_end = '<|/uncertain|>' -%}
|
| 67 |
-
{%- set cite_start = '<|cite|>' -%}
|
| 68 |
-
{%- set cite_end = '<|/cite|>' -%}
|
| 69 |
-
{%- set eod = '<|eod|>' -%}
|
| 70 |
|
| 71 |
{{- bos -}}
|
| 72 |
-
{%- if messages[0]['role'] == 'system' -%}
|
| 73 |
-
{{- system_start + messages[0]['content'] + system_end -}}
|
| 74 |
-
{%- set messages = messages[1:] -%}
|
| 75 |
-
{%- endif -%}
|
| 76 |
-
{%- if available_tools is defined and available_tools -%}
|
| 77 |
-
{{- available_tools_start + available_tools + available_tools_end -}}
|
| 78 |
-
{%- elif tools is defined and tools -%}
|
| 79 |
-
{{- tools_start + tools + tools_end -}}
|
| 80 |
-
{%- endif -%}
|
| 81 |
-
{%- if memory is defined and memory -%}
|
| 82 |
-
{{- memory_start + memory + memory_end -}}
|
| 83 |
-
{%- endif -%}
|
| 84 |
-
{%- if context is defined and context -%}
|
| 85 |
-
{{- context_start + context + context_end -}}
|
| 86 |
-
{%- endif -%}
|
| 87 |
{%- for message in messages -%}
|
| 88 |
{%- if message['role'] == 'system' -%}
|
| 89 |
{{- system_start + message['content'] + system_end -}}
|
| 90 |
{%- elif message['role'] == 'user' -%}
|
| 91 |
-
{{- user_start -}}
|
| 92 |
-
{%- if message.get('images') -%}
|
| 93 |
-
{%- for img in message['images'] -%}
|
| 94 |
-
{{- image_start + img + image_end -}}
|
| 95 |
-
{%- endfor -%}
|
| 96 |
-
{%- endif -%}
|
| 97 |
-
{%- if message.get('videos') -%}
|
| 98 |
-
{%- for vid in message['videos'] -%}
|
| 99 |
-
{{- video_start + vid + video_end -}}
|
| 100 |
-
{%- endfor -%}
|
| 101 |
-
{%- endif -%}
|
| 102 |
-
{%- if message.get('audio') -%}
|
| 103 |
-
{%- for aud in message['audio'] -%}
|
| 104 |
-
{{- audio_start + aud + audio_end -}}
|
| 105 |
-
{%- endfor -%}
|
| 106 |
-
{%- endif -%}
|
| 107 |
-
{%- if message.get('documents') -%}
|
| 108 |
-
{%- for doc in message['documents'] -%}
|
| 109 |
-
{{- doc_start + doc + doc_end -}}
|
| 110 |
-
{%- endfor -%}
|
| 111 |
-
{%- endif -%}
|
| 112 |
-
{{- message['content'] + user_end -}}
|
| 113 |
{%- elif message['role'] == 'assistant' -%}
|
| 114 |
-
{{- assistant_start -}}
|
| 115 |
-
{%- if message.get('thinking') -%}
|
| 116 |
-
{{- think_start + message['thinking'] + think_end -}}
|
| 117 |
-
{%- endif -%}
|
| 118 |
-
{%- if message.get('planning') -%}
|
| 119 |
-
{{- plan_start + message['planning'] + plan_end -}}
|
| 120 |
-
{%- endif -%}
|
| 121 |
-
{%- if message.get('analysis') -%}
|
| 122 |
-
{{- analysis_start + message['analysis'] + analysis_end -}}
|
| 123 |
-
{%- endif -%}
|
| 124 |
-
{%- if message.get('observation') -%}
|
| 125 |
-
{{- observation_start + message['observation'] + observation_end -}}
|
| 126 |
-
{%- endif -%}
|
| 127 |
-
{%- if message.get('reflection') -%}
|
| 128 |
-
{{- reflection_start + message['reflection'] + reflection_end -}}
|
| 129 |
-
{%- endif -%}
|
| 130 |
-
{%- if message.get('critique') -%}
|
| 131 |
-
{{- critique_start + message['critique'] + critique_end -}}
|
| 132 |
-
{%- endif -%}
|
| 133 |
-
{%- if message.get('conclusion') -%}
|
| 134 |
-
{{- conclusion_start + message['conclusion'] + conclusion_end -}}
|
| 135 |
-
{%- endif -%}
|
| 136 |
-
{%- if message.get('tool_calls') -%}
|
| 137 |
-
{%- for tool in message['tool_calls'] -%}
|
| 138 |
-
{{- tool_call_start -}}
|
| 139 |
-
{%- if tool is mapping -%}
|
| 140 |
-
{{- function_name_start + tool.get('name', '') + function_name_end -}}
|
| 141 |
-
{{- function_args_start + (tool.get('arguments', '') | tojson if tool.get('arguments') is mapping else tool.get('arguments', '')) + function_args_end -}}
|
| 142 |
-
{%- else -%}
|
| 143 |
-
{{- tool -}}
|
| 144 |
-
{%- endif -%}
|
| 145 |
-
{{- tool_call_end -}}
|
| 146 |
-
{%- endfor -%}
|
| 147 |
-
{%- endif -%}
|
| 148 |
-
{%- if message.get('code') -%}
|
| 149 |
-
{{- code_start + message['code'] + code_end -}}
|
| 150 |
-
{%- endif -%}
|
| 151 |
-
{%- if message.get('exec') -%}
|
| 152 |
-
{{- exec_start + message['exec'] + exec_end -}}
|
| 153 |
-
{%- endif -%}
|
| 154 |
-
{%- if message.get('gen_image') -%}
|
| 155 |
-
{{- gen_image_start + message['gen_image'] + gen_image_end -}}
|
| 156 |
-
{%- endif -%}
|
| 157 |
-
{%- if message.get('gen_video') -%}
|
| 158 |
-
{{- gen_video_start + message['gen_video'] + gen_video_end -}}
|
| 159 |
-
{%- endif -%}
|
| 160 |
-
{%- if message.get('speak') -%}
|
| 161 |
-
{{- speak_start + message['speak'] + speak_end -}}
|
| 162 |
-
{%- endif -%}
|
| 163 |
-
{%- if message.get('uncertain') -%}
|
| 164 |
-
{{- uncertain_start + message['uncertain'] + uncertain_end -}}
|
| 165 |
-
{%- endif -%}
|
| 166 |
-
{%- if message.get('citation') -%}
|
| 167 |
-
{{- cite_start + message['citation'] + cite_end -}}
|
| 168 |
-
{%- endif -%}
|
| 169 |
-
{{- message['content'] -}}
|
| 170 |
{%- if not loop.last or add_generation_prompt is not defined or not add_generation_prompt -%}
|
| 171 |
{{- assistant_end -}}
|
| 172 |
{%- endif -%}
|
| 173 |
-
{%- elif message['role'] == 'tool' -%}
|
| 174 |
-
{{-
|
| 175 |
-
{%- elif message['role'] == 'exec_result' -%}
|
| 176 |
-
{{- exec_result + message['content'] + exec_result_end -}}
|
| 177 |
-
{%- elif message['role'] == 'jupyter' -%}
|
| 178 |
-
{{- jupyter_output + message['content'] + jupyter_output_end -}}
|
| 179 |
{%- endif -%}
|
| 180 |
{%- endfor -%}
|
| 181 |
{%- if add_generation_prompt is defined and add_generation_prompt -%}
|
| 182 |
{{- assistant_start -}}
|
| 183 |
-
{%- if enable_thinking is defined and enable_thinking -%}
|
| 184 |
-
{{- think_start -}}
|
| 185 |
-
{%- endif -%}
|
| 186 |
{%- endif -%}
|
|
|
|
| 6 |
{%- set user_end = '<|/user|>' -%}
|
| 7 |
{%- set assistant_start = '<|assistant|>' -%}
|
| 8 |
{%- set assistant_end = '<|/assistant|>' -%}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
{{- bos -}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
{%- for message in messages -%}
|
| 12 |
{%- if message['role'] == 'system' -%}
|
| 13 |
{{- system_start + message['content'] + system_end -}}
|
| 14 |
{%- elif message['role'] == 'user' -%}
|
| 15 |
+
{{- user_start + message['content'] + user_end -}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
{%- elif message['role'] == 'assistant' -%}
|
| 17 |
+
{{- assistant_start + message['content'] -}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
{%- if not loop.last or add_generation_prompt is not defined or not add_generation_prompt -%}
|
| 19 |
{{- assistant_end -}}
|
| 20 |
{%- endif -%}
|
| 21 |
+
{%- elif message['role'] == 'tool' or message['role'] == 'exec_result' or message['role'] == 'jupyter' -%}
|
| 22 |
+
{{- user_start + message['content'] + user_end -}}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
{%- endif -%}
|
| 24 |
{%- endfor -%}
|
| 25 |
{%- if add_generation_prompt is defined and add_generation_prompt -%}
|
| 26 |
{{- assistant_start -}}
|
|
|
|
|
|
|
|
|
|
| 27 |
{%- endif -%}
|
cross_attention.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 174191400
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4018c034a9aebf411e6668da372c89e821fa86e0ee23ccae8c2d5950c7be81cc
|
| 3 |
size 174191400
|
llm.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1506832040
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8fcf68c777631c00259dbee807f3270c93a3a296b2bc123a7fa33ee83c531ca2
|
| 3 |
size 1506832040
|
streaming_state.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
-
"epoch":
|
| 3 |
-
"unique_samples":
|
| 4 |
-
"total_yields":
|
| 5 |
"dataset_positions": {
|
| 6 |
"WebSight": 386,
|
| 7 |
"ScienceQA": 364,
|
|
@@ -76,15 +76,15 @@
|
|
| 76 |
"Tool-Calls-SingleTurn": 200,
|
| 77 |
"Tool-Calls-Multiturn": 200,
|
| 78 |
"OpenAssistant": 450,
|
| 79 |
-
"T2V-Sora-Preferences-2":
|
| 80 |
-
"T2V-Human-Preferences":
|
| 81 |
"Sora-Alignment-Likert": 198,
|
| 82 |
"Sora-Style-Likert": 198,
|
| 83 |
"I2V-Preference-Seedance": 198,
|
| 84 |
-
"WebVid-10M":
|
| 85 |
"Sora-Physics-Likert": 198,
|
| 86 |
-
"TIP-I2V":
|
| 87 |
-
"Pexels-I2V-350k":
|
| 88 |
"SmolTalk-OpenHermes": 250,
|
| 89 |
"SmolTalk-All": 250
|
| 90 |
},
|
|
@@ -135,22 +135,22 @@
|
|
| 135 |
"MagicBrush": 386
|
| 136 |
},
|
| 137 |
"video": {
|
| 138 |
-
"T2V-Sora-Preferences-2":
|
| 139 |
-
"T2V-Human-Preferences":
|
| 140 |
"Sora-Alignment-Likert": 198,
|
| 141 |
"Sora-Style-Likert": 198,
|
| 142 |
"I2V-Preference-Seedance": 198,
|
| 143 |
-
"WebVid-10M":
|
| 144 |
"Sora-Physics-Likert": 198,
|
| 145 |
-
"TIP-I2V":
|
| 146 |
-
"Pexels-I2V-350k":
|
| 147 |
},
|
| 148 |
"audio": {}
|
| 149 |
},
|
| 150 |
"modality_counts": {
|
| 151 |
-
"text":
|
| 152 |
"image": 0,
|
| 153 |
-
"video":
|
| 154 |
"audio": 0
|
| 155 |
},
|
| 156 |
"last_modality": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"epoch": 51,
|
| 3 |
+
"unique_samples": 250,
|
| 4 |
+
"total_yields": 500,
|
| 5 |
"dataset_positions": {
|
| 6 |
"WebSight": 386,
|
| 7 |
"ScienceQA": 364,
|
|
|
|
| 76 |
"Tool-Calls-SingleTurn": 200,
|
| 77 |
"Tool-Calls-Multiturn": 200,
|
| 78 |
"OpenAssistant": 450,
|
| 79 |
+
"T2V-Sora-Preferences-2": 300,
|
| 80 |
+
"T2V-Human-Preferences": 300,
|
| 81 |
"Sora-Alignment-Likert": 198,
|
| 82 |
"Sora-Style-Likert": 198,
|
| 83 |
"I2V-Preference-Seedance": 198,
|
| 84 |
+
"WebVid-10M": 300,
|
| 85 |
"Sora-Physics-Likert": 198,
|
| 86 |
+
"TIP-I2V": 300,
|
| 87 |
+
"Pexels-I2V-350k": 300,
|
| 88 |
"SmolTalk-OpenHermes": 250,
|
| 89 |
"SmolTalk-All": 250
|
| 90 |
},
|
|
|
|
| 135 |
"MagicBrush": 386
|
| 136 |
},
|
| 137 |
"video": {
|
| 138 |
+
"T2V-Sora-Preferences-2": 300,
|
| 139 |
+
"T2V-Human-Preferences": 300,
|
| 140 |
"Sora-Alignment-Likert": 198,
|
| 141 |
"Sora-Style-Likert": 198,
|
| 142 |
"I2V-Preference-Seedance": 198,
|
| 143 |
+
"WebVid-10M": 300,
|
| 144 |
"Sora-Physics-Likert": 198,
|
| 145 |
+
"TIP-I2V": 300,
|
| 146 |
+
"Pexels-I2V-350k": 300
|
| 147 |
},
|
| 148 |
"audio": {}
|
| 149 |
},
|
| 150 |
"modality_counts": {
|
| 151 |
+
"text": 0,
|
| 152 |
"image": 0,
|
| 153 |
+
"video": 250,
|
| 154 |
"audio": 0
|
| 155 |
},
|
| 156 |
"last_modality": null
|
trainer_state.json
CHANGED
|
@@ -1,32 +1,32 @@
|
|
| 1 |
{
|
| 2 |
"best_model_checkpoint": "/kaggle/working/xoron-final",
|
| 3 |
-
"best_metric":
|
| 4 |
-
"epoch":
|
| 5 |
-
"epochs_completed":
|
| 6 |
-
"global_step":
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
| 9 |
"log_history": [],
|
| 10 |
"logging_steps": 50,
|
| 11 |
-
"max_steps":
|
| 12 |
-
"num_train_epochs":
|
| 13 |
"total_flos": 0,
|
| 14 |
"train_batch_size": 1,
|
| 15 |
"effective_batch_size": 16,
|
| 16 |
"learning_rate": 0.0001,
|
| 17 |
"max_grad_norm": 1.0,
|
| 18 |
"trainable_components": [
|
|
|
|
|
|
|
| 19 |
"llm",
|
| 20 |
"cross_attention",
|
|
|
|
| 21 |
"modality_markers"
|
| 22 |
],
|
| 23 |
"frozen_components": [
|
| 24 |
-
"vision",
|
| 25 |
-
"video",
|
| 26 |
"audio",
|
| 27 |
"speech",
|
| 28 |
-
"image_generation"
|
| 29 |
-
"video_generation"
|
| 30 |
],
|
| 31 |
"trial_name": null,
|
| 32 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_model_checkpoint": "/kaggle/working/xoron-final",
|
| 3 |
+
"best_metric": 3.9784093894741965,
|
| 4 |
+
"epoch": 2,
|
| 5 |
+
"epochs_completed": 2,
|
| 6 |
+
"global_step": 62,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
| 9 |
"log_history": [],
|
| 10 |
"logging_steps": 50,
|
| 11 |
+
"max_steps": 62,
|
| 12 |
+
"num_train_epochs": 2,
|
| 13 |
"total_flos": 0,
|
| 14 |
"train_batch_size": 1,
|
| 15 |
"effective_batch_size": 16,
|
| 16 |
"learning_rate": 0.0001,
|
| 17 |
"max_grad_norm": 1.0,
|
| 18 |
"trainable_components": [
|
| 19 |
+
"vision",
|
| 20 |
+
"video",
|
| 21 |
"llm",
|
| 22 |
"cross_attention",
|
| 23 |
+
"video_generation",
|
| 24 |
"modality_markers"
|
| 25 |
],
|
| 26 |
"frozen_components": [
|
|
|
|
|
|
|
| 27 |
"audio",
|
| 28 |
"speech",
|
| 29 |
+
"image_generation"
|
|
|
|
| 30 |
],
|
| 31 |
"trial_name": null,
|
| 32 |
"trial_params": null
|
training_state.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:996b90fe9af05150cee2a37b6f085ac2f23791df3a528e52813fe7de22153097
|
| 3 |
+
size 3426643671
|
video_generator.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 61574134
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a368345b4cc4a315a5258ee40047027e7c0b837907c3c15200877187899ab8be
|
| 3 |
size 61574134
|