Backup-bdg committed on
Commit
eca83de
·
verified ·
1 Parent(s): f5d4f0d

Update model weights after training (epoch 2, loss 3.9784)

Browse files
audio_decoder.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be283b159de0c4a206d250a8791de6c6dd88188cbf4bca57c4ff4f1b0b83ebf7
3
  size 1458410612
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49e6f986a4bddecd8227e793979c365ea238167aa0d87886dc13ece8b990cd89
3
  size 1458410612
chat_template.jinja CHANGED
@@ -6,181 +6,22 @@
6
  {%- set user_end = '<|/user|>' -%}
7
  {%- set assistant_start = '<|assistant|>' -%}
8
  {%- set assistant_end = '<|/assistant|>' -%}
9
- {%- set image_start = '<|image|>' -%}
10
- {%- set image_end = '<|/image|>' -%}
11
- {%- set video_start = '<|video|>' -%}
12
- {%- set video_end = '<|/video|>' -%}
13
- {%- set audio_start = '<|audio|>' -%}
14
- {%- set audio_end = '<|/audio|>' -%}
15
- {%- set doc_start = '<|doc|>' -%}
16
- {%- set doc_end = '<|/doc|>' -%}
17
- {%- set tool_call_start = '<|tool_call|>' -%}
18
- {%- set tool_call_end = '<|/tool_call|>' -%}
19
- {%- set tool_result_start = '<|tool_result|>' -%}
20
- {%- set tool_result_end = '<|/tool_result|>' -%}
21
- {%- set tools_start = '<|tools|>' -%}
22
- {%- set tools_end = '<|/tools|>' -%}
23
- {%- set available_tools_start = '<|available_tools|>' -%}
24
- {%- set available_tools_end = '<|/available_tools|>' -%}
25
- {%- set function_name_start = '<|function_name|>' -%}
26
- {%- set function_name_end = '<|/function_name|>' -%}
27
- {%- set function_args_start = '<|function_args|>' -%}
28
- {%- set function_args_end = '<|/function_args|>' -%}
29
- {%- set think_start = '<|think|>' -%}
30
- {%- set think_end = '<|/think|>' -%}
31
- {%- set plan_start = '<|plan|>' -%}
32
- {%- set plan_end = '<|/plan|>' -%}
33
- {%- set critique_start = '<|critique|>' -%}
34
- {%- set critique_end = '<|/critique|>' -%}
35
- {%- set analysis_start = '<|analysis|>' -%}
36
- {%- set analysis_end = '<|/analysis|>' -%}
37
- {%- set observation_start = '<|observation|>' -%}
38
- {%- set observation_end = '<|/observation|>' -%}
39
- {%- set reflection_start = '<|reflection|>' -%}
40
- {%- set reflection_end = '<|/reflection|>' -%}
41
- {%- set conclusion_start = '<|conclusion|>' -%}
42
- {%- set conclusion_end = '<|/conclusion|>' -%}
43
- {%- set code_start = '<|code|>' -%}
44
- {%- set code_end = '<|/code|>' -%}
45
- {%- set exec_start = '<|exec|>' -%}
46
- {%- set exec_end = '<|/exec|>' -%}
47
- {%- set exec_result = '<|exec_result|>' -%}
48
- {%- set exec_result_end = '<|/exec_result|>' -%}
49
- {%- set jupyter_code = '<|jupyter_code|>' -%}
50
- {%- set jupyter_code_end = '<|/jupyter_code|>' -%}
51
- {%- set jupyter_output = '<|jupyter_output|>' -%}
52
- {%- set jupyter_output_end = '<|/jupyter_output|>' -%}
53
- {%- set gen_image_start = '<|gen_image|>' -%}
54
- {%- set gen_image_end = '<|/gen_image|>' -%}
55
- {%- set gen_video_start = '<|gen_video|>' -%}
56
- {%- set gen_video_end = '<|/gen_video|>' -%}
57
- {%- set speak_start = '<|speak|>' -%}
58
- {%- set speak_end = '<|/speak|>' -%}
59
- {%- set listen_start = '<|listen|>' -%}
60
- {%- set listen_end = '<|/listen|>' -%}
61
- {%- set memory_start = '<|memory|>' -%}
62
- {%- set memory_end = '<|/memory|>' -%}
63
- {%- set context_start = '<|context|>' -%}
64
- {%- set context_end = '<|/context|>' -%}
65
- {%- set uncertain_start = '<|uncertain|>' -%}
66
- {%- set uncertain_end = '<|/uncertain|>' -%}
67
- {%- set cite_start = '<|cite|>' -%}
68
- {%- set cite_end = '<|/cite|>' -%}
69
- {%- set eod = '<|eod|>' -%}
70
 
71
  {{- bos -}}
72
- {%- if messages[0]['role'] == 'system' -%}
73
- {{- system_start + messages[0]['content'] + system_end -}}
74
- {%- set messages = messages[1:] -%}
75
- {%- endif -%}
76
- {%- if available_tools is defined and available_tools -%}
77
- {{- available_tools_start + available_tools + available_tools_end -}}
78
- {%- elif tools is defined and tools -%}
79
- {{- tools_start + tools + tools_end -}}
80
- {%- endif -%}
81
- {%- if memory is defined and memory -%}
82
- {{- memory_start + memory + memory_end -}}
83
- {%- endif -%}
84
- {%- if context is defined and context -%}
85
- {{- context_start + context + context_end -}}
86
- {%- endif -%}
87
  {%- for message in messages -%}
88
  {%- if message['role'] == 'system' -%}
89
  {{- system_start + message['content'] + system_end -}}
90
  {%- elif message['role'] == 'user' -%}
91
- {{- user_start -}}
92
- {%- if message.get('images') -%}
93
- {%- for img in message['images'] -%}
94
- {{- image_start + img + image_end -}}
95
- {%- endfor -%}
96
- {%- endif -%}
97
- {%- if message.get('videos') -%}
98
- {%- for vid in message['videos'] -%}
99
- {{- video_start + vid + video_end -}}
100
- {%- endfor -%}
101
- {%- endif -%}
102
- {%- if message.get('audio') -%}
103
- {%- for aud in message['audio'] -%}
104
- {{- audio_start + aud + audio_end -}}
105
- {%- endfor -%}
106
- {%- endif -%}
107
- {%- if message.get('documents') -%}
108
- {%- for doc in message['documents'] -%}
109
- {{- doc_start + doc + doc_end -}}
110
- {%- endfor -%}
111
- {%- endif -%}
112
- {{- message['content'] + user_end -}}
113
  {%- elif message['role'] == 'assistant' -%}
114
- {{- assistant_start -}}
115
- {%- if message.get('thinking') -%}
116
- {{- think_start + message['thinking'] + think_end -}}
117
- {%- endif -%}
118
- {%- if message.get('planning') -%}
119
- {{- plan_start + message['planning'] + plan_end -}}
120
- {%- endif -%}
121
- {%- if message.get('analysis') -%}
122
- {{- analysis_start + message['analysis'] + analysis_end -}}
123
- {%- endif -%}
124
- {%- if message.get('observation') -%}
125
- {{- observation_start + message['observation'] + observation_end -}}
126
- {%- endif -%}
127
- {%- if message.get('reflection') -%}
128
- {{- reflection_start + message['reflection'] + reflection_end -}}
129
- {%- endif -%}
130
- {%- if message.get('critique') -%}
131
- {{- critique_start + message['critique'] + critique_end -}}
132
- {%- endif -%}
133
- {%- if message.get('conclusion') -%}
134
- {{- conclusion_start + message['conclusion'] + conclusion_end -}}
135
- {%- endif -%}
136
- {%- if message.get('tool_calls') -%}
137
- {%- for tool in message['tool_calls'] -%}
138
- {{- tool_call_start -}}
139
- {%- if tool is mapping -%}
140
- {{- function_name_start + tool.get('name', '') + function_name_end -}}
141
- {{- function_args_start + (tool.get('arguments', '') | tojson if tool.get('arguments') is mapping else tool.get('arguments', '')) + function_args_end -}}
142
- {%- else -%}
143
- {{- tool -}}
144
- {%- endif -%}
145
- {{- tool_call_end -}}
146
- {%- endfor -%}
147
- {%- endif -%}
148
- {%- if message.get('code') -%}
149
- {{- code_start + message['code'] + code_end -}}
150
- {%- endif -%}
151
- {%- if message.get('exec') -%}
152
- {{- exec_start + message['exec'] + exec_end -}}
153
- {%- endif -%}
154
- {%- if message.get('gen_image') -%}
155
- {{- gen_image_start + message['gen_image'] + gen_image_end -}}
156
- {%- endif -%}
157
- {%- if message.get('gen_video') -%}
158
- {{- gen_video_start + message['gen_video'] + gen_video_end -}}
159
- {%- endif -%}
160
- {%- if message.get('speak') -%}
161
- {{- speak_start + message['speak'] + speak_end -}}
162
- {%- endif -%}
163
- {%- if message.get('uncertain') -%}
164
- {{- uncertain_start + message['uncertain'] + uncertain_end -}}
165
- {%- endif -%}
166
- {%- if message.get('citation') -%}
167
- {{- cite_start + message['citation'] + cite_end -}}
168
- {%- endif -%}
169
- {{- message['content'] -}}
170
  {%- if not loop.last or add_generation_prompt is not defined or not add_generation_prompt -%}
171
  {{- assistant_end -}}
172
  {%- endif -%}
173
- {%- elif message['role'] == 'tool' -%}
174
- {{- tool_result_start + message['content'] + tool_result_end -}}
175
- {%- elif message['role'] == 'exec_result' -%}
176
- {{- exec_result + message['content'] + exec_result_end -}}
177
- {%- elif message['role'] == 'jupyter' -%}
178
- {{- jupyter_output + message['content'] + jupyter_output_end -}}
179
  {%- endif -%}
180
  {%- endfor -%}
181
  {%- if add_generation_prompt is defined and add_generation_prompt -%}
182
  {{- assistant_start -}}
183
- {%- if enable_thinking is defined and enable_thinking -%}
184
- {{- think_start -}}
185
- {%- endif -%}
186
  {%- endif -%}
 
6
  {%- set user_end = '<|/user|>' -%}
7
  {%- set assistant_start = '<|assistant|>' -%}
8
  {%- set assistant_end = '<|/assistant|>' -%}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  {{- bos -}}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  {%- for message in messages -%}
12
  {%- if message['role'] == 'system' -%}
13
  {{- system_start + message['content'] + system_end -}}
14
  {%- elif message['role'] == 'user' -%}
15
+ {{- user_start + message['content'] + user_end -}}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  {%- elif message['role'] == 'assistant' -%}
17
+ {{- assistant_start + message['content'] -}}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  {%- if not loop.last or add_generation_prompt is not defined or not add_generation_prompt -%}
19
  {{- assistant_end -}}
20
  {%- endif -%}
21
+ {%- elif message['role'] == 'tool' or message['role'] == 'exec_result' or message['role'] == 'jupyter' -%}
22
+ {{- user_start + message['content'] + user_end -}}
 
 
 
 
23
  {%- endif -%}
24
  {%- endfor -%}
25
  {%- if add_generation_prompt is defined and add_generation_prompt -%}
26
  {{- assistant_start -}}
 
 
 
27
  {%- endif -%}
cross_attention.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:879ba97c8a30d794181570c76bd10ecbb10fb84fabcb10047d7f5d7f944cc707
3
  size 174191400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4018c034a9aebf411e6668da372c89e821fa86e0ee23ccae8c2d5950c7be81cc
3
  size 174191400
llm.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b534cad0e5014cdd5984e8a4bd04771ffc7e701c12cea42b5467e4d051224d9
3
  size 1506832040
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fcf68c777631c00259dbee807f3270c93a3a296b2bc123a7fa33ee83c531ca2
3
  size 1506832040
streaming_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 48,
3
- "unique_samples": 50,
4
- "total_yields": 100,
5
  "dataset_positions": {
6
  "WebSight": 386,
7
  "ScienceQA": 364,
@@ -76,15 +76,15 @@
76
  "Tool-Calls-SingleTurn": 200,
77
  "Tool-Calls-Multiturn": 200,
78
  "OpenAssistant": 450,
79
- "T2V-Sora-Preferences-2": 200,
80
- "T2V-Human-Preferences": 200,
81
  "Sora-Alignment-Likert": 198,
82
  "Sora-Style-Likert": 198,
83
  "I2V-Preference-Seedance": 198,
84
- "WebVid-10M": 200,
85
  "Sora-Physics-Likert": 198,
86
- "TIP-I2V": 200,
87
- "Pexels-I2V-350k": 200,
88
  "SmolTalk-OpenHermes": 250,
89
  "SmolTalk-All": 250
90
  },
@@ -135,22 +135,22 @@
135
  "MagicBrush": 386
136
  },
137
  "video": {
138
- "T2V-Sora-Preferences-2": 200,
139
- "T2V-Human-Preferences": 200,
140
  "Sora-Alignment-Likert": 198,
141
  "Sora-Style-Likert": 198,
142
  "I2V-Preference-Seedance": 198,
143
- "WebVid-10M": 200,
144
  "Sora-Physics-Likert": 198,
145
- "TIP-I2V": 200,
146
- "Pexels-I2V-350k": 200
147
  },
148
  "audio": {}
149
  },
150
  "modality_counts": {
151
- "text": 50,
152
  "image": 0,
153
- "video": 0,
154
  "audio": 0
155
  },
156
  "last_modality": null
 
1
  {
2
+ "epoch": 51,
3
+ "unique_samples": 250,
4
+ "total_yields": 500,
5
  "dataset_positions": {
6
  "WebSight": 386,
7
  "ScienceQA": 364,
 
76
  "Tool-Calls-SingleTurn": 200,
77
  "Tool-Calls-Multiturn": 200,
78
  "OpenAssistant": 450,
79
+ "T2V-Sora-Preferences-2": 300,
80
+ "T2V-Human-Preferences": 300,
81
  "Sora-Alignment-Likert": 198,
82
  "Sora-Style-Likert": 198,
83
  "I2V-Preference-Seedance": 198,
84
+ "WebVid-10M": 300,
85
  "Sora-Physics-Likert": 198,
86
+ "TIP-I2V": 300,
87
+ "Pexels-I2V-350k": 300,
88
  "SmolTalk-OpenHermes": 250,
89
  "SmolTalk-All": 250
90
  },
 
135
  "MagicBrush": 386
136
  },
137
  "video": {
138
+ "T2V-Sora-Preferences-2": 300,
139
+ "T2V-Human-Preferences": 300,
140
  "Sora-Alignment-Likert": 198,
141
  "Sora-Style-Likert": 198,
142
  "I2V-Preference-Seedance": 198,
143
+ "WebVid-10M": 300,
144
  "Sora-Physics-Likert": 198,
145
+ "TIP-I2V": 300,
146
+ "Pexels-I2V-350k": 300
147
  },
148
  "audio": {}
149
  },
150
  "modality_counts": {
151
+ "text": 0,
152
  "image": 0,
153
+ "video": 250,
154
  "audio": 0
155
  },
156
  "last_modality": null
trainer_state.json CHANGED
@@ -1,32 +1,32 @@
1
  {
2
  "best_model_checkpoint": "/kaggle/working/xoron-final",
3
- "best_metric": 4.869536457061767,
4
- "epoch": 7,
5
- "epochs_completed": 7,
6
- "global_step": 42,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [],
10
  "logging_steps": 50,
11
- "max_steps": 42,
12
- "num_train_epochs": 7,
13
  "total_flos": 0,
14
  "train_batch_size": 1,
15
  "effective_batch_size": 16,
16
  "learning_rate": 0.0001,
17
  "max_grad_norm": 1.0,
18
  "trainable_components": [
 
 
19
  "llm",
20
  "cross_attention",
 
21
  "modality_markers"
22
  ],
23
  "frozen_components": [
24
- "vision",
25
- "video",
26
  "audio",
27
  "speech",
28
- "image_generation",
29
- "video_generation"
30
  ],
31
  "trial_name": null,
32
  "trial_params": null
 
1
  {
2
  "best_model_checkpoint": "/kaggle/working/xoron-final",
3
+ "best_metric": 3.9784093894741965,
4
+ "epoch": 2,
5
+ "epochs_completed": 2,
6
+ "global_step": 62,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [],
10
  "logging_steps": 50,
11
+ "max_steps": 62,
12
+ "num_train_epochs": 2,
13
  "total_flos": 0,
14
  "train_batch_size": 1,
15
  "effective_batch_size": 16,
16
  "learning_rate": 0.0001,
17
  "max_grad_norm": 1.0,
18
  "trainable_components": [
19
+ "vision",
20
+ "video",
21
  "llm",
22
  "cross_attention",
23
+ "video_generation",
24
  "modality_markers"
25
  ],
26
  "frozen_components": [
 
 
27
  "audio",
28
  "speech",
29
+ "image_generation"
 
30
  ],
31
  "trial_name": null,
32
  "trial_params": null
training_state.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b7335b590f20d3da7af0b586bde304e2566d2306489e64d38993d1cd20df627
3
- size 1514911851
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:996b90fe9af05150cee2a37b6f085ac2f23791df3a528e52813fe7de22153097
3
+ size 3426643671
video_generator.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4b113c1d2cf10b7fad0c03661c1093738604762583c5c8f0fb0c8c84bcdc6f4
3
  size 61574134
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a368345b4cc4a315a5258ee40047027e7c0b837907c3c15200877187899ab8be
3
  size 61574134