Backup-bdg committed on
Commit
39c6cc5
·
verified ·
1 Parent(s): 028ad74

Update model weights after training (epoch 5, loss 6.5644)

Browse files
audio_decoder.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5908d239f6fbfc57829c2cbb1d27f6be5dd66124afde0e57fa61a908465aaeb2
3
  size 1458410612
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e161c50803ebcf38666c109dde1baef4dc92fa5db9967fbd8e72f2b5af392b76
3
  size 1458410612
cross_attention.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15d8ec9d811b147a55801291ea133030f28ab124bb05ce6e6695198c9ba9983b
3
  size 174191400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ba284496e9f5135658519fcb742fe092211dfd5df2aa73ac4e3effa31fb5319
3
  size 174191400
llm.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b686364d4e646df1bbeb0448c37d3a4da241b0c2e22cb8aec05244d3fa2f5aa7
3
  size 1506832040
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:050b46cae242ff76f36def2a463e491fd494e2a8f5ba239229e8cd25851300e5
3
  size 1506832040
streaming_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 62,
3
- "unique_samples": 250,
4
- "total_yields": 500,
5
  "dataset_positions": {
6
  "WebSight": 386,
7
  "ScienceQA": 364,
@@ -86,7 +86,15 @@
86
  "TIP-I2V": 600,
87
  "Pexels-I2V-350k": 600,
88
  "SmolTalk-OpenHermes": 250,
89
- "SmolTalk-All": 250
 
 
 
 
 
 
 
 
90
  },
91
  "modality_positions": {
92
  "text": {
@@ -123,7 +131,15 @@
123
  "Tool-Calls-Multiturn": 200,
124
  "OpenAssistant": 450,
125
  "SmolTalk-OpenHermes": 250,
126
- "SmolTalk-All": 250
 
 
 
 
 
 
 
 
127
  },
128
  "image": {
129
  "WebSight": 386,
@@ -148,9 +164,9 @@
148
  "audio": {}
149
  },
150
  "modality_counts": {
151
- "text": 0,
152
  "image": 0,
153
- "video": 250,
154
  "audio": 0
155
  },
156
  "last_modality": null
 
1
  {
2
+ "epoch": 71,
3
+ "unique_samples": 400,
4
+ "total_yields": 800,
5
  "dataset_positions": {
6
  "WebSight": 386,
7
  "ScienceQA": 364,
 
86
  "TIP-I2V": 600,
87
  "Pexels-I2V-350k": 600,
88
  "SmolTalk-OpenHermes": 250,
89
+ "SmolTalk-All": 250,
90
+ "Cosmopedia-AutoMath": 250,
91
+ "OpenMathInstruct-1": 250,
92
+ "NuminaMath-CoT": 250,
93
+ "UltraData-Math-Conv": 250,
94
+ "Cosmopedia-KhanAcademy": 250,
95
+ "NuminaMath-TIR": 250,
96
+ "UltraData-Math-QA": 250,
97
+ "Cosmopedia-OpenStax": 250
98
  },
99
  "modality_positions": {
100
  "text": {
 
131
  "Tool-Calls-Multiturn": 200,
132
  "OpenAssistant": 450,
133
  "SmolTalk-OpenHermes": 250,
134
+ "SmolTalk-All": 250,
135
+ "Cosmopedia-AutoMath": 250,
136
+ "OpenMathInstruct-1": 250,
137
+ "NuminaMath-CoT": 250,
138
+ "UltraData-Math-Conv": 250,
139
+ "Cosmopedia-KhanAcademy": 250,
140
+ "NuminaMath-TIR": 250,
141
+ "UltraData-Math-QA": 250,
142
+ "Cosmopedia-OpenStax": 250
143
  },
144
  "image": {
145
  "WebSight": 386,
 
164
  "audio": {}
165
  },
166
  "modality_counts": {
167
+ "text": 400,
168
  "image": 0,
169
+ "video": 0,
170
  "audio": 0
171
  },
172
  "last_modality": null
trainer_state.json CHANGED
@@ -1,32 +1,32 @@
1
  {
2
  "best_model_checkpoint": "/kaggle/working/xoron-final",
3
- "best_metric": 2.7717735348048738,
4
- "epoch": 6,
5
- "epochs_completed": 6,
6
- "global_step": 186,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [],
10
  "logging_steps": 50,
11
- "max_steps": 186,
12
- "num_train_epochs": 6,
13
  "total_flos": 0,
14
  "train_batch_size": 1,
15
  "effective_batch_size": 16,
16
  "learning_rate": 0.0001,
17
  "max_grad_norm": 1.0,
18
  "trainable_components": [
19
- "vision",
20
- "video",
21
  "llm",
22
  "cross_attention",
23
- "video_generation",
24
  "modality_markers"
25
  ],
26
  "frozen_components": [
 
 
27
  "audio",
28
  "speech",
29
- "image_generation"
 
30
  ],
31
  "trial_name": null,
32
  "trial_params": null
 
1
  {
2
  "best_model_checkpoint": "/kaggle/working/xoron-final",
3
+ "best_metric": 6.564389287829399,
4
+ "epoch": 5,
5
+ "epochs_completed": 5,
6
+ "global_step": 250,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [],
10
  "logging_steps": 50,
11
+ "max_steps": 250,
12
+ "num_train_epochs": 5,
13
  "total_flos": 0,
14
  "train_batch_size": 1,
15
  "effective_batch_size": 16,
16
  "learning_rate": 0.0001,
17
  "max_grad_norm": 1.0,
18
  "trainable_components": [
 
 
19
  "llm",
20
  "cross_attention",
 
21
  "modality_markers"
22
  ],
23
  "frozen_components": [
24
+ "vision",
25
+ "video",
26
  "audio",
27
  "speech",
28
+ "image_generation",
29
+ "video_generation"
30
  ],
31
  "trial_name": null,
32
  "trial_params": null
training_state.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2b27f88bfa4b75b4f8d0e5a76bd83c68286060a451b136de1eeb07a923ed70a
3
- size 3426643671
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b617b2a3ab7ba08cac1c55c6a02085d0d226885f2a225d7626553579ac8029ab
3
+ size 1514911851