Backup-bdg committed on
Commit
1fcfda0
·
verified ·
1 Parent(s): 0ec3133

Update model weights after training (epoch 1, loss 3.3989)

Browse files
audio_decoder.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a240617216d0644ac615eed664398b69c732420dba3c0121a5c1344652fb7fa
3
  size 1458410612
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d817de2ba9f31539807a8d57d1ad5441f33794329008e0a6b9e01764b831f909
3
  size 1458410612
config.json CHANGED
@@ -49,7 +49,7 @@
49
  "image_size_step": 32,
50
  "video_min_size": 128,
51
  "video_max_size": 320,
52
- "video_base_size": 320,
53
  "video_size_step": 32,
54
  "video_min_frames": 8,
55
  "video_max_frames": 8,
 
49
  "image_size_step": 32,
50
  "video_min_size": 128,
51
  "video_max_size": 320,
52
+ "video_base_size": 128,
53
  "video_size_step": 32,
54
  "video_min_frames": 8,
55
  "video_max_frames": 8,
cross_attention.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d5b0a5a4040b026f16479ff817aadab4e42a2281750c7728f70aba9fd988a1f
3
  size 174191400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6beff1e6cfb37ea461f112bf9d138ca007c01e24ac716b997a92000813aa8de5
3
  size 174191400
llm.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eeebe91f597a9aba32227820d24ec97ec34c5680090cd85ad3006eeefb812081
3
  size 1506832040
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b168f1e28965acb01ab0375c81614f3af6cd312b27c630633ce21c555d8ab3b5
3
  size 1506832040
streaming_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 84,
3
- "unique_samples": 150,
4
- "total_yields": 300,
5
  "dataset_positions": {
6
  "WebSight": 386,
7
  "ScienceQA": 364,
@@ -76,15 +76,15 @@
76
  "Tool-Calls-SingleTurn": 200,
77
  "Tool-Calls-Multiturn": 200,
78
  "OpenAssistant": 450,
79
- "T2V-Sora-Preferences-2": 600,
80
- "T2V-Human-Preferences": 600,
81
  "Sora-Alignment-Likert": 198,
82
  "Sora-Style-Likert": 198,
83
  "I2V-Preference-Seedance": 198,
84
- "WebVid-10M": 600,
85
  "Sora-Physics-Likert": 198,
86
- "TIP-I2V": 600,
87
- "Pexels-I2V-350k": 600,
88
  "SmolTalk-OpenHermes": 250,
89
  "SmolTalk-All": 250,
90
  "Cosmopedia-AutoMath": 250,
@@ -157,22 +157,22 @@
157
  "MagicBrush": 386
158
  },
159
  "video": {
160
- "T2V-Sora-Preferences-2": 600,
161
- "T2V-Human-Preferences": 600,
162
  "Sora-Alignment-Likert": 198,
163
  "Sora-Style-Likert": 198,
164
  "I2V-Preference-Seedance": 198,
165
- "WebVid-10M": 600,
166
  "Sora-Physics-Likert": 198,
167
- "TIP-I2V": 600,
168
- "Pexels-I2V-350k": 600
169
  },
170
  "audio": {}
171
  },
172
  "modality_counts": {
173
- "text": 150,
174
  "image": 0,
175
- "video": 0,
176
  "audio": 0
177
  },
178
  "last_modality": null
 
1
  {
2
+ "epoch": 85,
3
+ "unique_samples": 400,
4
+ "total_yields": 800,
5
  "dataset_positions": {
6
  "WebSight": 386,
7
  "ScienceQA": 364,
 
76
  "Tool-Calls-SingleTurn": 200,
77
  "Tool-Calls-Multiturn": 200,
78
  "OpenAssistant": 450,
79
+ "T2V-Sora-Preferences-2": 650,
80
+ "T2V-Human-Preferences": 650,
81
  "Sora-Alignment-Likert": 198,
82
  "Sora-Style-Likert": 198,
83
  "I2V-Preference-Seedance": 198,
84
+ "WebVid-10M": 650,
85
  "Sora-Physics-Likert": 198,
86
+ "TIP-I2V": 650,
87
+ "Pexels-I2V-350k": 650,
88
  "SmolTalk-OpenHermes": 250,
89
  "SmolTalk-All": 250,
90
  "Cosmopedia-AutoMath": 250,
 
157
  "MagicBrush": 386
158
  },
159
  "video": {
160
+ "T2V-Sora-Preferences-2": 650,
161
+ "T2V-Human-Preferences": 650,
162
  "Sora-Alignment-Likert": 198,
163
  "Sora-Style-Likert": 198,
164
  "I2V-Preference-Seedance": 198,
165
+ "WebVid-10M": 650,
166
  "Sora-Physics-Likert": 198,
167
+ "TIP-I2V": 650,
168
+ "Pexels-I2V-350k": 650
169
  },
170
  "audio": {}
171
  },
172
  "modality_counts": {
173
+ "text": 0,
174
  "image": 0,
175
+ "video": 250,
176
  "audio": 0
177
  },
178
  "last_modality": null
trainer_state.json CHANGED
@@ -1,32 +1,32 @@
1
  {
2
  "best_model_checkpoint": "/kaggle/working/xoron-final",
3
- "best_metric": 7.186485166748365,
4
- "epoch": 7,
5
- "epochs_completed": 7,
6
- "global_step": 126,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [],
10
  "logging_steps": 50,
11
- "max_steps": 126,
12
- "num_train_epochs": 7,
13
  "total_flos": 0,
14
  "train_batch_size": 1,
15
  "effective_batch_size": 16,
16
  "learning_rate": 0.0001,
17
  "max_grad_norm": 1.0,
18
  "trainable_components": [
 
 
19
  "llm",
20
  "cross_attention",
 
21
  "modality_markers"
22
  ],
23
  "frozen_components": [
24
- "vision",
25
- "video",
26
  "audio",
27
  "speech",
28
- "image_generation",
29
- "video_generation"
30
  ],
31
  "trial_name": null,
32
  "trial_params": null
 
1
  {
2
  "best_model_checkpoint": "/kaggle/working/xoron-final",
3
+ "best_metric": 3.398919365755515,
4
+ "epoch": 1,
5
+ "epochs_completed": 1,
6
+ "global_step": 31,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [],
10
  "logging_steps": 50,
11
+ "max_steps": 31,
12
+ "num_train_epochs": 1,
13
  "total_flos": 0,
14
  "train_batch_size": 1,
15
  "effective_batch_size": 16,
16
  "learning_rate": 0.0001,
17
  "max_grad_norm": 1.0,
18
  "trainable_components": [
19
+ "vision",
20
+ "video",
21
  "llm",
22
  "cross_attention",
23
+ "video_generation",
24
  "modality_markers"
25
  ],
26
  "frozen_components": [
 
 
27
  "audio",
28
  "speech",
29
+ "image_generation"
 
30
  ],
31
  "trial_name": null,
32
  "trial_params": null
training_state.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6733079c08d0e2c4e1bec055b3d45e16f552e09e2b8027d98d7dba03554b4300
3
- size 1514911851
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b224a38701068628ea2346719232695d255cff3500d63df4b888e5a94eab7ab4
3
+ size 3426643671
video_generator.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70eaa4447488bf781bc930d6054b5439f3a21b610f298399b947cf89d457a101
3
  size 61574134
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c17764963de9c77345a4b2ae8b508c3c4c7cb5bf15aa65b7c5239c3e8babc1ce
3
  size 61574134