Backup-bdg committed on
Commit
f6dd063
·
verified ·
1 Parent(s): 92ae801

Update model weights after training (epoch 4, loss 3.8692)

Browse files
audio_decoder.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:840aaf132e4830dfcfa0634d27acab02841f8eb9fffbfe4f78377c1d50aa050a
3
  size 1458410612
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0644bb8cb74a2a1d0e055138e41ec52d65d83dca9bc9466cbdd8f388f1aa96b2
3
  size 1458410612
cross_attention.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5343b1fe1af46ca860a51de6f3bd51d1843f70998850084f805c875aec2de030
3
  size 174191400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10a70bf7bf4edce737146b199b106166957aa843440edfc45831f1d6033b7e11
3
  size 174191400
llm.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c2d81eae0ff676724bf38cf020b2e6317e609eb90d43150ffe91610e67864e7
3
  size 1506831304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b78daf2a6be38a3c0753175dd705363f8a348dc24b7d7a6fb9539715c530f22e
3
  size 1506831304
streaming_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 19,
3
- "unique_samples": 300,
4
- "total_yields": 600,
5
  "dataset_positions": {
6
  "WebSight": 386,
7
  "ScienceQA": 364,
@@ -75,7 +75,16 @@
75
  "Synth-Debugging": 200,
76
  "Tool-Calls-SingleTurn": 200,
77
  "Tool-Calls-Multiturn": 200,
78
- "OpenAssistant": 200
 
 
 
 
 
 
 
 
 
79
  },
80
  "modality_positions": {
81
  "text": {
@@ -121,13 +130,23 @@
121
  "Football": 6,
122
  "MagicBrush": 386
123
  },
124
- "video": {},
 
 
 
 
 
 
 
 
 
 
125
  "audio": {}
126
  },
127
  "modality_counts": {
128
  "text": 0,
129
- "image": 300,
130
- "video": 0,
131
  "audio": 0
132
  },
133
  "last_modality": null
 
1
  {
2
+ "epoch": 26,
3
+ "unique_samples": 586,
4
+ "total_yields": 1172,
5
  "dataset_positions": {
6
  "WebSight": 386,
7
  "ScienceQA": 364,
 
75
  "Synth-Debugging": 200,
76
  "Tool-Calls-SingleTurn": 200,
77
  "Tool-Calls-Multiturn": 200,
78
+ "OpenAssistant": 200,
79
+ "T2V-Sora-Preferences-2": 200,
80
+ "T2V-Human-Preferences": 200,
81
+ "Sora-Alignment-Likert": 198,
82
+ "Sora-Style-Likert": 198,
83
+ "I2V-Preference-Seedance": 198,
84
+ "WebVid-10M": 200,
85
+ "Sora-Physics-Likert": 198,
86
+ "TIP-I2V": 200,
87
+ "Pexels-I2V-350k": 200
88
  },
89
  "modality_positions": {
90
  "text": {
 
130
  "Football": 6,
131
  "MagicBrush": 386
132
  },
133
+ "video": {
134
+ "T2V-Sora-Preferences-2": 200,
135
+ "T2V-Human-Preferences": 200,
136
+ "Sora-Alignment-Likert": 198,
137
+ "Sora-Style-Likert": 198,
138
+ "I2V-Preference-Seedance": 198,
139
+ "WebVid-10M": 200,
140
+ "Sora-Physics-Likert": 198,
141
+ "TIP-I2V": 200,
142
+ "Pexels-I2V-350k": 200
143
+ },
144
  "audio": {}
145
  },
146
  "modality_counts": {
147
  "text": 0,
148
+ "image": 0,
149
+ "video": 586,
150
  "audio": 0
151
  },
152
  "last_modality": null
trainer_state.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "best_model_checkpoint": "/kaggle/working/xoron-final",
3
- "best_metric": 3.0820325045382684,
4
  "epoch": 4,
5
  "epochs_completed": 4,
6
- "global_step": 148,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [],
10
  "logging_steps": 50,
11
- "max_steps": 148,
12
  "num_train_epochs": 4,
13
  "total_flos": 0,
14
  "train_batch_size": 1,
@@ -17,16 +17,16 @@
17
  "max_grad_norm": 1.0,
18
  "trainable_components": [
19
  "vision",
 
20
  "llm",
21
  "cross_attention",
22
- "image_generation",
23
  "modality_markers"
24
  ],
25
  "frozen_components": [
26
- "video",
27
  "audio",
28
  "speech",
29
- "video_generation"
30
  ],
31
  "trial_name": null,
32
  "trial_params": null
 
1
  {
2
  "best_model_checkpoint": "/kaggle/working/xoron-final",
3
+ "best_metric": 3.869171884744816,
4
  "epoch": 4,
5
  "epochs_completed": 4,
6
+ "global_step": 298,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [],
10
  "logging_steps": 50,
11
+ "max_steps": 298,
12
  "num_train_epochs": 4,
13
  "total_flos": 0,
14
  "train_batch_size": 1,
 
17
  "max_grad_norm": 1.0,
18
  "trainable_components": [
19
  "vision",
20
+ "video",
21
  "llm",
22
  "cross_attention",
23
+ "video_generation",
24
  "modality_markers"
25
  ],
26
  "frozen_components": [
 
27
  "audio",
28
  "speech",
29
+ "image_generation"
30
  ],
31
  "trial_name": null,
32
  "trial_params": null
training_state.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d17b71b1b8d8d73a29371b107d2020d349cf453a9089b49b44d1b5cb446fba74
3
- size 1419723549
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9b37a03cba59de5ddbc9ab88c301e76b8a0fa5bc81d6d471cbefe513d0699cf
3
+ size 724684421
video_generator.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c68805a467c37a4b172786a99fb83589c8a4e40b7b7a40886176cf1fd2188dc5
3
  size 61574134
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4b113c1d2cf10b7fad0c03661c1093738604762583c5c8f0fb0c8c84bcdc6f4
3
  size 61574134