Update model weights after training (epoch 4, loss 3.8692)

Files changed (7) hide show

audio_decoder.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:840aaf132e4830dfcfa0634d27acab02841f8eb9fffbfe4f78377c1d50aa050a
 size 1458410612

 version https://git-lfs.github.com/spec/v1
+oid sha256:0644bb8cb74a2a1d0e055138e41ec52d65d83dca9bc9466cbdd8f388f1aa96b2
 size 1458410612

cross_attention.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5343b1fe1af46ca860a51de6f3bd51d1843f70998850084f805c875aec2de030
 size 174191400

 version https://git-lfs.github.com/spec/v1
+oid sha256:10a70bf7bf4edce737146b199b106166957aa843440edfc45831f1d6033b7e11
 size 174191400

llm.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4c2d81eae0ff676724bf38cf020b2e6317e609eb90d43150ffe91610e67864e7
 size 1506831304

 version https://git-lfs.github.com/spec/v1
+oid sha256:b78daf2a6be38a3c0753175dd705363f8a348dc24b7d7a6fb9539715c530f22e
 size 1506831304

streaming_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-  "epoch": 19,
-  "unique_samples": 300,
-  "total_yields": 600,
   "dataset_positions": {
     "WebSight": 386,
     "ScienceQA": 364,
@@ -75,7 +75,16 @@
     "Synth-Debugging": 200,
     "Tool-Calls-SingleTurn": 200,
     "Tool-Calls-Multiturn": 200,
-    "OpenAssistant": 200
   },
   "modality_positions": {
     "text": {
@@ -121,13 +130,23 @@
       "Football": 6,
       "MagicBrush": 386
     },
-    "video": {},
     "audio": {}
   },
   "modality_counts": {
     "text": 0,
-    "image": 300,
-    "video": 0,
     "audio": 0
   },
   "last_modality": null

 {
+  "epoch": 26,
+  "unique_samples": 586,
+  "total_yields": 1172,
   "dataset_positions": {
     "WebSight": 386,
     "ScienceQA": 364,
     "Synth-Debugging": 200,
     "Tool-Calls-SingleTurn": 200,
     "Tool-Calls-Multiturn": 200,
+    "OpenAssistant": 200,
+    "T2V-Sora-Preferences-2": 200,
+    "T2V-Human-Preferences": 200,
+    "Sora-Alignment-Likert": 198,
+    "Sora-Style-Likert": 198,
+    "I2V-Preference-Seedance": 198,
+    "WebVid-10M": 200,
+    "Sora-Physics-Likert": 198,
+    "TIP-I2V": 200,
+    "Pexels-I2V-350k": 200
   },
   "modality_positions": {
     "text": {
       "Football": 6,
       "MagicBrush": 386
     },
+    "video": {
+      "T2V-Sora-Preferences-2": 200,
+      "T2V-Human-Preferences": 200,
+      "Sora-Alignment-Likert": 198,
+      "Sora-Style-Likert": 198,
+      "I2V-Preference-Seedance": 198,
+      "WebVid-10M": 200,
+      "Sora-Physics-Likert": 198,
+      "TIP-I2V": 200,
+      "Pexels-I2V-350k": 200
+    },
     "audio": {}
   },
   "modality_counts": {
     "text": 0,
+    "image": 0,
+    "video": 586,
     "audio": 0
   },
   "last_modality": null

trainer_state.json CHANGED Viewed

@@ -1,14 +1,14 @@
 {
   "best_model_checkpoint": "/kaggle/working/xoron-final",
-  "best_metric": 3.0820325045382684,
   "epoch": 4,
   "epochs_completed": 4,
-  "global_step": 148,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [],
   "logging_steps": 50,
-  "max_steps": 148,
   "num_train_epochs": 4,
   "total_flos": 0,
   "train_batch_size": 1,
@@ -17,16 +17,16 @@
   "max_grad_norm": 1.0,
   "trainable_components": [
     "vision",
     "llm",
     "cross_attention",
-    "image_generation",
     "modality_markers"
   ],
   "frozen_components": [
-    "video",
     "audio",
     "speech",
-    "video_generation"
   ],
   "trial_name": null,
   "trial_params": null

 {
   "best_model_checkpoint": "/kaggle/working/xoron-final",
+  "best_metric": 3.869171884744816,
   "epoch": 4,
   "epochs_completed": 4,
+  "global_step": 298,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [],
   "logging_steps": 50,
+  "max_steps": 298,
   "num_train_epochs": 4,
   "total_flos": 0,
   "train_batch_size": 1,
   "max_grad_norm": 1.0,
   "trainable_components": [
     "vision",
+    "video",
     "llm",
     "cross_attention",
+    "video_generation",
     "modality_markers"
   ],
   "frozen_components": [
     "audio",
     "speech",
+    "image_generation"
   ],
   "trial_name": null,
   "trial_params": null

training_state.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d17b71b1b8d8d73a29371b107d2020d349cf453a9089b49b44d1b5cb446fba74
-size 1419723549

 version https://git-lfs.github.com/spec/v1
+oid sha256:a9b37a03cba59de5ddbc9ab88c301e76b8a0fa5bc81d6d471cbefe513d0699cf
+size 724684421

video_generator.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c68805a467c37a4b172786a99fb83589c8a4e40b7b7a40886176cf1fd2188dc5
 size 61574134

 version https://git-lfs.github.com/spec/v1
+oid sha256:b4b113c1d2cf10b7fad0c03661c1093738604762583c5c8f0fb0c8c84bcdc6f4
 size 61574134