Update model weights after training (epoch 4, loss 3.6467)

Browse files

Files changed (6) hide show

audio_decoder.safetensors +1 -1
cross_attention.safetensors +1 -1
llm.safetensors +1 -1
streaming_state.json +106 -7
trainer_state.json +8 -8
training_state.pt +2 -2

audio_decoder.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9cc0af3086d6987e71c00b7121394b8ac820d2276ff994014479d4fc2cf094bf
 size 1458415836

 version https://git-lfs.github.com/spec/v1
+oid sha256:56f890d89fb28ae5df4b1ab79c42b0c29edc81203f2a99ea077d47909b6d128a
 size 1458415836

cross_attention.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9034e718a6461ed04d5723c8ecf429d0daedb7fc49274a1fbd17b80bb9dd77b9
 size 174191400

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d6a01050517c1c2762257ca7f0c03259704ec620070948f9cccd9c26476fcae
 size 174191400

llm.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b4ce3c75e0ec09d93c0580ed862f12191e3ea3bd53ab8906e0108cafd5d6fc18
 size 1506831304

 version https://git-lfs.github.com/spec/v1
+oid sha256:816725a4e4eaaf6f5a2bb5c3cb678c13f298ccc280937e88c3c947d9fc052fb3
 size 1506831304

streaming_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-  "epoch": 5,
-  "unique_samples": 1100,
-  "total_yields": 2200,
   "dataset_positions": {
     "WebSight": 186,
     "ScienceQA": 164,
@@ -9,10 +9,109 @@
     "Flickr8k": 186,
     "NewYorker": 186,
     "Football": 6,
-    "MagicBrush": 186
   },
   "modality_positions": {
-    "text": {},
     "image": {
       "WebSight": 186,
       "ScienceQA": 164,
@@ -26,8 +125,8 @@
     "audio": {}
   },
   "modality_counts": {
-    "text": 0,
-    "image": 300,
     "video": 0,
     "audio": 0
   },

 {
+  "epoch": 12,
+  "unique_samples": 3029,
+  "total_yields": 6058,
   "dataset_positions": {
     "WebSight": 186,
     "ScienceQA": 164,
     "Flickr8k": 186,
     "NewYorker": 186,
     "Football": 6,
+    "MagicBrush": 186,
+    "WildChat": 200,
+    "Synth-ShellExecution": 200,
+    "Midjourney-Prompts": 200,
+    "Synth-KnowledgeCutoff": 200,
+    "Synth-GroundedResponse": 200,
+    "CodeParrot-Clean": 200,
+    "ShareGPT-Clean": 200,
+    "Synth-Issues": 200,
+    "Dolly-15k": 200,
+    "Conversation-Summarization": 200,
+    "Synth-ShellTimeout": 200,
+    "Synth-Docker": 200,
+    "Synth-Documents": 200,
+    "HumanEval-JavaScript": 164,
+    "OpenOrca": 200,
+    "Synth-MultiStepExecution": 200,
+    "Synth-Citation": 200,
+    "NoRobots": 200,
+    "Synth-LanguageSetup": 200,
+    "Function-Calling-ChatML": 200,
+    "Synth-CoT": 200,
+    "Python-Code-18k": 200,
+    "Code-Feedback": 200,
+    "HumanEval-CPP": 164,
+    "AgentInstruct": 195,
+    "SD-Prompts": 200,
+    "Synth-Diffs": 200,
+    "Golang-Coder": 200,
+    "Synth-ConfidenceLevel": 200,
+    "Synth-RepoContext": 200,
+    "HumanEval-Go": 164,
+    "Synth-SelfCorrection": 200,
+    "Synth-FactCheck": 200,
+    "Synth-Downloads": 200,
+    "Synth-RetrievalGrounded": 200,
+    "Synth-IDK": 200,
+    "Synth-APIGen": 200,
+    "Synth-PythonScripts": 200,
+    "Synth-Uncertainty": 200,
+    "HumanEval-Python": 164,
+    "Golang-QA-2k": 200,
+    "Synth-ShellErrors": 200,
+    "Synth-Jupyter": 200,
+    "Jupyter-Code": 200,
+    "Synth-Execution": 200,
+    "Synth-Monitoring": 200,
+    "Synth-DatabaseSetup": 200,
+    "HumanEval-Java": 164,
+    "Synth-AptInstall": 200,
+    "UltraChat": 200,
+    "Synth-DesktopSetup": 200,
+    "SD-Prompts-2M": 200,
+    "Synth-WebserverSetup": 200,
+    "Pythonic-Function-Calling": 200,
+    "Swift-Code-Edit": 10,
+    "Glaive-Code-Assistant": 200,
+    "File-Operations-Medium": 200,
+    "Swift-Code-RLVR": 200,
+    "Synth-SSHSetup": 200,
+    "HumanEval-Rust": 164,
+    "Synth-Commits": 200,
+    "Synth-FIM": 200,
+    "Synth-Debugging": 200,
+    "Tool-Calls-SingleTurn": 200,
+    "Tool-Calls-Multiturn": 200,
+    "OpenAssistant": 200
   },
   "modality_positions": {
+    "text": {
+      "WildChat": 200,
+      "Midjourney-Prompts": 200,
+      "CodeParrot-Clean": 200,
+      "ShareGPT-Clean": 200,
+      "Dolly-15k": 200,
+      "Conversation-Summarization": 200,
+      "HumanEval-JavaScript": 164,
+      "OpenOrca": 200,
+      "NoRobots": 200,
+      "Function-Calling-ChatML": 200,
+      "Python-Code-18k": 200,
+      "Code-Feedback": 200,
+      "HumanEval-CPP": 164,
+      "AgentInstruct": 195,
+      "SD-Prompts": 200,
+      "Golang-Coder": 200,
+      "HumanEval-Go": 164,
+      "Synth-APIGen": 200,
+      "HumanEval-Python": 164,
+      "Golang-QA-2k": 200,
+      "Jupyter-Code": 200,
+      "HumanEval-Java": 164,
+      "UltraChat": 200,
+      "SD-Prompts-2M": 200,
+      "Pythonic-Function-Calling": 200,
+      "Swift-Code-Edit": 10,
+      "Glaive-Code-Assistant": 200,
+      "Swift-Code-RLVR": 200,
+      "HumanEval-Rust": 164,
+      "Tool-Calls-SingleTurn": 200,
+      "Tool-Calls-Multiturn": 200,
+      "OpenAssistant": 200
+    },
     "image": {
       "WebSight": 186,
       "ScienceQA": 164,
     "audio": {}
   },
   "modality_counts": {
+    "text": 3029,
+    "image": 0,
     "video": 0,
     "audio": 0
   },

trainer_state.json CHANGED Viewed

@@ -1,31 +1,31 @@
 {
   "best_model_checkpoint": "/kaggle/working/xoron-final",
-  "best_metric": 4.019162586334472,
-  "epoch": 1,
-  "epochs_completed": 1,
-  "global_step": 37,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [],
   "logging_steps": 50,
-  "max_steps": 37,
-  "num_train_epochs": 1,
   "total_flos": 0,
   "train_batch_size": 1,
   "effective_batch_size": 16,
   "learning_rate": 0.0001,
   "max_grad_norm": 1.0,
   "trainable_components": [
-    "vision",
     "llm",
     "cross_attention",
-    "image_generation",
     "modality_markers"
   ],
   "frozen_components": [
     "video",
     "audio",
     "speech",
     "video_generation"
   ],
   "trial_name": null,

 {
   "best_model_checkpoint": "/kaggle/working/xoron-final",
+  "best_metric": 3.646694382440487,
+  "epoch": 4,
+  "epochs_completed": 4,
+  "global_step": 1597,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [],
   "logging_steps": 50,
+  "max_steps": 1597,
+  "num_train_epochs": 4,
   "total_flos": 0,
   "train_batch_size": 1,
   "effective_batch_size": 16,
   "learning_rate": 0.0001,
   "max_grad_norm": 1.0,
   "trainable_components": [
     "llm",
     "cross_attention",
     "modality_markers"
   ],
   "frozen_components": [
+    "vision",
     "video",
     "audio",
     "speech",
+    "image_generation",
     "video_generation"
   ],
   "trial_name": null,

training_state.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cff88f1a8ee14094dfffadc0ac06d52480a2d90bd740252423ecf77cdef8f6cc
-size 1419723549

 version https://git-lfs.github.com/spec/v1
+oid sha256:9be0dff94c6d235091cae224c3e034a33fa84932af351d74ac37a512956c5486
+size 781495681