Update model weights after training (epoch 1, loss 6.6292)

Browse files

Files changed (7) hide show

audio_decoder.safetensors +1 -1
config.json +1 -1
cross_attention.safetensors +1 -1
llm.safetensors +1 -1
streaming_state.json +104 -8
trainer_state.json +5 -5
training_state.pt +2 -2

audio_decoder.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:261d0afbb5e08b6b1900e3dea25eb42c412e5542bee5a4f0681a898ae9c8bcd8
 size 1458415836

 version https://git-lfs.github.com/spec/v1
+oid sha256:3226fee536a749a40aab83f5afa949808d778485026f933161c2d0a6b66c03f9
 size 1458415836

config.json CHANGED Viewed

@@ -5,7 +5,7 @@
   "num_layers": 12,
   "num_heads": 16,
   "intermediate_size": 2048,
-  "vocab_size": 151643,
   "max_position_embeddings": 131072,
   "rms_norm_eps": 1e-06,
   "use_ring_attention": true,

   "num_layers": 12,
   "num_heads": 16,
   "intermediate_size": 2048,
+  "vocab_size": 152200,
   "max_position_embeddings": 131072,
   "rms_norm_eps": 1e-06,
   "use_ring_attention": true,

cross_attention.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:27a688ad60e1a8efc783cfa66ad0ed5e9c4e74a0f5437e134ea717bdbb761eb3
 size 174191400

 version https://git-lfs.github.com/spec/v1
+oid sha256:c1dd70b1b4136042c3241058967ff7fb8423547263fe302498c3cc9f2ab00703
 size 174191400

llm.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5f05d7774509a9338dd769956837f7b62c63ad4ab45a56fbb919230f51c876c6
 size 1506831304

 version https://git-lfs.github.com/spec/v1
+oid sha256:c84ad6f98c7c9d20394a4a356dd6f56d27ee8ada70d3a891c1e8e557df3280dd
 size 1506831304

streaming_state.json CHANGED Viewed

@@ -1,21 +1,117 @@
 {
   "epoch": 1,
-  "unique_samples": 1,
-  "total_yields": 2,
   "dataset_positions": {
-    "InstructPix2Pix": 1
   },
   "modality_positions": {
-    "text": {},
-    "image": {
-      "InstructPix2Pix": 1
     },
     "video": {},
     "audio": {}
   },
   "modality_counts": {
-    "text": 0,
-    "image": 1,
     "video": 0,
     "audio": 0
   },

 {
   "epoch": 1,
+  "unique_samples": 3260,
+  "total_yields": 6520,
   "dataset_positions": {
+    "Synth-SelfCorrection": 50,
+    "Synth-Documents": 50,
+    "Synth-ShellTimeout": 50,
+    "Jupyter-Code": 50,
+    "HumanEval-JavaScript": 50,
+    "Synth-DesktopSetup": 50,
+    "UltraChat": 50,
+    "HumanEval-Python": 50,
+    "Dolly-15k": 50,
+    "Synth-ShellExecution": 50,
+    "Midjourney-Prompts": 50,
+    "Synth-PythonScripts": 50,
+    "Synth-Issues": 50,
+    "Synth-Monitoring": 50,
+    "Synth-KnowledgeCutoff": 50,
+    "Synth-Uncertainty": 50,
+    "Swift-Code-RLVR": 50,
+    "HumanEval-CPP": 50,
+    "Synth-CoT": 50,
+    "Synth-Debugging": 50,
+    "Swift-Code-Edit": 10,
+    "SD-Prompts-2M": 50,
+    "Synth-WebserverSetup": 50,
+    "Synth-SSHSetup": 50,
+    "File-Operations-Medium": 50,
+    "Python-Code-18k": 50,
+    "Synth-RepoContext": 50,
+    "Synth-IDK": 50,
+    "WildChat": 50,
+    "Synth-FIM": 50,
+    "Synth-GroundedResponse": 50,
+    "Synth-AptInstall": 50,
+    "Golang-Coder": 50,
+    "HumanEval-Java": 50,
+    "AgentInstruct": 50,
+    "Function-Calling-ChatML": 50,
+    "Synth-Downloads": 50,
+    "Synth-MultiStepExecution": 50,
+    "Synth-RetrievalGrounded": 50,
+    "Pythonic-Function-Calling": 50,
+    "OpenOrca": 50,
+    "Synth-Citation": 50,
+    "Golang-QA-2k": 50,
+    "Synth-APIGen": 50,
+    "CodeParrot-Clean": 50,
+    "Synth-Jupyter": 50,
+    "Synth-ShellErrors": 50,
+    "NoRobots": 50,
+    "Synth-Docker": 50,
+    "Glaive-Code-Assistant": 50,
+    "Synth-Diffs": 50,
+    "ShareGPT-Clean": 50,
+    "Code-Feedback": 50,
+    "Conversation-Summarization": 50,
+    "SD-Prompts": 50,
+    "Synth-LanguageSetup": 50,
+    "Synth-FactCheck": 50,
+    "Synth-Execution": 50,
+    "HumanEval-Rust": 50,
+    "Synth-DatabaseSetup": 50,
+    "Synth-ConfidenceLevel": 50,
+    "Synth-Commits": 50,
+    "HumanEval-Go": 50,
+    "Tool-Calls-Multiturn": 50,
+    "OpenAssistant": 50,
+    "Tool-Calls-SingleTurn": 50
   },
   "modality_positions": {
+    "text": {
+      "Jupyter-Code": 50,
+      "HumanEval-JavaScript": 50,
+      "UltraChat": 50,
+      "HumanEval-Python": 50,
+      "Dolly-15k": 50,
+      "Midjourney-Prompts": 50,
+      "Swift-Code-RLVR": 50,
+      "HumanEval-CPP": 50,
+      "Swift-Code-Edit": 10,
+      "SD-Prompts-2M": 50,
+      "Python-Code-18k": 50,
+      "WildChat": 50,
+      "Golang-Coder": 50,
+      "HumanEval-Java": 50,
+      "AgentInstruct": 50,
+      "Function-Calling-ChatML": 50,
+      "Pythonic-Function-Calling": 50,
+      "OpenOrca": 50,
+      "Golang-QA-2k": 50,
+      "Synth-APIGen": 50,
+      "CodeParrot-Clean": 50,
+      "NoRobots": 50,
+      "Glaive-Code-Assistant": 50,
+      "ShareGPT-Clean": 50,
+      "Code-Feedback": 50,
+      "Conversation-Summarization": 50,
+      "SD-Prompts": 50,
+      "HumanEval-Rust": 50,
+      "HumanEval-Go": 50,
+      "Tool-Calls-Multiturn": 50,
+      "OpenAssistant": 50,
+      "Tool-Calls-SingleTurn": 50
     },
+    "image": {},
     "video": {},
     "audio": {}
   },
   "modality_counts": {
+    "text": 3260,
+    "image": 0,
     "video": 0,
     "audio": 0
   },

trainer_state.json CHANGED Viewed

@@ -1,14 +1,14 @@
 {
   "best_model_checkpoint": "/kaggle/working/xoron-final",
-  "best_metric": 12.373827934265137,
   "epoch": 1,
   "epochs_completed": 1,
-  "global_step": 0,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [],
   "logging_steps": 50,
-  "max_steps": 0,
   "num_train_epochs": 1,
   "total_flos": 0,
   "train_batch_size": 1,
@@ -16,16 +16,16 @@
   "learning_rate": 0.0001,
   "max_grad_norm": 1.0,
   "trainable_components": [
-    "vision",
     "llm",
     "cross_attention",
-    "image_generation",
     "modality_markers"
   ],
   "frozen_components": [
     "video",
     "audio",
     "speech",
     "video_generation"
   ],
   "trial_name": null,

 {
   "best_model_checkpoint": "/kaggle/working/xoron-final",
+  "best_metric": 6.629150597175206,
   "epoch": 1,
   "epochs_completed": 1,
+  "global_step": 407,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [],
   "logging_steps": 50,
+  "max_steps": 407,
   "num_train_epochs": 1,
   "total_flos": 0,
   "train_batch_size": 1,
   "learning_rate": 0.0001,
   "max_grad_norm": 1.0,
   "trainable_components": [
     "llm",
     "cross_attention",
     "modality_markers"
   ],
   "frozen_components": [
+    "vision",
     "video",
     "audio",
     "speech",
+    "image_generation",
     "video_generation"
   ],
   "trial_name": null,

training_state.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:09c47de74291bae883f60d4986fe8dbc38a2c68de7574fdabc66ce46222ed711
-size 5143

 version https://git-lfs.github.com/spec/v1
+oid sha256:d32523d7fc44d7f0f1c884a1463149d48212161a3bfbd0f82b045cf4a1d583a2
+size 781490561