Update model weights after training (epoch 3, loss 3.3970)

Browse files

Files changed (5) hide show

cross_attention.safetensors +1 -1
llm.safetensors +1 -1
streaming_state.json +67 -43
trainer_state.json +3 -3
training_state.pt +2 -2

cross_attention.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a410f48cda8debf8f2d2be1fb09c1d17576889da07735b3bb35816da1f73c91a
 size 174191400

 version https://git-lfs.github.com/spec/v1
+oid sha256:deec8e7a08caa092ae8f2831f90c3a76bd49297d6cc2c0fd8daf80bf163b2128
 size 174191400

llm.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:764e3c0c4f9f4848eaf07fd785e8be9f6fb0e486e607c14ff00311632e59694d
 size 1506836434

 version https://git-lfs.github.com/spec/v1
+oid sha256:22e14456b6c0badb864ac7491d545f18369dd99ad87a247bb0ad716911b28fea
 size 1506836434

streaming_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-  "epoch": 153,
-  "unique_samples": 600,
-  "total_yields": 1200,
   "dataset_positions": {
     "WebSight": 386,
     "ScienceQA": 364,
@@ -10,26 +10,26 @@
     "NewYorker": 386,
     "Football": 6,
     "MagicBrush": 386,
-    "WildChat": 200,
-    "Synth-ShellExecution": 200,
     "Midjourney-Prompts": 200,
     "Synth-KnowledgeCutoff": 550,
     "Synth-GroundedResponse": 550,
     "CodeParrot-Clean": 350,
-    "ShareGPT-Clean": 200,
     "Synth-Issues": 350,
     "Dolly-15k": 800,
     "Conversation-Summarization": 800,
-    "Synth-ShellTimeout": 200,
-    "Synth-Docker": 200,
     "Synth-Documents": 450,
     "HumanEval-JavaScript": 164,
     "OpenOrca": 800,
-    "Synth-MultiStepExecution": 200,
     "Synth-Citation": 550,
     "NoRobots": 800,
-    "Synth-LanguageSetup": 200,
-    "Function-Calling-ChatML": 200,
     "Synth-CoT": 900,
     "Python-Code-18k": 350,
     "Code-Feedback": 350,
@@ -43,38 +43,38 @@
     "HumanEval-Go": 164,
     "Synth-SelfCorrection": 550,
     "Synth-FactCheck": 550,
-    "Synth-Downloads": 200,
     "Synth-RetrievalGrounded": 550,
     "Synth-IDK": 550,
-    "Synth-APIGen": 200,
-    "Synth-PythonScripts": 200,
     "Synth-Uncertainty": 550,
     "HumanEval-Python": 164,
     "Golang-QA-2k": 350,
-    "Synth-ShellErrors": 200,
-    "Synth-Jupyter": 200,
     "Jupyter-Code": 350,
-    "Synth-Execution": 200,
-    "Synth-Monitoring": 200,
-    "Synth-DatabaseSetup": 200,
     "HumanEval-Java": 164,
-    "Synth-AptInstall": 200,
-    "UltraChat": 200,
-    "Synth-DesktopSetup": 200,
     "SD-Prompts-2M": 200,
-    "Synth-WebserverSetup": 200,
-    "Pythonic-Function-Calling": 200,
     "Swift-Code-Edit": 10,
-    "Glaive-Code-Assistant": 200,
-    "File-Operations-Medium": 200,
     "Swift-Code-RLVR": 350,
-    "Synth-SSHSetup": 200,
     "HumanEval-Rust": 164,
     "Synth-Commits": 350,
     "Synth-FIM": 350,
-    "Synth-Debugging": 200,
-    "Tool-Calls-SingleTurn": 200,
-    "Tool-Calls-Multiturn": 200,
     "OpenAssistant": 800,
     "T2V-Sora-Preferences-2": 650,
     "T2V-Human-Preferences": 650,
@@ -99,20 +99,23 @@
     "Medical-Reasoning-SFT-Mega": 650,
     "Medical-O1-Reasoning-EN": 650,
     "OpenThoughts-114k": 350,
-    "Bespoke-Stratos-17k": 350
   },
   "modality_positions": {
     "text": {
-      "WildChat": 200,
       "Midjourney-Prompts": 200,
       "CodeParrot-Clean": 350,
-      "ShareGPT-Clean": 200,
       "Dolly-15k": 800,
       "Conversation-Summarization": 800,
       "HumanEval-JavaScript": 164,
       "OpenOrca": 800,
       "NoRobots": 800,
-      "Function-Calling-ChatML": 200,
       "Python-Code-18k": 350,
       "Code-Feedback": 350,
       "HumanEval-CPP": 164,
@@ -120,20 +123,20 @@
       "SD-Prompts": 200,
       "Golang-Coder": 350,
       "HumanEval-Go": 164,
-      "Synth-APIGen": 200,
       "HumanEval-Python": 164,
       "Golang-QA-2k": 350,
       "Jupyter-Code": 350,
       "HumanEval-Java": 164,
-      "UltraChat": 200,
       "SD-Prompts-2M": 200,
-      "Pythonic-Function-Calling": 200,
       "Swift-Code-Edit": 10,
-      "Glaive-Code-Assistant": 200,
       "Swift-Code-RLVR": 350,
       "HumanEval-Rust": 164,
-      "Tool-Calls-SingleTurn": 200,
-      "Tool-Calls-Multiturn": 200,
       "OpenAssistant": 800,
       "SmolTalk-OpenHermes": 600,
       "SmolTalk-All": 600,
@@ -164,7 +167,28 @@
       "Synth-Issues": 350,
       "Synth-Commits": 350,
       "Synth-FIM": 350,
-      "Synth-Diffs": 350
     },
     "image": {
       "WebSight": 386,
@@ -189,11 +213,11 @@
     "audio": {}
   },
   "modality_counts": {
-    "text": 350,
     "image": 0,
     "video": 0,
     "audio": 0,
-    "agentic_coding": 250
   },
   "last_modality": null
 }

 {
+  "epoch": 158,
+  "unique_samples": 1500,
+  "total_yields": 3000,
   "dataset_positions": {
     "WebSight": 386,
     "ScienceQA": 364,
     "NewYorker": 386,
     "Football": 6,
     "MagicBrush": 386,
+    "WildChat": 350,
+    "Synth-ShellExecution": 350,
     "Midjourney-Prompts": 200,
     "Synth-KnowledgeCutoff": 550,
     "Synth-GroundedResponse": 550,
     "CodeParrot-Clean": 350,
+    "ShareGPT-Clean": 350,
     "Synth-Issues": 350,
     "Dolly-15k": 800,
     "Conversation-Summarization": 800,
+    "Synth-ShellTimeout": 350,
+    "Synth-Docker": 350,
     "Synth-Documents": 450,
     "HumanEval-JavaScript": 164,
     "OpenOrca": 800,
+    "Synth-MultiStepExecution": 350,
     "Synth-Citation": 550,
     "NoRobots": 800,
+    "Synth-LanguageSetup": 350,
+    "Function-Calling-ChatML": 350,
     "Synth-CoT": 900,
     "Python-Code-18k": 350,
     "Code-Feedback": 350,
     "HumanEval-Go": 164,
     "Synth-SelfCorrection": 550,
     "Synth-FactCheck": 550,
+    "Synth-Downloads": 350,
     "Synth-RetrievalGrounded": 550,
     "Synth-IDK": 550,
+    "Synth-APIGen": 350,
+    "Synth-PythonScripts": 350,
     "Synth-Uncertainty": 550,
     "HumanEval-Python": 164,
     "Golang-QA-2k": 350,
+    "Synth-ShellErrors": 350,
+    "Synth-Jupyter": 350,
     "Jupyter-Code": 350,
+    "Synth-Execution": 350,
+    "Synth-Monitoring": 350,
+    "Synth-DatabaseSetup": 350,
     "HumanEval-Java": 164,
+    "Synth-AptInstall": 350,
+    "UltraChat": 350,
+    "Synth-DesktopSetup": 350,
     "SD-Prompts-2M": 200,
+    "Synth-WebserverSetup": 350,
+    "Pythonic-Function-Calling": 350,
     "Swift-Code-Edit": 10,
+    "Glaive-Code-Assistant": 350,
+    "File-Operations-Medium": 350,
     "Swift-Code-RLVR": 350,
+    "Synth-SSHSetup": 350,
     "HumanEval-Rust": 164,
     "Synth-Commits": 350,
     "Synth-FIM": 350,
+    "Synth-Debugging": 350,
+    "Tool-Calls-SingleTurn": 350,
+    "Tool-Calls-Multiturn": 350,
     "OpenAssistant": 800,
     "T2V-Sora-Preferences-2": 650,
     "T2V-Human-Preferences": 650,
     "Medical-Reasoning-SFT-Mega": 650,
     "Medical-O1-Reasoning-EN": 650,
     "OpenThoughts-114k": 350,
+    "Bespoke-Stratos-17k": 350,
+    "Synth-FileOps": 150,
+    "Synth-EditLines": 150,
+    "Agentic-CoT-Coding": 150
   },
   "modality_positions": {
     "text": {
+      "WildChat": 350,
       "Midjourney-Prompts": 200,
       "CodeParrot-Clean": 350,
+      "ShareGPT-Clean": 350,
       "Dolly-15k": 800,
       "Conversation-Summarization": 800,
       "HumanEval-JavaScript": 164,
       "OpenOrca": 800,
       "NoRobots": 800,
+      "Function-Calling-ChatML": 350,
       "Python-Code-18k": 350,
       "Code-Feedback": 350,
       "HumanEval-CPP": 164,
       "SD-Prompts": 200,
       "Golang-Coder": 350,
       "HumanEval-Go": 164,
+      "Synth-APIGen": 350,
       "HumanEval-Python": 164,
       "Golang-QA-2k": 350,
       "Jupyter-Code": 350,
       "HumanEval-Java": 164,
+      "UltraChat": 350,
       "SD-Prompts-2M": 200,
+      "Pythonic-Function-Calling": 350,
       "Swift-Code-Edit": 10,
+      "Glaive-Code-Assistant": 350,
       "Swift-Code-RLVR": 350,
       "HumanEval-Rust": 164,
+      "Tool-Calls-SingleTurn": 350,
+      "Tool-Calls-Multiturn": 350,
       "OpenAssistant": 800,
       "SmolTalk-OpenHermes": 600,
       "SmolTalk-All": 600,
       "Synth-Issues": 350,
       "Synth-Commits": 350,
       "Synth-FIM": 350,
+      "Synth-Diffs": 350,
+      "Synth-Monitoring": 350,
+      "Synth-FileOps": 150,
+      "Synth-Debugging": 350,
+      "Synth-Downloads": 350,
+      "Synth-ShellErrors": 350,
+      "Synth-DesktopSetup": 350,
+      "Synth-ShellExecution": 350,
+      "Synth-LanguageSetup": 350,
+      "Synth-DatabaseSetup": 350,
+      "Synth-MultiStepExecution": 350,
+      "Synth-Jupyter": 350,
+      "File-Operations-Medium": 350,
+      "Synth-ShellTimeout": 350,
+      "Synth-Docker": 350,
+      "Synth-SSHSetup": 350,
+      "Synth-EditLines": 150,
+      "Synth-AptInstall": 350,
+      "Synth-Execution": 350,
+      "Synth-PythonScripts": 350,
+      "Synth-WebserverSetup": 350,
+      "Agentic-CoT-Coding": 150
     },
     "image": {
       "WebSight": 386,
     "audio": {}
   },
   "modality_counts": {
+    "text": 500,
     "image": 0,
     "video": 0,
     "audio": 0,
+    "agentic_coding": 1000
   },
   "last_modality": null
 }

trainer_state.json CHANGED Viewed

@@ -1,14 +1,14 @@
 {
   "best_model_checkpoint": "/kaggle/working/xoron-final",
-  "best_metric": 4.816719281872113,
   "epoch": 3,
   "epochs_completed": 3,
-  "global_step": 225,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [],
   "logging_steps": 50,
-  "max_steps": 225,
   "num_train_epochs": 3,
   "total_flos": 0,
   "train_batch_size": 1,

 {
   "best_model_checkpoint": "/kaggle/working/xoron-final",
+  "best_metric": 3.3970277398874362,
   "epoch": 3,
   "epochs_completed": 3,
+  "global_step": 561,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [],
   "logging_steps": 50,
+  "max_steps": 561,
   "num_train_epochs": 3,
   "total_flos": 0,
   "train_batch_size": 1,

training_state.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:293fb75013be44e5d744ce753ca2e022778ec21d4c5bdb1776a5416d3f95f90d
-size 1514916733

 version https://git-lfs.github.com/spec/v1
+oid sha256:c68c3e0a999f4613a219fa4812a9e409690b7cabef93d848a043cf5c66e2b3b9
+size 1514917181