Update model weights after training (epoch 7, loss 6.6223)

Browse files

Files changed (6) hide show

audio_decoder.safetensors +1 -1
cross_attention.safetensors +1 -1
llm.safetensors +1 -1
streaming_state.json +18 -18
trainer_state.json +6 -6
training_state.pt +2 -2

audio_decoder.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f3d8275a11c6158d10ceebea39e190afac85a98214c0971b53d14fc16485311d
 size 1458410612

 version https://git-lfs.github.com/spec/v1
+oid sha256:b122e6fd48e418062afa9a2835c289fd000c1b39c3339a4a203be9976b7ac486
 size 1458410612

cross_attention.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b33d56ad92215c91c9abeb210d299a9a5b5236a6bd185ccd17274fddce82686f
 size 174191400

 version https://git-lfs.github.com/spec/v1
+oid sha256:15e348f1b98e8cc48f633f80a818a98a727f8a95e3794d3d7496c7c67d319c21
 size 174191400

llm.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b9e29bdb5011f77a07216e19cd7160ef7d5bf95fcad4eb3458ee32beea27a98c
 size 1506832040

 version https://git-lfs.github.com/spec/v1
+oid sha256:3888d6f2029add98a6540daf90a2fffaf8b2c0420fca1b401042a37ae56f957f
 size 1506832040

streaming_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-  "epoch": 122,
-  "unique_samples": 150,
-  "total_yields": 300,
   "dataset_positions": {
     "WebSight": 386,
     "ScienceQA": 364,
@@ -18,16 +18,16 @@
     "CodeParrot-Clean": 200,
     "ShareGPT-Clean": 200,
     "Synth-Issues": 200,
-    "Dolly-15k": 450,
-    "Conversation-Summarization": 450,
     "Synth-ShellTimeout": 200,
     "Synth-Docker": 200,
     "Synth-Documents": 450,
     "HumanEval-JavaScript": 164,
-    "OpenOrca": 450,
     "Synth-MultiStepExecution": 200,
     "Synth-Citation": 550,
-    "NoRobots": 450,
     "Synth-LanguageSetup": 200,
     "Function-Calling-ChatML": 200,
     "Synth-CoT": 550,
@@ -75,7 +75,7 @@
     "Synth-Debugging": 200,
     "Tool-Calls-SingleTurn": 200,
     "Tool-Calls-Multiturn": 200,
-    "OpenAssistant": 450,
     "T2V-Sora-Preferences-2": 650,
     "T2V-Human-Preferences": 650,
     "Sora-Alignment-Likert": 198,
@@ -85,8 +85,8 @@
     "Sora-Physics-Likert": 198,
     "TIP-I2V": 650,
     "Pexels-I2V-350k": 650,
-    "SmolTalk-OpenHermes": 250,
-    "SmolTalk-All": 250,
     "Cosmopedia-AutoMath": 600,
     "OpenMathInstruct-1": 600,
     "NuminaMath-CoT": 600,
@@ -105,11 +105,11 @@
       "Midjourney-Prompts": 200,
       "CodeParrot-Clean": 200,
       "ShareGPT-Clean": 200,
-      "Dolly-15k": 450,
-      "Conversation-Summarization": 450,
       "HumanEval-JavaScript": 164,
-      "OpenOrca": 450,
-      "NoRobots": 450,
       "Function-Calling-ChatML": 200,
       "Python-Code-18k": 200,
       "Code-Feedback": 200,
@@ -132,9 +132,9 @@
       "HumanEval-Rust": 164,
       "Tool-Calls-SingleTurn": 200,
       "Tool-Calls-Multiturn": 200,
-      "OpenAssistant": 450,
-      "SmolTalk-OpenHermes": 250,
-      "SmolTalk-All": 250,
       "Cosmopedia-AutoMath": 600,
       "OpenMathInstruct-1": 600,
       "NuminaMath-CoT": 600,
@@ -179,7 +179,7 @@
     "audio": {}
   },
   "modality_counts": {
-    "text": 150,
     "image": 0,
     "video": 0,
     "audio": 0

 {
+  "epoch": 135,
+  "unique_samples": 350,
+  "total_yields": 700,
   "dataset_positions": {
     "WebSight": 386,
     "ScienceQA": 364,
     "CodeParrot-Clean": 200,
     "ShareGPT-Clean": 200,
     "Synth-Issues": 200,
+    "Dolly-15k": 800,
+    "Conversation-Summarization": 800,
     "Synth-ShellTimeout": 200,
     "Synth-Docker": 200,
     "Synth-Documents": 450,
     "HumanEval-JavaScript": 164,
+    "OpenOrca": 800,
     "Synth-MultiStepExecution": 200,
     "Synth-Citation": 550,
+    "NoRobots": 800,
     "Synth-LanguageSetup": 200,
     "Function-Calling-ChatML": 200,
     "Synth-CoT": 550,
     "Synth-Debugging": 200,
     "Tool-Calls-SingleTurn": 200,
     "Tool-Calls-Multiturn": 200,
+    "OpenAssistant": 800,
     "T2V-Sora-Preferences-2": 650,
     "T2V-Human-Preferences": 650,
     "Sora-Alignment-Likert": 198,
     "Sora-Physics-Likert": 198,
     "TIP-I2V": 650,
     "Pexels-I2V-350k": 650,
+    "SmolTalk-OpenHermes": 600,
+    "SmolTalk-All": 600,
     "Cosmopedia-AutoMath": 600,
     "OpenMathInstruct-1": 600,
     "NuminaMath-CoT": 600,
       "Midjourney-Prompts": 200,
       "CodeParrot-Clean": 200,
       "ShareGPT-Clean": 200,
+      "Dolly-15k": 800,
+      "Conversation-Summarization": 800,
       "HumanEval-JavaScript": 164,
+      "OpenOrca": 800,
+      "NoRobots": 800,
       "Function-Calling-ChatML": 200,
       "Python-Code-18k": 200,
       "Code-Feedback": 200,
       "HumanEval-Rust": 164,
       "Tool-Calls-SingleTurn": 200,
       "Tool-Calls-Multiturn": 200,
+      "OpenAssistant": 800,
+      "SmolTalk-OpenHermes": 600,
+      "SmolTalk-All": 600,
       "Cosmopedia-AutoMath": 600,
       "OpenMathInstruct-1": 600,
       "NuminaMath-CoT": 600,
     "audio": {}
   },
   "modality_counts": {
+    "text": 350,
     "image": 0,
     "video": 0,
     "audio": 0

trainer_state.json CHANGED Viewed

@@ -1,15 +1,15 @@
 {
   "best_model_checkpoint": "/kaggle/working/xoron-final",
-  "best_metric": 6.006418684224288,
-  "epoch": 6,
-  "epochs_completed": 6,
-  "global_step": 108,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [],
   "logging_steps": 50,
-  "max_steps": 108,
-  "num_train_epochs": 6,
   "total_flos": 0,
   "train_batch_size": 1,
   "effective_batch_size": 16,

 {
   "best_model_checkpoint": "/kaggle/working/xoron-final",
+  "best_metric": 6.622317645549774,
+  "epoch": 7,
+  "epochs_completed": 7,
+  "global_step": 301,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [],
   "logging_steps": 50,
+  "max_steps": 301,
+  "num_train_epochs": 7,
   "total_flos": 0,
   "train_batch_size": 1,
   "effective_batch_size": 16,

training_state.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:31042e6c4b799d02f267c328d1af6d4c2361df10199ab9df2300d09974e87bc4
-size 1514911723

 version https://git-lfs.github.com/spec/v1
+oid sha256:a89e9a0652c7c060ae5d2f1211f9a8ce9e301009c1282faa827cfb44a01e4db3
+size 1514912171