Update model weights after training (epoch 7, loss 4.0529)

Browse files

Files changed (6) hide show

audio_decoder.safetensors +1 -1
cross_attention.safetensors +1 -1
llm.safetensors +1 -1
streaming_state.json +25 -15
trainer_state.json +3 -3
training_state.pt +1 -1

audio_decoder.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1628ac8faba5b54287f88aae6cd7885ccfa61f06ea0a09b08f01e91463b96df6
 size 1458410612

 version https://git-lfs.github.com/spec/v1
+oid sha256:c0d441ee5242446730041340f4527da087aacd6ed532fe2360272445c4a95f61
 size 1458410612

cross_attention.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:01f2c0d12d9a882e71b37a268aa426e99b03cbc1372629eb283a28a10d05d5c6
 size 174191400

 version https://git-lfs.github.com/spec/v1
+oid sha256:5b80e7ffc0041a1d36ca67d251b7699f834fd719ebda58a39a2fb0b50e44db53
 size 174191400

llm.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dcb8a06e9fc3b7ab14df9d1e54eea7fe4732a0ece031f4af02f4bed76416c620
 size 1506832040

 version https://git-lfs.github.com/spec/v1
+oid sha256:f634772387625e35cd41da1b9426234e6eab6e1df5648597b153a1e9095fe226
 size 1506832040

streaming_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-  "epoch": 98,
-  "unique_samples": 400,
-  "total_yields": 800,
   "dataset_positions": {
     "WebSight": 386,
     "ScienceQA": 364,
@@ -13,8 +13,8 @@
     "WildChat": 200,
     "Synth-ShellExecution": 200,
     "Midjourney-Prompts": 200,
-    "Synth-KnowledgeCutoff": 200,
-    "Synth-GroundedResponse": 200,
     "CodeParrot-Clean": 200,
     "ShareGPT-Clean": 200,
     "Synth-Issues": 200,
@@ -26,7 +26,7 @@
     "HumanEval-JavaScript": 164,
     "OpenOrca": 450,
     "Synth-MultiStepExecution": 200,
-    "Synth-Citation": 200,
     "NoRobots": 450,
     "Synth-LanguageSetup": 200,
     "Function-Calling-ChatML": 200,
@@ -38,17 +38,17 @@
     "SD-Prompts": 200,
     "Synth-Diffs": 200,
     "Golang-Coder": 200,
-    "Synth-ConfidenceLevel": 200,
     "Synth-RepoContext": 200,
     "HumanEval-Go": 164,
-    "Synth-SelfCorrection": 200,
-    "Synth-FactCheck": 200,
     "Synth-Downloads": 200,
-    "Synth-RetrievalGrounded": 200,
-    "Synth-IDK": 200,
     "Synth-APIGen": 200,
     "Synth-PythonScripts": 200,
-    "Synth-Uncertainty": 200,
     "HumanEval-Python": 164,
     "Golang-QA-2k": 200,
     "Synth-ShellErrors": 200,
@@ -145,7 +145,16 @@
       "Cosmopedia-OpenStax": 600,
       "MedMCQA": 350,
       "Medical-Reasoning-SFT-Mega": 350,
-      "Medical-O1-Reasoning-EN": 350
     },
     "image": {
       "WebSight": 386,
@@ -170,10 +179,11 @@
     "audio": {}
   },
   "modality_counts": {
-    "text": 400,
     "image": 0,
     "video": 0,
-    "audio": 0
   },
   "last_modality": null
 }

 {
+  "epoch": 111,
+  "unique_samples": 450,
+  "total_yields": 900,
   "dataset_positions": {
     "WebSight": 386,
     "ScienceQA": 364,
     "WildChat": 200,
     "Synth-ShellExecution": 200,
     "Midjourney-Prompts": 200,
+    "Synth-KnowledgeCutoff": 550,
+    "Synth-GroundedResponse": 550,
     "CodeParrot-Clean": 200,
     "ShareGPT-Clean": 200,
     "Synth-Issues": 200,
     "HumanEval-JavaScript": 164,
     "OpenOrca": 450,
     "Synth-MultiStepExecution": 200,
+    "Synth-Citation": 550,
     "NoRobots": 450,
     "Synth-LanguageSetup": 200,
     "Function-Calling-ChatML": 200,
     "SD-Prompts": 200,
     "Synth-Diffs": 200,
     "Golang-Coder": 200,
+    "Synth-ConfidenceLevel": 550,
     "Synth-RepoContext": 200,
     "HumanEval-Go": 164,
+    "Synth-SelfCorrection": 550,
+    "Synth-FactCheck": 550,
     "Synth-Downloads": 200,
+    "Synth-RetrievalGrounded": 550,
+    "Synth-IDK": 550,
     "Synth-APIGen": 200,
     "Synth-PythonScripts": 200,
+    "Synth-Uncertainty": 550,
     "HumanEval-Python": 164,
     "Golang-QA-2k": 200,
     "Synth-ShellErrors": 200,
       "Cosmopedia-OpenStax": 600,
       "MedMCQA": 350,
       "Medical-Reasoning-SFT-Mega": 350,
+      "Medical-O1-Reasoning-EN": 350,
+      "Synth-SelfCorrection": 550,
+      "Synth-GroundedResponse": 550,
+      "Synth-IDK": 550,
+      "Synth-KnowledgeCutoff": 550,
+      "Synth-RetrievalGrounded": 550,
+      "Synth-FactCheck": 550,
+      "Synth-ConfidenceLevel": 550,
+      "Synth-Citation": 550,
+      "Synth-Uncertainty": 550
     },
     "image": {
       "WebSight": 386,
     "audio": {}
   },
   "modality_counts": {
+    "text": 0,
     "image": 0,
     "video": 0,
+    "audio": 0,
+    "anti_hallucination": 450
   },
   "last_modality": null
 }

trainer_state.json CHANGED Viewed

@@ -1,14 +1,14 @@
 {
   "best_model_checkpoint": "/kaggle/working/xoron-final",
-  "best_metric": 5.354270628392697,
   "epoch": 7,
   "epochs_completed": 7,
-  "global_step": 350,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [],
   "logging_steps": 50,
-  "max_steps": 350,
   "num_train_epochs": 7,
   "total_flos": 0,
   "train_batch_size": 1,

 {
   "best_model_checkpoint": "/kaggle/working/xoron-final",
+  "best_metric": 4.052912997868326,
   "epoch": 7,
   "epochs_completed": 7,
+  "global_step": 392,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [],
   "logging_steps": 50,
+  "max_steps": 392,
   "num_train_epochs": 7,
   "total_flos": 0,
   "train_batch_size": 1,

training_state.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:148961e9ff451a61d54b8edf577bcc3dd080efec47d5d54d58a5870199de86a1
 size 1514912171

 version https://git-lfs.github.com/spec/v1
+oid sha256:6774f59a3551cdf2b9af488e15bdec2bf75577937a4376aa12ec72a1c950e780
 size 1514912171