Update model weights after training (epoch 7, loss 7.1865)

Files changed (6) hide show

audio_decoder.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e161c50803ebcf38666c109dde1baef4dc92fa5db9967fbd8e72f2b5af392b76
 size 1458410612

 version https://git-lfs.github.com/spec/v1
+oid sha256:4a240617216d0644ac615eed664398b69c732420dba3c0121a5c1344652fb7fa
 size 1458410612

cross_attention.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ba284496e9f5135658519fcb742fe092211dfd5df2aa73ac4e3effa31fb5319
 size 174191400

 version https://git-lfs.github.com/spec/v1
+oid sha256:8d5b0a5a4040b026f16479ff817aadab4e42a2281750c7728f70aba9fd988a1f
 size 174191400

llm.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:050b46cae242ff76f36def2a463e491fd494e2a8f5ba239229e8cd25851300e5
 size 1506832040

 version https://git-lfs.github.com/spec/v1
+oid sha256:eeebe91f597a9aba32227820d24ec97ec34c5680090cd85ad3006eeefb812081
 size 1506832040

streaming_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-  "epoch": 71,
-  "unique_samples": 400,
-  "total_yields": 800,
   "dataset_positions": {
     "WebSight": 386,
     "ScienceQA": 364,
@@ -94,7 +94,10 @@
     "Cosmopedia-KhanAcademy": 250,
     "NuminaMath-TIR": 250,
     "UltraData-Math-QA": 250,
-    "Cosmopedia-OpenStax": 250
   },
   "modality_positions": {
     "text": {
@@ -139,7 +142,10 @@
       "Cosmopedia-KhanAcademy": 250,
       "NuminaMath-TIR": 250,
       "UltraData-Math-QA": 250,
-      "Cosmopedia-OpenStax": 250
     },
     "image": {
       "WebSight": 386,
@@ -164,7 +170,7 @@
     "audio": {}
   },
   "modality_counts": {
-    "text": 400,
     "image": 0,
     "video": 0,
     "audio": 0

 {
+  "epoch": 84,
+  "unique_samples": 150,
+  "total_yields": 300,
   "dataset_positions": {
     "WebSight": 386,
     "ScienceQA": 364,
     "Cosmopedia-KhanAcademy": 250,
     "NuminaMath-TIR": 250,
     "UltraData-Math-QA": 250,
+    "Cosmopedia-OpenStax": 250,
+    "MedMCQA": 350,
+    "Medical-Reasoning-SFT-Mega": 350,
+    "Medical-O1-Reasoning-EN": 350
   },
   "modality_positions": {
     "text": {
       "Cosmopedia-KhanAcademy": 250,
       "NuminaMath-TIR": 250,
       "UltraData-Math-QA": 250,
+      "Cosmopedia-OpenStax": 250,
+      "MedMCQA": 350,
+      "Medical-Reasoning-SFT-Mega": 350,
+      "Medical-O1-Reasoning-EN": 350
     },
     "image": {
       "WebSight": 386,
     "audio": {}
   },
   "modality_counts": {
+    "text": 150,
     "image": 0,
     "video": 0,
     "audio": 0

trainer_state.json CHANGED Viewed

@@ -1,15 +1,15 @@
 {
   "best_model_checkpoint": "/kaggle/working/xoron-final",
-  "best_metric": 6.564389287829399,
-  "epoch": 5,
-  "epochs_completed": 5,
-  "global_step": 250,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [],
   "logging_steps": 50,
-  "max_steps": 250,
-  "num_train_epochs": 5,
   "total_flos": 0,
   "train_batch_size": 1,
   "effective_batch_size": 16,

 {
   "best_model_checkpoint": "/kaggle/working/xoron-final",
+  "best_metric": 7.186485166748365,
+  "epoch": 7,
+  "epochs_completed": 7,
+  "global_step": 126,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [],
   "logging_steps": 50,
+  "max_steps": 126,
+  "num_train_epochs": 7,
   "total_flos": 0,
   "train_batch_size": 1,
   "effective_batch_size": 16,

training_state.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b617b2a3ab7ba08cac1c55c6a02085d0d226885f2a225d7626553579ac8029ab
 size 1514911851

 version https://git-lfs.github.com/spec/v1
+oid sha256:6733079c08d0e2c4e1bec055b3d45e16f552e09e2b8027d98d7dba03554b4300
 size 1514911851