Backup-bdg committed on
Commit
0ec3133
·
verified ·
1 Parent(s): 39c6cc5

Update model weights after training (epoch 7, loss 7.1865)

Browse files
audio_decoder.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e161c50803ebcf38666c109dde1baef4dc92fa5db9967fbd8e72f2b5af392b76
3
  size 1458410612
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a240617216d0644ac615eed664398b69c732420dba3c0121a5c1344652fb7fa
3
  size 1458410612
cross_attention.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ba284496e9f5135658519fcb742fe092211dfd5df2aa73ac4e3effa31fb5319
3
  size 174191400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d5b0a5a4040b026f16479ff817aadab4e42a2281750c7728f70aba9fd988a1f
3
  size 174191400
llm.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:050b46cae242ff76f36def2a463e491fd494e2a8f5ba239229e8cd25851300e5
3
  size 1506832040
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eeebe91f597a9aba32227820d24ec97ec34c5680090cd85ad3006eeefb812081
3
  size 1506832040
streaming_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 71,
3
- "unique_samples": 400,
4
- "total_yields": 800,
5
  "dataset_positions": {
6
  "WebSight": 386,
7
  "ScienceQA": 364,
@@ -94,7 +94,10 @@
94
  "Cosmopedia-KhanAcademy": 250,
95
  "NuminaMath-TIR": 250,
96
  "UltraData-Math-QA": 250,
97
- "Cosmopedia-OpenStax": 250
 
 
 
98
  },
99
  "modality_positions": {
100
  "text": {
@@ -139,7 +142,10 @@
139
  "Cosmopedia-KhanAcademy": 250,
140
  "NuminaMath-TIR": 250,
141
  "UltraData-Math-QA": 250,
142
- "Cosmopedia-OpenStax": 250
 
 
 
143
  },
144
  "image": {
145
  "WebSight": 386,
@@ -164,7 +170,7 @@
164
  "audio": {}
165
  },
166
  "modality_counts": {
167
- "text": 400,
168
  "image": 0,
169
  "video": 0,
170
  "audio": 0
 
1
  {
2
+ "epoch": 84,
3
+ "unique_samples": 150,
4
+ "total_yields": 300,
5
  "dataset_positions": {
6
  "WebSight": 386,
7
  "ScienceQA": 364,
 
94
  "Cosmopedia-KhanAcademy": 250,
95
  "NuminaMath-TIR": 250,
96
  "UltraData-Math-QA": 250,
97
+ "Cosmopedia-OpenStax": 250,
98
+ "MedMCQA": 350,
99
+ "Medical-Reasoning-SFT-Mega": 350,
100
+ "Medical-O1-Reasoning-EN": 350
101
  },
102
  "modality_positions": {
103
  "text": {
 
142
  "Cosmopedia-KhanAcademy": 250,
143
  "NuminaMath-TIR": 250,
144
  "UltraData-Math-QA": 250,
145
+ "Cosmopedia-OpenStax": 250,
146
+ "MedMCQA": 350,
147
+ "Medical-Reasoning-SFT-Mega": 350,
148
+ "Medical-O1-Reasoning-EN": 350
149
  },
150
  "image": {
151
  "WebSight": 386,
 
170
  "audio": {}
171
  },
172
  "modality_counts": {
173
+ "text": 150,
174
  "image": 0,
175
  "video": 0,
176
  "audio": 0
trainer_state.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "best_model_checkpoint": "/kaggle/working/xoron-final",
3
- "best_metric": 6.564389287829399,
4
- "epoch": 5,
5
- "epochs_completed": 5,
6
- "global_step": 250,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [],
10
  "logging_steps": 50,
11
- "max_steps": 250,
12
- "num_train_epochs": 5,
13
  "total_flos": 0,
14
  "train_batch_size": 1,
15
  "effective_batch_size": 16,
 
1
  {
2
  "best_model_checkpoint": "/kaggle/working/xoron-final",
3
+ "best_metric": 7.186485166748365,
4
+ "epoch": 7,
5
+ "epochs_completed": 7,
6
+ "global_step": 126,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [],
10
  "logging_steps": 50,
11
+ "max_steps": 126,
12
+ "num_train_epochs": 7,
13
  "total_flos": 0,
14
  "train_batch_size": 1,
15
  "effective_batch_size": 16,
training_state.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b617b2a3ab7ba08cac1c55c6a02085d0d226885f2a225d7626553579ac8029ab
3
  size 1514911851
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6733079c08d0e2c4e1bec055b3d45e16f552e09e2b8027d98d7dba03554b4300
3
  size 1514911851