Backup-bdg committed on
Commit
895b712
·
verified ·
1 Parent(s): fd9897f

Update model weights after training (epoch 3, loss 3.3970)

Browse files
cross_attention.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a410f48cda8debf8f2d2be1fb09c1d17576889da07735b3bb35816da1f73c91a
3
  size 174191400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:deec8e7a08caa092ae8f2831f90c3a76bd49297d6cc2c0fd8daf80bf163b2128
3
  size 174191400
llm.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:764e3c0c4f9f4848eaf07fd785e8be9f6fb0e486e607c14ff00311632e59694d
3
  size 1506836434
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22e14456b6c0badb864ac7491d545f18369dd99ad87a247bb0ad716911b28fea
3
  size 1506836434
streaming_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 153,
3
- "unique_samples": 600,
4
- "total_yields": 1200,
5
  "dataset_positions": {
6
  "WebSight": 386,
7
  "ScienceQA": 364,
@@ -10,26 +10,26 @@
10
  "NewYorker": 386,
11
  "Football": 6,
12
  "MagicBrush": 386,
13
- "WildChat": 200,
14
- "Synth-ShellExecution": 200,
15
  "Midjourney-Prompts": 200,
16
  "Synth-KnowledgeCutoff": 550,
17
  "Synth-GroundedResponse": 550,
18
  "CodeParrot-Clean": 350,
19
- "ShareGPT-Clean": 200,
20
  "Synth-Issues": 350,
21
  "Dolly-15k": 800,
22
  "Conversation-Summarization": 800,
23
- "Synth-ShellTimeout": 200,
24
- "Synth-Docker": 200,
25
  "Synth-Documents": 450,
26
  "HumanEval-JavaScript": 164,
27
  "OpenOrca": 800,
28
- "Synth-MultiStepExecution": 200,
29
  "Synth-Citation": 550,
30
  "NoRobots": 800,
31
- "Synth-LanguageSetup": 200,
32
- "Function-Calling-ChatML": 200,
33
  "Synth-CoT": 900,
34
  "Python-Code-18k": 350,
35
  "Code-Feedback": 350,
@@ -43,38 +43,38 @@
43
  "HumanEval-Go": 164,
44
  "Synth-SelfCorrection": 550,
45
  "Synth-FactCheck": 550,
46
- "Synth-Downloads": 200,
47
  "Synth-RetrievalGrounded": 550,
48
  "Synth-IDK": 550,
49
- "Synth-APIGen": 200,
50
- "Synth-PythonScripts": 200,
51
  "Synth-Uncertainty": 550,
52
  "HumanEval-Python": 164,
53
  "Golang-QA-2k": 350,
54
- "Synth-ShellErrors": 200,
55
- "Synth-Jupyter": 200,
56
  "Jupyter-Code": 350,
57
- "Synth-Execution": 200,
58
- "Synth-Monitoring": 200,
59
- "Synth-DatabaseSetup": 200,
60
  "HumanEval-Java": 164,
61
- "Synth-AptInstall": 200,
62
- "UltraChat": 200,
63
- "Synth-DesktopSetup": 200,
64
  "SD-Prompts-2M": 200,
65
- "Synth-WebserverSetup": 200,
66
- "Pythonic-Function-Calling": 200,
67
  "Swift-Code-Edit": 10,
68
- "Glaive-Code-Assistant": 200,
69
- "File-Operations-Medium": 200,
70
  "Swift-Code-RLVR": 350,
71
- "Synth-SSHSetup": 200,
72
  "HumanEval-Rust": 164,
73
  "Synth-Commits": 350,
74
  "Synth-FIM": 350,
75
- "Synth-Debugging": 200,
76
- "Tool-Calls-SingleTurn": 200,
77
- "Tool-Calls-Multiturn": 200,
78
  "OpenAssistant": 800,
79
  "T2V-Sora-Preferences-2": 650,
80
  "T2V-Human-Preferences": 650,
@@ -99,20 +99,23 @@
99
  "Medical-Reasoning-SFT-Mega": 650,
100
  "Medical-O1-Reasoning-EN": 650,
101
  "OpenThoughts-114k": 350,
102
- "Bespoke-Stratos-17k": 350
 
 
 
103
  },
104
  "modality_positions": {
105
  "text": {
106
- "WildChat": 200,
107
  "Midjourney-Prompts": 200,
108
  "CodeParrot-Clean": 350,
109
- "ShareGPT-Clean": 200,
110
  "Dolly-15k": 800,
111
  "Conversation-Summarization": 800,
112
  "HumanEval-JavaScript": 164,
113
  "OpenOrca": 800,
114
  "NoRobots": 800,
115
- "Function-Calling-ChatML": 200,
116
  "Python-Code-18k": 350,
117
  "Code-Feedback": 350,
118
  "HumanEval-CPP": 164,
@@ -120,20 +123,20 @@
120
  "SD-Prompts": 200,
121
  "Golang-Coder": 350,
122
  "HumanEval-Go": 164,
123
- "Synth-APIGen": 200,
124
  "HumanEval-Python": 164,
125
  "Golang-QA-2k": 350,
126
  "Jupyter-Code": 350,
127
  "HumanEval-Java": 164,
128
- "UltraChat": 200,
129
  "SD-Prompts-2M": 200,
130
- "Pythonic-Function-Calling": 200,
131
  "Swift-Code-Edit": 10,
132
- "Glaive-Code-Assistant": 200,
133
  "Swift-Code-RLVR": 350,
134
  "HumanEval-Rust": 164,
135
- "Tool-Calls-SingleTurn": 200,
136
- "Tool-Calls-Multiturn": 200,
137
  "OpenAssistant": 800,
138
  "SmolTalk-OpenHermes": 600,
139
  "SmolTalk-All": 600,
@@ -164,7 +167,28 @@
164
  "Synth-Issues": 350,
165
  "Synth-Commits": 350,
166
  "Synth-FIM": 350,
167
- "Synth-Diffs": 350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  },
169
  "image": {
170
  "WebSight": 386,
@@ -189,11 +213,11 @@
189
  "audio": {}
190
  },
191
  "modality_counts": {
192
- "text": 350,
193
  "image": 0,
194
  "video": 0,
195
  "audio": 0,
196
- "agentic_coding": 250
197
  },
198
  "last_modality": null
199
  }
 
1
  {
2
+ "epoch": 158,
3
+ "unique_samples": 1500,
4
+ "total_yields": 3000,
5
  "dataset_positions": {
6
  "WebSight": 386,
7
  "ScienceQA": 364,
 
10
  "NewYorker": 386,
11
  "Football": 6,
12
  "MagicBrush": 386,
13
+ "WildChat": 350,
14
+ "Synth-ShellExecution": 350,
15
  "Midjourney-Prompts": 200,
16
  "Synth-KnowledgeCutoff": 550,
17
  "Synth-GroundedResponse": 550,
18
  "CodeParrot-Clean": 350,
19
+ "ShareGPT-Clean": 350,
20
  "Synth-Issues": 350,
21
  "Dolly-15k": 800,
22
  "Conversation-Summarization": 800,
23
+ "Synth-ShellTimeout": 350,
24
+ "Synth-Docker": 350,
25
  "Synth-Documents": 450,
26
  "HumanEval-JavaScript": 164,
27
  "OpenOrca": 800,
28
+ "Synth-MultiStepExecution": 350,
29
  "Synth-Citation": 550,
30
  "NoRobots": 800,
31
+ "Synth-LanguageSetup": 350,
32
+ "Function-Calling-ChatML": 350,
33
  "Synth-CoT": 900,
34
  "Python-Code-18k": 350,
35
  "Code-Feedback": 350,
 
43
  "HumanEval-Go": 164,
44
  "Synth-SelfCorrection": 550,
45
  "Synth-FactCheck": 550,
46
+ "Synth-Downloads": 350,
47
  "Synth-RetrievalGrounded": 550,
48
  "Synth-IDK": 550,
49
+ "Synth-APIGen": 350,
50
+ "Synth-PythonScripts": 350,
51
  "Synth-Uncertainty": 550,
52
  "HumanEval-Python": 164,
53
  "Golang-QA-2k": 350,
54
+ "Synth-ShellErrors": 350,
55
+ "Synth-Jupyter": 350,
56
  "Jupyter-Code": 350,
57
+ "Synth-Execution": 350,
58
+ "Synth-Monitoring": 350,
59
+ "Synth-DatabaseSetup": 350,
60
  "HumanEval-Java": 164,
61
+ "Synth-AptInstall": 350,
62
+ "UltraChat": 350,
63
+ "Synth-DesktopSetup": 350,
64
  "SD-Prompts-2M": 200,
65
+ "Synth-WebserverSetup": 350,
66
+ "Pythonic-Function-Calling": 350,
67
  "Swift-Code-Edit": 10,
68
+ "Glaive-Code-Assistant": 350,
69
+ "File-Operations-Medium": 350,
70
  "Swift-Code-RLVR": 350,
71
+ "Synth-SSHSetup": 350,
72
  "HumanEval-Rust": 164,
73
  "Synth-Commits": 350,
74
  "Synth-FIM": 350,
75
+ "Synth-Debugging": 350,
76
+ "Tool-Calls-SingleTurn": 350,
77
+ "Tool-Calls-Multiturn": 350,
78
  "OpenAssistant": 800,
79
  "T2V-Sora-Preferences-2": 650,
80
  "T2V-Human-Preferences": 650,
 
99
  "Medical-Reasoning-SFT-Mega": 650,
100
  "Medical-O1-Reasoning-EN": 650,
101
  "OpenThoughts-114k": 350,
102
+ "Bespoke-Stratos-17k": 350,
103
+ "Synth-FileOps": 150,
104
+ "Synth-EditLines": 150,
105
+ "Agentic-CoT-Coding": 150
106
  },
107
  "modality_positions": {
108
  "text": {
109
+ "WildChat": 350,
110
  "Midjourney-Prompts": 200,
111
  "CodeParrot-Clean": 350,
112
+ "ShareGPT-Clean": 350,
113
  "Dolly-15k": 800,
114
  "Conversation-Summarization": 800,
115
  "HumanEval-JavaScript": 164,
116
  "OpenOrca": 800,
117
  "NoRobots": 800,
118
+ "Function-Calling-ChatML": 350,
119
  "Python-Code-18k": 350,
120
  "Code-Feedback": 350,
121
  "HumanEval-CPP": 164,
 
123
  "SD-Prompts": 200,
124
  "Golang-Coder": 350,
125
  "HumanEval-Go": 164,
126
+ "Synth-APIGen": 350,
127
  "HumanEval-Python": 164,
128
  "Golang-QA-2k": 350,
129
  "Jupyter-Code": 350,
130
  "HumanEval-Java": 164,
131
+ "UltraChat": 350,
132
  "SD-Prompts-2M": 200,
133
+ "Pythonic-Function-Calling": 350,
134
  "Swift-Code-Edit": 10,
135
+ "Glaive-Code-Assistant": 350,
136
  "Swift-Code-RLVR": 350,
137
  "HumanEval-Rust": 164,
138
+ "Tool-Calls-SingleTurn": 350,
139
+ "Tool-Calls-Multiturn": 350,
140
  "OpenAssistant": 800,
141
  "SmolTalk-OpenHermes": 600,
142
  "SmolTalk-All": 600,
 
167
  "Synth-Issues": 350,
168
  "Synth-Commits": 350,
169
  "Synth-FIM": 350,
170
+ "Synth-Diffs": 350,
171
+ "Synth-Monitoring": 350,
172
+ "Synth-FileOps": 150,
173
+ "Synth-Debugging": 350,
174
+ "Synth-Downloads": 350,
175
+ "Synth-ShellErrors": 350,
176
+ "Synth-DesktopSetup": 350,
177
+ "Synth-ShellExecution": 350,
178
+ "Synth-LanguageSetup": 350,
179
+ "Synth-DatabaseSetup": 350,
180
+ "Synth-MultiStepExecution": 350,
181
+ "Synth-Jupyter": 350,
182
+ "File-Operations-Medium": 350,
183
+ "Synth-ShellTimeout": 350,
184
+ "Synth-Docker": 350,
185
+ "Synth-SSHSetup": 350,
186
+ "Synth-EditLines": 150,
187
+ "Synth-AptInstall": 350,
188
+ "Synth-Execution": 350,
189
+ "Synth-PythonScripts": 350,
190
+ "Synth-WebserverSetup": 350,
191
+ "Agentic-CoT-Coding": 150
192
  },
193
  "image": {
194
  "WebSight": 386,
 
213
  "audio": {}
214
  },
215
  "modality_counts": {
216
+ "text": 500,
217
  "image": 0,
218
  "video": 0,
219
  "audio": 0,
220
+ "agentic_coding": 1000
221
  },
222
  "last_modality": null
223
  }
trainer_state.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "best_model_checkpoint": "/kaggle/working/xoron-final",
3
- "best_metric": 4.816719281872113,
4
  "epoch": 3,
5
  "epochs_completed": 3,
6
- "global_step": 225,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [],
10
  "logging_steps": 50,
11
- "max_steps": 225,
12
  "num_train_epochs": 3,
13
  "total_flos": 0,
14
  "train_batch_size": 1,
 
1
  {
2
  "best_model_checkpoint": "/kaggle/working/xoron-final",
3
+ "best_metric": 3.3970277398874362,
4
  "epoch": 3,
5
  "epochs_completed": 3,
6
+ "global_step": 561,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [],
10
  "logging_steps": 50,
11
+ "max_steps": 561,
12
  "num_train_epochs": 3,
13
  "total_flos": 0,
14
  "train_batch_size": 1,
training_state.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:293fb75013be44e5d744ce753ca2e022778ec21d4c5bdb1776a5416d3f95f90d
3
- size 1514916733
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c68c3e0a999f4613a219fa4812a9e409690b7cabef93d848a043cf5c66e2b3b9
3
+ size 1514917181