Backup-bdg committed on
Commit
e2adb57
·
verified ·
1 Parent(s): 5e6894f

Update model weights after training (epoch 7, loss 6.6223)

Browse files
audio_decoder.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3d8275a11c6158d10ceebea39e190afac85a98214c0971b53d14fc16485311d
3
  size 1458410612
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b122e6fd48e418062afa9a2835c289fd000c1b39c3339a4a203be9976b7ac486
3
  size 1458410612
cross_attention.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b33d56ad92215c91c9abeb210d299a9a5b5236a6bd185ccd17274fddce82686f
3
  size 174191400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15e348f1b98e8cc48f633f80a818a98a727f8a95e3794d3d7496c7c67d319c21
3
  size 174191400
llm.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9e29bdb5011f77a07216e19cd7160ef7d5bf95fcad4eb3458ee32beea27a98c
3
  size 1506832040
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3888d6f2029add98a6540daf90a2fffaf8b2c0420fca1b401042a37ae56f957f
3
  size 1506832040
streaming_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 122,
3
- "unique_samples": 150,
4
- "total_yields": 300,
5
  "dataset_positions": {
6
  "WebSight": 386,
7
  "ScienceQA": 364,
@@ -18,16 +18,16 @@
18
  "CodeParrot-Clean": 200,
19
  "ShareGPT-Clean": 200,
20
  "Synth-Issues": 200,
21
- "Dolly-15k": 450,
22
- "Conversation-Summarization": 450,
23
  "Synth-ShellTimeout": 200,
24
  "Synth-Docker": 200,
25
  "Synth-Documents": 450,
26
  "HumanEval-JavaScript": 164,
27
- "OpenOrca": 450,
28
  "Synth-MultiStepExecution": 200,
29
  "Synth-Citation": 550,
30
- "NoRobots": 450,
31
  "Synth-LanguageSetup": 200,
32
  "Function-Calling-ChatML": 200,
33
  "Synth-CoT": 550,
@@ -75,7 +75,7 @@
75
  "Synth-Debugging": 200,
76
  "Tool-Calls-SingleTurn": 200,
77
  "Tool-Calls-Multiturn": 200,
78
- "OpenAssistant": 450,
79
  "T2V-Sora-Preferences-2": 650,
80
  "T2V-Human-Preferences": 650,
81
  "Sora-Alignment-Likert": 198,
@@ -85,8 +85,8 @@
85
  "Sora-Physics-Likert": 198,
86
  "TIP-I2V": 650,
87
  "Pexels-I2V-350k": 650,
88
- "SmolTalk-OpenHermes": 250,
89
- "SmolTalk-All": 250,
90
  "Cosmopedia-AutoMath": 600,
91
  "OpenMathInstruct-1": 600,
92
  "NuminaMath-CoT": 600,
@@ -105,11 +105,11 @@
105
  "Midjourney-Prompts": 200,
106
  "CodeParrot-Clean": 200,
107
  "ShareGPT-Clean": 200,
108
- "Dolly-15k": 450,
109
- "Conversation-Summarization": 450,
110
  "HumanEval-JavaScript": 164,
111
- "OpenOrca": 450,
112
- "NoRobots": 450,
113
  "Function-Calling-ChatML": 200,
114
  "Python-Code-18k": 200,
115
  "Code-Feedback": 200,
@@ -132,9 +132,9 @@
132
  "HumanEval-Rust": 164,
133
  "Tool-Calls-SingleTurn": 200,
134
  "Tool-Calls-Multiturn": 200,
135
- "OpenAssistant": 450,
136
- "SmolTalk-OpenHermes": 250,
137
- "SmolTalk-All": 250,
138
  "Cosmopedia-AutoMath": 600,
139
  "OpenMathInstruct-1": 600,
140
  "NuminaMath-CoT": 600,
@@ -179,7 +179,7 @@
179
  "audio": {}
180
  },
181
  "modality_counts": {
182
- "text": 150,
183
  "image": 0,
184
  "video": 0,
185
  "audio": 0
 
1
  {
2
+ "epoch": 135,
3
+ "unique_samples": 350,
4
+ "total_yields": 700,
5
  "dataset_positions": {
6
  "WebSight": 386,
7
  "ScienceQA": 364,
 
18
  "CodeParrot-Clean": 200,
19
  "ShareGPT-Clean": 200,
20
  "Synth-Issues": 200,
21
+ "Dolly-15k": 800,
22
+ "Conversation-Summarization": 800,
23
  "Synth-ShellTimeout": 200,
24
  "Synth-Docker": 200,
25
  "Synth-Documents": 450,
26
  "HumanEval-JavaScript": 164,
27
+ "OpenOrca": 800,
28
  "Synth-MultiStepExecution": 200,
29
  "Synth-Citation": 550,
30
+ "NoRobots": 800,
31
  "Synth-LanguageSetup": 200,
32
  "Function-Calling-ChatML": 200,
33
  "Synth-CoT": 550,
 
75
  "Synth-Debugging": 200,
76
  "Tool-Calls-SingleTurn": 200,
77
  "Tool-Calls-Multiturn": 200,
78
+ "OpenAssistant": 800,
79
  "T2V-Sora-Preferences-2": 650,
80
  "T2V-Human-Preferences": 650,
81
  "Sora-Alignment-Likert": 198,
 
85
  "Sora-Physics-Likert": 198,
86
  "TIP-I2V": 650,
87
  "Pexels-I2V-350k": 650,
88
+ "SmolTalk-OpenHermes": 600,
89
+ "SmolTalk-All": 600,
90
  "Cosmopedia-AutoMath": 600,
91
  "OpenMathInstruct-1": 600,
92
  "NuminaMath-CoT": 600,
 
105
  "Midjourney-Prompts": 200,
106
  "CodeParrot-Clean": 200,
107
  "ShareGPT-Clean": 200,
108
+ "Dolly-15k": 800,
109
+ "Conversation-Summarization": 800,
110
  "HumanEval-JavaScript": 164,
111
+ "OpenOrca": 800,
112
+ "NoRobots": 800,
113
  "Function-Calling-ChatML": 200,
114
  "Python-Code-18k": 200,
115
  "Code-Feedback": 200,
 
132
  "HumanEval-Rust": 164,
133
  "Tool-Calls-SingleTurn": 200,
134
  "Tool-Calls-Multiturn": 200,
135
+ "OpenAssistant": 800,
136
+ "SmolTalk-OpenHermes": 600,
137
+ "SmolTalk-All": 600,
138
  "Cosmopedia-AutoMath": 600,
139
  "OpenMathInstruct-1": 600,
140
  "NuminaMath-CoT": 600,
 
179
  "audio": {}
180
  },
181
  "modality_counts": {
182
+ "text": 350,
183
  "image": 0,
184
  "video": 0,
185
  "audio": 0
trainer_state.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "best_model_checkpoint": "/kaggle/working/xoron-final",
3
- "best_metric": 6.006418684224288,
4
- "epoch": 6,
5
- "epochs_completed": 6,
6
- "global_step": 108,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [],
10
  "logging_steps": 50,
11
- "max_steps": 108,
12
- "num_train_epochs": 6,
13
  "total_flos": 0,
14
  "train_batch_size": 1,
15
  "effective_batch_size": 16,
 
1
  {
2
  "best_model_checkpoint": "/kaggle/working/xoron-final",
3
+ "best_metric": 6.622317645549774,
4
+ "epoch": 7,
5
+ "epochs_completed": 7,
6
+ "global_step": 301,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [],
10
  "logging_steps": 50,
11
+ "max_steps": 301,
12
+ "num_train_epochs": 7,
13
  "total_flos": 0,
14
  "train_batch_size": 1,
15
  "effective_batch_size": 16,
training_state.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31042e6c4b799d02f267c328d1af6d4c2361df10199ab9df2300d09974e87bc4
3
- size 1514911723
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a89e9a0652c7c060ae5d2f1211f9a8ce9e301009c1282faa827cfb44a01e4db3
3
+ size 1514912171