Backup-bdg committed on
Commit
bdf44fc
·
verified ·
1 Parent(s): 6c5e835

Update model weights after training (epoch 4, loss 3.6467)

Browse files
audio_decoder.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cc0af3086d6987e71c00b7121394b8ac820d2276ff994014479d4fc2cf094bf
3
  size 1458415836
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56f890d89fb28ae5df4b1ab79c42b0c29edc81203f2a99ea077d47909b6d128a
3
  size 1458415836
cross_attention.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9034e718a6461ed04d5723c8ecf429d0daedb7fc49274a1fbd17b80bb9dd77b9
3
  size 174191400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d6a01050517c1c2762257ca7f0c03259704ec620070948f9cccd9c26476fcae
3
  size 174191400
llm.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4ce3c75e0ec09d93c0580ed862f12191e3ea3bd53ab8906e0108cafd5d6fc18
3
  size 1506831304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:816725a4e4eaaf6f5a2bb5c3cb678c13f298ccc280937e88c3c947d9fc052fb3
3
  size 1506831304
streaming_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 5,
3
- "unique_samples": 1100,
4
- "total_yields": 2200,
5
  "dataset_positions": {
6
  "WebSight": 186,
7
  "ScienceQA": 164,
@@ -9,10 +9,109 @@
9
  "Flickr8k": 186,
10
  "NewYorker": 186,
11
  "Football": 6,
12
- "MagicBrush": 186
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  },
14
  "modality_positions": {
15
- "text": {},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  "image": {
17
  "WebSight": 186,
18
  "ScienceQA": 164,
@@ -26,8 +125,8 @@
26
  "audio": {}
27
  },
28
  "modality_counts": {
29
- "text": 0,
30
- "image": 300,
31
  "video": 0,
32
  "audio": 0
33
  },
 
1
  {
2
+ "epoch": 12,
3
+ "unique_samples": 3029,
4
+ "total_yields": 6058,
5
  "dataset_positions": {
6
  "WebSight": 186,
7
  "ScienceQA": 164,
 
9
  "Flickr8k": 186,
10
  "NewYorker": 186,
11
  "Football": 6,
12
+ "MagicBrush": 186,
13
+ "WildChat": 200,
14
+ "Synth-ShellExecution": 200,
15
+ "Midjourney-Prompts": 200,
16
+ "Synth-KnowledgeCutoff": 200,
17
+ "Synth-GroundedResponse": 200,
18
+ "CodeParrot-Clean": 200,
19
+ "ShareGPT-Clean": 200,
20
+ "Synth-Issues": 200,
21
+ "Dolly-15k": 200,
22
+ "Conversation-Summarization": 200,
23
+ "Synth-ShellTimeout": 200,
24
+ "Synth-Docker": 200,
25
+ "Synth-Documents": 200,
26
+ "HumanEval-JavaScript": 164,
27
+ "OpenOrca": 200,
28
+ "Synth-MultiStepExecution": 200,
29
+ "Synth-Citation": 200,
30
+ "NoRobots": 200,
31
+ "Synth-LanguageSetup": 200,
32
+ "Function-Calling-ChatML": 200,
33
+ "Synth-CoT": 200,
34
+ "Python-Code-18k": 200,
35
+ "Code-Feedback": 200,
36
+ "HumanEval-CPP": 164,
37
+ "AgentInstruct": 195,
38
+ "SD-Prompts": 200,
39
+ "Synth-Diffs": 200,
40
+ "Golang-Coder": 200,
41
+ "Synth-ConfidenceLevel": 200,
42
+ "Synth-RepoContext": 200,
43
+ "HumanEval-Go": 164,
44
+ "Synth-SelfCorrection": 200,
45
+ "Synth-FactCheck": 200,
46
+ "Synth-Downloads": 200,
47
+ "Synth-RetrievalGrounded": 200,
48
+ "Synth-IDK": 200,
49
+ "Synth-APIGen": 200,
50
+ "Synth-PythonScripts": 200,
51
+ "Synth-Uncertainty": 200,
52
+ "HumanEval-Python": 164,
53
+ "Golang-QA-2k": 200,
54
+ "Synth-ShellErrors": 200,
55
+ "Synth-Jupyter": 200,
56
+ "Jupyter-Code": 200,
57
+ "Synth-Execution": 200,
58
+ "Synth-Monitoring": 200,
59
+ "Synth-DatabaseSetup": 200,
60
+ "HumanEval-Java": 164,
61
+ "Synth-AptInstall": 200,
62
+ "UltraChat": 200,
63
+ "Synth-DesktopSetup": 200,
64
+ "SD-Prompts-2M": 200,
65
+ "Synth-WebserverSetup": 200,
66
+ "Pythonic-Function-Calling": 200,
67
+ "Swift-Code-Edit": 10,
68
+ "Glaive-Code-Assistant": 200,
69
+ "File-Operations-Medium": 200,
70
+ "Swift-Code-RLVR": 200,
71
+ "Synth-SSHSetup": 200,
72
+ "HumanEval-Rust": 164,
73
+ "Synth-Commits": 200,
74
+ "Synth-FIM": 200,
75
+ "Synth-Debugging": 200,
76
+ "Tool-Calls-SingleTurn": 200,
77
+ "Tool-Calls-Multiturn": 200,
78
+ "OpenAssistant": 200
79
  },
80
  "modality_positions": {
81
+ "text": {
82
+ "WildChat": 200,
83
+ "Midjourney-Prompts": 200,
84
+ "CodeParrot-Clean": 200,
85
+ "ShareGPT-Clean": 200,
86
+ "Dolly-15k": 200,
87
+ "Conversation-Summarization": 200,
88
+ "HumanEval-JavaScript": 164,
89
+ "OpenOrca": 200,
90
+ "NoRobots": 200,
91
+ "Function-Calling-ChatML": 200,
92
+ "Python-Code-18k": 200,
93
+ "Code-Feedback": 200,
94
+ "HumanEval-CPP": 164,
95
+ "AgentInstruct": 195,
96
+ "SD-Prompts": 200,
97
+ "Golang-Coder": 200,
98
+ "HumanEval-Go": 164,
99
+ "Synth-APIGen": 200,
100
+ "HumanEval-Python": 164,
101
+ "Golang-QA-2k": 200,
102
+ "Jupyter-Code": 200,
103
+ "HumanEval-Java": 164,
104
+ "UltraChat": 200,
105
+ "SD-Prompts-2M": 200,
106
+ "Pythonic-Function-Calling": 200,
107
+ "Swift-Code-Edit": 10,
108
+ "Glaive-Code-Assistant": 200,
109
+ "Swift-Code-RLVR": 200,
110
+ "HumanEval-Rust": 164,
111
+ "Tool-Calls-SingleTurn": 200,
112
+ "Tool-Calls-Multiturn": 200,
113
+ "OpenAssistant": 200
114
+ },
115
  "image": {
116
  "WebSight": 186,
117
  "ScienceQA": 164,
 
125
  "audio": {}
126
  },
127
  "modality_counts": {
128
+ "text": 3029,
129
+ "image": 0,
130
  "video": 0,
131
  "audio": 0
132
  },
trainer_state.json CHANGED
@@ -1,31 +1,31 @@
1
  {
2
  "best_model_checkpoint": "/kaggle/working/xoron-final",
3
- "best_metric": 4.019162586334472,
4
- "epoch": 1,
5
- "epochs_completed": 1,
6
- "global_step": 37,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [],
10
  "logging_steps": 50,
11
- "max_steps": 37,
12
- "num_train_epochs": 1,
13
  "total_flos": 0,
14
  "train_batch_size": 1,
15
  "effective_batch_size": 16,
16
  "learning_rate": 0.0001,
17
  "max_grad_norm": 1.0,
18
  "trainable_components": [
19
- "vision",
20
  "llm",
21
  "cross_attention",
22
- "image_generation",
23
  "modality_markers"
24
  ],
25
  "frozen_components": [
 
26
  "video",
27
  "audio",
28
  "speech",
 
29
  "video_generation"
30
  ],
31
  "trial_name": null,
 
1
  {
2
  "best_model_checkpoint": "/kaggle/working/xoron-final",
3
+ "best_metric": 3.646694382440487,
4
+ "epoch": 4,
5
+ "epochs_completed": 4,
6
+ "global_step": 1597,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [],
10
  "logging_steps": 50,
11
+ "max_steps": 1597,
12
+ "num_train_epochs": 4,
13
  "total_flos": 0,
14
  "train_batch_size": 1,
15
  "effective_batch_size": 16,
16
  "learning_rate": 0.0001,
17
  "max_grad_norm": 1.0,
18
  "trainable_components": [
 
19
  "llm",
20
  "cross_attention",
 
21
  "modality_markers"
22
  ],
23
  "frozen_components": [
24
+ "vision",
25
  "video",
26
  "audio",
27
  "speech",
28
+ "image_generation",
29
  "video_generation"
30
  ],
31
  "trial_name": null,
training_state.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cff88f1a8ee14094dfffadc0ac06d52480a2d90bd740252423ecf77cdef8f6cc
3
- size 1419723549
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9be0dff94c6d235091cae224c3e034a33fa84932af351d74ac37a512956c5486
3
+ size 781495681