Backup-bdg committed on
Commit
2068eeb
·
verified ·
1 Parent(s): 4467801

Update model weights after training (epoch 1, loss 6.6292)

Browse files
audio_decoder.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:261d0afbb5e08b6b1900e3dea25eb42c412e5542bee5a4f0681a898ae9c8bcd8
3
  size 1458415836
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3226fee536a749a40aab83f5afa949808d778485026f933161c2d0a6b66c03f9
3
  size 1458415836
config.json CHANGED
@@ -5,7 +5,7 @@
5
  "num_layers": 12,
6
  "num_heads": 16,
7
  "intermediate_size": 2048,
8
- "vocab_size": 151643,
9
  "max_position_embeddings": 131072,
10
  "rms_norm_eps": 1e-06,
11
  "use_ring_attention": true,
 
5
  "num_layers": 12,
6
  "num_heads": 16,
7
  "intermediate_size": 2048,
8
+ "vocab_size": 152200,
9
  "max_position_embeddings": 131072,
10
  "rms_norm_eps": 1e-06,
11
  "use_ring_attention": true,
cross_attention.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27a688ad60e1a8efc783cfa66ad0ed5e9c4e74a0f5437e134ea717bdbb761eb3
3
  size 174191400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1dd70b1b4136042c3241058967ff7fb8423547263fe302498c3cc9f2ab00703
3
  size 174191400
llm.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f05d7774509a9338dd769956837f7b62c63ad4ab45a56fbb919230f51c876c6
3
  size 1506831304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c84ad6f98c7c9d20394a4a356dd6f56d27ee8ada70d3a891c1e8e557df3280dd
3
  size 1506831304
streaming_state.json CHANGED
@@ -1,21 +1,117 @@
1
  {
2
  "epoch": 1,
3
- "unique_samples": 1,
4
- "total_yields": 2,
5
  "dataset_positions": {
6
- "InstructPix2Pix": 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  },
8
  "modality_positions": {
9
- "text": {},
10
- "image": {
11
- "InstructPix2Pix": 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  },
 
13
  "video": {},
14
  "audio": {}
15
  },
16
  "modality_counts": {
17
- "text": 0,
18
- "image": 1,
19
  "video": 0,
20
  "audio": 0
21
  },
 
1
  {
2
  "epoch": 1,
3
+ "unique_samples": 3260,
4
+ "total_yields": 6520,
5
  "dataset_positions": {
6
+ "Synth-SelfCorrection": 50,
7
+ "Synth-Documents": 50,
8
+ "Synth-ShellTimeout": 50,
9
+ "Jupyter-Code": 50,
10
+ "HumanEval-JavaScript": 50,
11
+ "Synth-DesktopSetup": 50,
12
+ "UltraChat": 50,
13
+ "HumanEval-Python": 50,
14
+ "Dolly-15k": 50,
15
+ "Synth-ShellExecution": 50,
16
+ "Midjourney-Prompts": 50,
17
+ "Synth-PythonScripts": 50,
18
+ "Synth-Issues": 50,
19
+ "Synth-Monitoring": 50,
20
+ "Synth-KnowledgeCutoff": 50,
21
+ "Synth-Uncertainty": 50,
22
+ "Swift-Code-RLVR": 50,
23
+ "HumanEval-CPP": 50,
24
+ "Synth-CoT": 50,
25
+ "Synth-Debugging": 50,
26
+ "Swift-Code-Edit": 10,
27
+ "SD-Prompts-2M": 50,
28
+ "Synth-WebserverSetup": 50,
29
+ "Synth-SSHSetup": 50,
30
+ "File-Operations-Medium": 50,
31
+ "Python-Code-18k": 50,
32
+ "Synth-RepoContext": 50,
33
+ "Synth-IDK": 50,
34
+ "WildChat": 50,
35
+ "Synth-FIM": 50,
36
+ "Synth-GroundedResponse": 50,
37
+ "Synth-AptInstall": 50,
38
+ "Golang-Coder": 50,
39
+ "HumanEval-Java": 50,
40
+ "AgentInstruct": 50,
41
+ "Function-Calling-ChatML": 50,
42
+ "Synth-Downloads": 50,
43
+ "Synth-MultiStepExecution": 50,
44
+ "Synth-RetrievalGrounded": 50,
45
+ "Pythonic-Function-Calling": 50,
46
+ "OpenOrca": 50,
47
+ "Synth-Citation": 50,
48
+ "Golang-QA-2k": 50,
49
+ "Synth-APIGen": 50,
50
+ "CodeParrot-Clean": 50,
51
+ "Synth-Jupyter": 50,
52
+ "Synth-ShellErrors": 50,
53
+ "NoRobots": 50,
54
+ "Synth-Docker": 50,
55
+ "Glaive-Code-Assistant": 50,
56
+ "Synth-Diffs": 50,
57
+ "ShareGPT-Clean": 50,
58
+ "Code-Feedback": 50,
59
+ "Conversation-Summarization": 50,
60
+ "SD-Prompts": 50,
61
+ "Synth-LanguageSetup": 50,
62
+ "Synth-FactCheck": 50,
63
+ "Synth-Execution": 50,
64
+ "HumanEval-Rust": 50,
65
+ "Synth-DatabaseSetup": 50,
66
+ "Synth-ConfidenceLevel": 50,
67
+ "Synth-Commits": 50,
68
+ "HumanEval-Go": 50,
69
+ "Tool-Calls-Multiturn": 50,
70
+ "OpenAssistant": 50,
71
+ "Tool-Calls-SingleTurn": 50
72
  },
73
  "modality_positions": {
74
+ "text": {
75
+ "Jupyter-Code": 50,
76
+ "HumanEval-JavaScript": 50,
77
+ "UltraChat": 50,
78
+ "HumanEval-Python": 50,
79
+ "Dolly-15k": 50,
80
+ "Midjourney-Prompts": 50,
81
+ "Swift-Code-RLVR": 50,
82
+ "HumanEval-CPP": 50,
83
+ "Swift-Code-Edit": 10,
84
+ "SD-Prompts-2M": 50,
85
+ "Python-Code-18k": 50,
86
+ "WildChat": 50,
87
+ "Golang-Coder": 50,
88
+ "HumanEval-Java": 50,
89
+ "AgentInstruct": 50,
90
+ "Function-Calling-ChatML": 50,
91
+ "Pythonic-Function-Calling": 50,
92
+ "OpenOrca": 50,
93
+ "Golang-QA-2k": 50,
94
+ "Synth-APIGen": 50,
95
+ "CodeParrot-Clean": 50,
96
+ "NoRobots": 50,
97
+ "Glaive-Code-Assistant": 50,
98
+ "ShareGPT-Clean": 50,
99
+ "Code-Feedback": 50,
100
+ "Conversation-Summarization": 50,
101
+ "SD-Prompts": 50,
102
+ "HumanEval-Rust": 50,
103
+ "HumanEval-Go": 50,
104
+ "Tool-Calls-Multiturn": 50,
105
+ "OpenAssistant": 50,
106
+ "Tool-Calls-SingleTurn": 50
107
  },
108
+ "image": {},
109
  "video": {},
110
  "audio": {}
111
  },
112
  "modality_counts": {
113
+ "text": 3260,
114
+ "image": 0,
115
  "video": 0,
116
  "audio": 0
117
  },
trainer_state.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "best_model_checkpoint": "/kaggle/working/xoron-final",
3
- "best_metric": 12.373827934265137,
4
  "epoch": 1,
5
  "epochs_completed": 1,
6
- "global_step": 0,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [],
10
  "logging_steps": 50,
11
- "max_steps": 0,
12
  "num_train_epochs": 1,
13
  "total_flos": 0,
14
  "train_batch_size": 1,
@@ -16,16 +16,16 @@
16
  "learning_rate": 0.0001,
17
  "max_grad_norm": 1.0,
18
  "trainable_components": [
19
- "vision",
20
  "llm",
21
  "cross_attention",
22
- "image_generation",
23
  "modality_markers"
24
  ],
25
  "frozen_components": [
 
26
  "video",
27
  "audio",
28
  "speech",
 
29
  "video_generation"
30
  ],
31
  "trial_name": null,
 
1
  {
2
  "best_model_checkpoint": "/kaggle/working/xoron-final",
3
+ "best_metric": 6.629150597175206,
4
  "epoch": 1,
5
  "epochs_completed": 1,
6
+ "global_step": 407,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [],
10
  "logging_steps": 50,
11
+ "max_steps": 407,
12
  "num_train_epochs": 1,
13
  "total_flos": 0,
14
  "train_batch_size": 1,
 
16
  "learning_rate": 0.0001,
17
  "max_grad_norm": 1.0,
18
  "trainable_components": [
 
19
  "llm",
20
  "cross_attention",
 
21
  "modality_markers"
22
  ],
23
  "frozen_components": [
24
+ "vision",
25
  "video",
26
  "audio",
27
  "speech",
28
+ "image_generation",
29
  "video_generation"
30
  ],
31
  "trial_name": null,
training_state.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09c47de74291bae883f60d4986fe8dbc38a2c68de7574fdabc66ce46222ed711
3
- size 5143
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d32523d7fc44d7f0f1c884a1463149d48212161a3bfbd0f82b045cf4a1d583a2
3
+ size 781490561