Backup-bdg committed on
Commit
a0c2e3b
·
verified ·
1 Parent(s): 3d55c38

Update model weights after training (epoch 1, loss 12.6406)

Browse files
audio_decoder.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:345c19f21b1d9f02ad3054d3cc7e9e48ea5e82c276334869646391d4b0596e79
3
  size 1458415836
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69fb66efd2fcf1cca60ed861cab7e732be1f5afd1da828c8e756ecdbeaba07e4
3
  size 1458415836
audio_encoder.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:258d79b8e1b755907bfd5083947451386aa41f8370b4f56c883ba9e9f54f46ba
3
  size 466150140
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12e4401d2925cec8a9b29973f98528943b9ccd70107fb8d90baacd11a897b051
3
  size 466150140
audio_projector.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c48261a28a843318cfc7afa7e5ff9e70a997f465b78ea8be64509c42861c56eb
3
  size 2099352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d836099185cc6b6e01afdc72679fb120799bc0e3109a86779d04de093ee5b4f2
3
  size 2099352
cross_attention.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d183ddac32911ec28151d1569c00bd5804910fc622fd35853ed661b7ffa4f5a3
3
  size 174191400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8d2d5d278130bf1d488e55783917dd6f62c1c5d1c30467339dba224b2ac890c
3
  size 174191400
generator.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83d86a5edad6600d32976ad9dbce1b2202d40f42429d1bcd44484f00f6caf6b3
3
  size 629440508
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e586a0ad33c4d788f5d915cda9ade7766844890c1fea0d91d514644c8b415f6
3
  size 629440508
llm.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a1a06cd599c200f1965030fd038048e2437decebe316d5ad0167d2908b217128
3
  size 1506831304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:076b2a9ee201e21b04fa3113d34fb156cba0ac55c6cfbcc8c0bd79d393f00aca
3
  size 1506831304
modality_markers.safetensors CHANGED
Binary files a/modality_markers.safetensors and b/modality_markers.safetensors differ
 
modeling_xoron.py CHANGED
@@ -10186,6 +10186,19 @@ class XoronModel(XoronPreTrainedModel):
10186
  if component is not None:
10187
  with safe_open(comp_path, framework="pt") as f:
10188
  state_dict = {k: f.get_tensor(k) for k in f.keys()}
 
 
 
 
 
 
 
 
 
 
 
 
 
10189
  component.load_state_dict(state_dict, strict=False)
10190
  print(f" ✅ Loaded {comp_name}")
10191
 
@@ -11089,6 +11102,19 @@ class XoronModel(XoronPreTrainedModel):
11089
  if component is not None:
11090
  with safe_open(comp_path, framework="pt") as f:
11091
  state_dict = {k: f.get_tensor(k) for k in f.keys()}
 
 
 
 
 
 
 
 
 
 
 
 
 
11092
  component.load_state_dict(state_dict, strict=False)
11093
  print(f" ✅ Loaded {comp_name}")
11094
 
 
10186
  if component is not None:
10187
  with safe_open(comp_path, framework="pt") as f:
10188
  state_dict = {k: f.get_tensor(k) for k in f.keys()}
10189
+
10190
+ # Handle vocab size mismatch for LLM component
10191
+ if comp_name == 'llm':
10192
+ # Check if embed_tokens size differs
10193
+ embed_key = 'model.embed_tokens.weight'
10194
+ if embed_key in state_dict:
10195
+ saved_vocab_size = state_dict[embed_key].shape[0]
10196
+ current_vocab_size = component.model.embed_tokens.weight.shape[0]
10197
+
10198
+ if saved_vocab_size != current_vocab_size:
10199
+ print(f" ๐Ÿ“ Resizing embeddings: {current_vocab_size} -> {saved_vocab_size}")
10200
+ component.resize_token_embeddings(saved_vocab_size)
10201
+
10202
  component.load_state_dict(state_dict, strict=False)
10203
  print(f" ✅ Loaded {comp_name}")
10204
 
 
11102
  if component is not None:
11103
  with safe_open(comp_path, framework="pt") as f:
11104
  state_dict = {k: f.get_tensor(k) for k in f.keys()}
11105
+
11106
+ # Handle vocab size mismatch for LLM component
11107
+ if comp_name == 'llm':
11108
+ # Check if embed_tokens size differs
11109
+ embed_key = 'model.embed_tokens.weight'
11110
+ if embed_key in state_dict:
11111
+ saved_vocab_size = state_dict[embed_key].shape[0]
11112
+ current_vocab_size = component.model.embed_tokens.weight.shape[0]
11113
+
11114
+ if saved_vocab_size != current_vocab_size:
11115
+ print(f" ๐Ÿ“ Resizing embeddings: {current_vocab_size} -> {saved_vocab_size}")
11116
+ component.resize_token_embeddings(saved_vocab_size)
11117
+
11118
  component.load_state_dict(state_dict, strict=False)
11119
  print(f" ✅ Loaded {comp_name}")
11120
 
projector.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:42848d798e1fd88408f15cb242e3e117a6dc1166a68d85bd2b60d556902b3567
3
  size 52880664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa7c80f806f46f1616d3acfe6048b56e597e36027b7700eefd9ca13e5b868da9
3
  size 52880664
streaming_state.json CHANGED
@@ -3,12 +3,12 @@
3
  "unique_samples": 1,
4
  "total_yields": 2,
5
  "dataset_positions": {
6
- "Football": 1
7
  },
8
  "modality_positions": {
9
  "text": {},
10
  "image": {
11
- "Football": 1
12
  },
13
  "video": {},
14
  "audio": {}
 
3
  "unique_samples": 1,
4
  "total_yields": 2,
5
  "dataset_positions": {
6
+ "InstructPix2Pix": 1
7
  },
8
  "modality_positions": {
9
  "text": {},
10
  "image": {
11
+ "InstructPix2Pix": 1
12
  },
13
  "video": {},
14
  "audio": {}
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_model_checkpoint": "/kaggle/working/xoron-final",
3
- "best_metric": 12.606770992279053,
4
  "epoch": 1,
5
  "epochs_completed": 1,
6
  "global_step": 0,
 
1
  {
2
  "best_model_checkpoint": "/kaggle/working/xoron-final",
3
+ "best_metric": 12.640625,
4
  "epoch": 1,
5
  "epochs_completed": 1,
6
  "global_step": 0,
training_state.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:486d68f890c9804dd2c27b25f84601ad3fd4dab4089d237c2e7ffb37883a9a7b
3
  size 5143
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9e348eb62b15d8fe7418f5c527b62ddecd4e918c36b6ebce28a035479b9432e
3
  size 5143
video_encoder.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c65e415613ad0a88b9d0a4f7b393d21f138a6f5be3aeb89bc26c010a028a447
3
  size 1923089112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f72e26338eb6855e72508d1a57a3d038d63c84ab92a9448307ad5b3430393bca
3
  size 1923089112
video_generator.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f86358c587f5dd7e17c5cd17898de9ffe1104fba7b7cba7a7ae2e4a54e3a9a14
3
  size 61574134
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1aa2bef313e63c8c8dbead2f22c6e044abd56883629f7504e54953e37e62f00b
3
  size 61574134
vision_encoder.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a467be0cf69c30b067287b20aa89f9031a50dbe4106938a64aa87e922ad2d096
3
  size 1000535480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc7028344414ab330bb1b2d321f2dd2e828f77a13da8b68c038a4790e170df2b
3
  size 1000535480
waveform_decoder.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7408a6ce0c03a47f12b4f4f3a2222f04cd7b3651dfcc1335aaa8e2648fddb029
3
  size 34681076
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bff6c300b238aa47a1673e2ffd1f65c029916e9271cd0952b3d6e873b11837a
3
  size 34681076