Backup-bdg committed on
Commit
4467801
·
verified ·
1 Parent(s): a0c2e3b

Update model weights after training (epoch 1, loss 12.3738)

Browse files
audio_decoder.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69fb66efd2fcf1cca60ed861cab7e732be1f5afd1da828c8e756ecdbeaba07e4
3
  size 1458415836
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:261d0afbb5e08b6b1900e3dea25eb42c412e5542bee5a4f0681a898ae9c8bcd8
3
  size 1458415836
audio_encoder.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12e4401d2925cec8a9b29973f98528943b9ccd70107fb8d90baacd11a897b051
3
  size 466150140
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30c6b7e43c61588099a04e970a49bd38fd73465ef129d8e39ed9a1e8c45aeecf
3
  size 466150140
audio_projector.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d836099185cc6b6e01afdc72679fb120799bc0e3109a86779d04de093ee5b4f2
3
  size 2099352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:728f8031a27e5eb3d42d6c7632872efcd0739f89bbddee2174bc9ed01776730b
3
  size 2099352
cross_attention.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8d2d5d278130bf1d488e55783917dd6f62c1c5d1c30467339dba224b2ac890c
3
  size 174191400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27a688ad60e1a8efc783cfa66ad0ed5e9c4e74a0f5437e134ea717bdbb761eb3
3
  size 174191400
generator.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e586a0ad33c4d788f5d915cda9ade7766844890c1fea0d91d514644c8b415f6
3
  size 629440508
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c01fd06b809de1bc14f78c1fd5f2f2cac625db3b22f0b696d532e7442aee71a
3
  size 629440508
llm.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:076b2a9ee201e21b04fa3113d34fb156cba0ac55c6cfbcc8c0bd79d393f00aca
3
  size 1506831304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f05d7774509a9338dd769956837f7b62c63ad4ab45a56fbb919230f51c876c6
3
  size 1506831304
modality_markers.safetensors CHANGED
Binary files a/modality_markers.safetensors and b/modality_markers.safetensors differ
 
modeling_xoron.py CHANGED
@@ -10189,15 +10189,31 @@ class XoronModel(XoronPreTrainedModel):
10189
 
10190
  # Handle vocab size mismatch for LLM component
10191
  if comp_name == 'llm':
10192
- # Check if embed_tokens size differs
10193
  embed_key = 'model.embed_tokens.weight'
 
 
10194
  if embed_key in state_dict:
10195
  saved_vocab_size = state_dict[embed_key].shape[0]
 
10196
  current_vocab_size = component.model.embed_tokens.weight.shape[0]
10197
 
10198
  if saved_vocab_size != current_vocab_size:
10199
  print(f" πŸ“ Resizing embeddings: {current_vocab_size} -> {saved_vocab_size}")
10200
- component.resize_token_embeddings(saved_vocab_size)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10201
 
10202
  component.load_state_dict(state_dict, strict=False)
10203
  print(f" βœ… Loaded {comp_name}")
@@ -11105,15 +11121,31 @@ class XoronModel(XoronPreTrainedModel):
11105
 
11106
  # Handle vocab size mismatch for LLM component
11107
  if comp_name == 'llm':
11108
- # Check if embed_tokens size differs
11109
  embed_key = 'model.embed_tokens.weight'
 
 
11110
  if embed_key in state_dict:
11111
  saved_vocab_size = state_dict[embed_key].shape[0]
 
11112
  current_vocab_size = component.model.embed_tokens.weight.shape[0]
11113
 
11114
  if saved_vocab_size != current_vocab_size:
11115
  print(f" πŸ“ Resizing embeddings: {current_vocab_size} -> {saved_vocab_size}")
11116
- component.resize_token_embeddings(saved_vocab_size)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11117
 
11118
  component.load_state_dict(state_dict, strict=False)
11119
  print(f" βœ… Loaded {comp_name}")
 
10189
 
10190
  # Handle vocab size mismatch for LLM component
10191
  if comp_name == 'llm':
 
10192
  embed_key = 'model.embed_tokens.weight'
10193
+ lm_head_key = 'lm_head.weight'
10194
+
10195
  if embed_key in state_dict:
10196
  saved_vocab_size = state_dict[embed_key].shape[0]
10197
+ hidden_size = state_dict[embed_key].shape[1]
10198
  current_vocab_size = component.model.embed_tokens.weight.shape[0]
10199
 
10200
  if saved_vocab_size != current_vocab_size:
10201
  print(f" πŸ“ Resizing embeddings: {current_vocab_size} -> {saved_vocab_size}")
10202
+ # Manually resize embed_tokens
10203
+ new_embed = nn.Embedding(saved_vocab_size, hidden_size)
10204
+ new_embed.weight.data = state_dict[embed_key]
10205
+ component.model.embed_tokens = new_embed
10206
+
10207
+ # Manually resize lm_head if present
10208
+ if lm_head_key in state_dict:
10209
+ new_lm_head = nn.Linear(hidden_size, saved_vocab_size, bias=False)
10210
+ new_lm_head.weight.data = state_dict[lm_head_key]
10211
+ component.lm_head = new_lm_head
10212
+
10213
+ # Remove these keys from state_dict since we already loaded them
10214
+ del state_dict[embed_key]
10215
+ if lm_head_key in state_dict:
10216
+ del state_dict[lm_head_key]
10217
 
10218
  component.load_state_dict(state_dict, strict=False)
10219
  print(f" βœ… Loaded {comp_name}")
 
11121
 
11122
  # Handle vocab size mismatch for LLM component
11123
  if comp_name == 'llm':
 
11124
  embed_key = 'model.embed_tokens.weight'
11125
+ lm_head_key = 'lm_head.weight'
11126
+
11127
  if embed_key in state_dict:
11128
  saved_vocab_size = state_dict[embed_key].shape[0]
11129
+ hidden_size = state_dict[embed_key].shape[1]
11130
  current_vocab_size = component.model.embed_tokens.weight.shape[0]
11131
 
11132
  if saved_vocab_size != current_vocab_size:
11133
  print(f" πŸ“ Resizing embeddings: {current_vocab_size} -> {saved_vocab_size}")
11134
+ # Manually resize embed_tokens
11135
+ new_embed = nn.Embedding(saved_vocab_size, hidden_size)
11136
+ new_embed.weight.data = state_dict[embed_key]
11137
+ component.model.embed_tokens = new_embed
11138
+
11139
+ # Manually resize lm_head if present
11140
+ if lm_head_key in state_dict:
11141
+ new_lm_head = nn.Linear(hidden_size, saved_vocab_size, bias=False)
11142
+ new_lm_head.weight.data = state_dict[lm_head_key]
11143
+ component.lm_head = new_lm_head
11144
+
11145
+ # Remove these keys from state_dict since we already loaded them
11146
+ del state_dict[embed_key]
11147
+ if lm_head_key in state_dict:
11148
+ del state_dict[lm_head_key]
11149
 
11150
  component.load_state_dict(state_dict, strict=False)
11151
  print(f" βœ… Loaded {comp_name}")
projector.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa7c80f806f46f1616d3acfe6048b56e597e36027b7700eefd9ca13e5b868da9
3
  size 52880664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:899fc13cba54ecbf1eef6401108393ed0e9b8e4584e8d180947f87d4807c39dd
3
  size 52880664
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_model_checkpoint": "/kaggle/working/xoron-final",
3
- "best_metric": 12.640625,
4
  "epoch": 1,
5
  "epochs_completed": 1,
6
  "global_step": 0,
 
1
  {
2
  "best_model_checkpoint": "/kaggle/working/xoron-final",
3
+ "best_metric": 12.373827934265137,
4
  "epoch": 1,
5
  "epochs_completed": 1,
6
  "global_step": 0,
training_state.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9e348eb62b15d8fe7418f5c527b62ddecd4e918c36b6ebce28a035479b9432e
3
  size 5143
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09c47de74291bae883f60d4986fe8dbc38a2c68de7574fdabc66ce46222ed711
3
  size 5143
video_encoder.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f72e26338eb6855e72508d1a57a3d038d63c84ab92a9448307ad5b3430393bca
3
  size 1923089112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f70226e533706675adf13f72c46122854021d13fe388445bc4d6b7495fa64e3a
3
  size 1923089112
video_generator.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1aa2bef313e63c8c8dbead2f22c6e044abd56883629f7504e54953e37e62f00b
3
  size 61574134
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c68805a467c37a4b172786a99fb83589c8a4e40b7b7a40886176cf1fd2188dc5
3
  size 61574134
vision_encoder.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc7028344414ab330bb1b2d321f2dd2e828f77a13da8b68c038a4790e170df2b
3
  size 1000535480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ba3ed68a4082c90661c88d125f4dd6b40717652173dbdc31df0c8cc5fa6260a
3
  size 1000535480
waveform_decoder.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bff6c300b238aa47a1673e2ffd1f65c029916e9271cd0952b3d6e873b11837a
3
  size 34681076
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7416892f4786903c832800ef5baac11cf3a787979cf223afa39a00434b5c639f
3
  size 34681076