Zihan Min
committed on
Commit
·
6342c7b
1
Parent(s):
1e55ac0
upload 0.6+4_base fuser
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- qwen3_0.6b+qwen3_4b_base_Fuser/config.json +82 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/aggregator_config.json +1 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_0.json +20 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_0.pt +3 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_1.json +20 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_1.pt +3 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_10.json +20 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_10.pt +3 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_11.json +20 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_11.pt +3 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_12.json +20 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_12.pt +3 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_13.json +20 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_13.pt +3 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_14.json +20 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_14.pt +3 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_15.json +20 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_15.pt +3 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_16.json +20 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_16.pt +3 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_17.json +20 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_17.pt +3 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_18.json +20 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_18.pt +3 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_19.json +20 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_19.pt +3 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_2.json +20 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_2.pt +3 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_20.json +20 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_20.pt +3 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_21.json +20 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_21.pt +3 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_22.json +20 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_22.pt +3 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_23.json +20 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_23.pt +3 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_24.json +20 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_24.pt +3 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_25.json +20 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_25.pt +3 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_26.json +20 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_26.pt +3 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_27.json +20 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_27.pt +3 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_3.json +20 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_3.pt +3 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_4.json +20 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_4.pt +3 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_5.json +20 -0
- qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_5.pt +3 -0
qwen3_0.6b+qwen3_4b_base_Fuser/config.json
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
base_model: "Qwen/Qwen3-0.6B"
|
| 3 |
+
teacher_model: "Qwen/Qwen3-4B-Base"
|
| 4 |
+
include_response: false
|
| 5 |
+
is_do_alignment: false
|
| 6 |
+
alignment_strategy: "first"
|
| 7 |
+
projector:
|
| 8 |
+
type: "C2CProjector"
|
| 9 |
+
params:
|
| 10 |
+
hidden_dim: 1024
|
| 11 |
+
intermediate_dim: 1024
|
| 12 |
+
num_layers: 3
|
| 13 |
+
dropout: 0.1
|
| 14 |
+
initial_temperature: 1.0
|
| 15 |
+
final_temperature: 0.001
|
| 16 |
+
anneal_steps: 1953
|
| 17 |
+
# projector:
|
| 18 |
+
# type: "AllInOneProjector"
|
| 19 |
+
# params:
|
| 20 |
+
# hidden_dim: 1024
|
| 21 |
+
# weight_hidden_dim: 1024
|
| 22 |
+
# num_layers: 3
|
| 23 |
+
# dropout: 0.1
|
| 24 |
+
# activation: "gelu"
|
| 25 |
+
# use_layer_norm: true
|
| 26 |
+
# use_residual: true
|
| 27 |
+
# use_swiglu: true
|
| 28 |
+
# use_concat: true
|
| 29 |
+
# gate_granularity: "scalar"
|
| 30 |
+
# gate_depends_on_input: false
|
| 31 |
+
# gate_input_features: "target_key"
|
| 32 |
+
# gate_init_value: 0.0
|
| 33 |
+
# weight_granularity: "head_merged"
|
| 34 |
+
# weight_depends_on_input: true
|
| 35 |
+
# weight_input_features: "target_projected_key"
|
| 36 |
+
# weight_init_value: 0.0
|
| 37 |
+
# use_gumbel: true
|
| 38 |
+
# initial_temperature: 1.0
|
| 39 |
+
# final_temperature: 0.001
|
| 40 |
+
# preserve_target_weight: false
|
| 41 |
+
# add_self: true
|
| 42 |
+
# anneal_steps: 1929
|
| 43 |
+
# scalar_temperature: 1.0
|
| 44 |
+
# max_sequence_length: 8192
|
| 45 |
+
mapping: "last_aligned"
|
| 46 |
+
|
| 47 |
+
training:
|
| 48 |
+
learning_rate: 0.0001
|
| 49 |
+
weight_decay: 0.01
|
| 50 |
+
num_epochs: 1
|
| 51 |
+
max_length: 2048
|
| 52 |
+
device: "cuda"
|
| 53 |
+
scheduler_type: "linear"
|
| 54 |
+
warmup_ratio: 0.1
|
| 55 |
+
max_grad_norm: 1.0
|
| 56 |
+
gradient_accumulation_steps: 4
|
| 57 |
+
per_device_train_batch_size: 8
|
| 58 |
+
num_processes: 8
|
| 59 |
+
freeze:
|
| 60 |
+
- "teacher"
|
| 61 |
+
- "base"
|
| 62 |
+
seed: 42
|
| 63 |
+
|
| 64 |
+
output:
|
| 65 |
+
output_dir: "local/checkpoints/Q3-0.6B_Q3-4B-Base_general_500k_C2C"
|
| 66 |
+
save_steps: 500
|
| 67 |
+
eval_steps: 100
|
| 68 |
+
wandb_config:
|
| 69 |
+
project: "Rosetta"
|
| 70 |
+
mode: "online"
|
| 71 |
+
entity: "nics-efc"
|
| 72 |
+
run_name: "Q3-0.6B_Q3-4B-Base_general_500k_C2C"
|
| 73 |
+
|
| 74 |
+
data:
|
| 75 |
+
type: "OpenHermesChatDataset"
|
| 76 |
+
# type: "DollyChatDataset"
|
| 77 |
+
|
| 78 |
+
kwargs:
|
| 79 |
+
split: "train"
|
| 80 |
+
max_word_count: 2048
|
| 81 |
+
num_samples: 500000
|
| 82 |
+
train_ratio: 0.99
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/aggregator_config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{}
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_0.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 128,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1953,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ca901bc8803e85cae986c4cd656874eda980384f8956f26692b91a00930195e
|
| 3 |
+
size 37815239
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_1.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 128,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1953,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c37b6a1bca77a587417a932c6b08ab422ef895c21adcaa013cf3423df29e5bf4
|
| 3 |
+
size 37815239
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_10.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 128,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1953,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_10.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:befd49e5176b4b913597244f97da7f709edd8c344843adb1e665db2c3cd55f33
|
| 3 |
+
size 37815276
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_11.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 128,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1953,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_11.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ea1e072a0a736a09e0cf7a7edf36dbe7c69101a7249d8138e64b45e1a0afa354
|
| 3 |
+
size 37815276
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_12.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 128,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1953,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_12.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:89bea162bff880010939806fa1c5ea6be7b19ef950448818d69528c649192033
|
| 3 |
+
size 37815276
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_13.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 128,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1953,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_13.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03b2b6856fa7d90d6b9298b584e9cdfe0b2f85da492596bde250f85c41fbb3fc
|
| 3 |
+
size 37815276
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_14.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 128,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1953,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_14.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7bb4bf56e219a51545a4bff5c6270555de629e1616f87b9fa0f658ccf27254a9
|
| 3 |
+
size 37815276
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_15.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 128,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1953,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_15.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d7af755b043865aa4c27148565c9d20437363e83afa1ba007a4adcbe6b33ae77
|
| 3 |
+
size 37815276
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_16.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 128,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1953,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_16.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e9c310b2bc90edf68c0f0914a00897809bed4767da049b2155767a324c1325b
|
| 3 |
+
size 37815276
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_17.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 128,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1953,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_17.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0055c8e4aebda13f50d5bfa8f8ca0ad4badaa8079923a310c4d76b14f0037d8e
|
| 3 |
+
size 37815276
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_18.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 128,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1953,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_18.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61065ef9e80df17da7ab6ee2cc810c996eeef9d8fc068eeb384240d5776164fa
|
| 3 |
+
size 37815276
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_19.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 128,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1953,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_19.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7253a544a70f2721a6072f3a2ea931a4b2af120312d052d9ba2bd7bebb450875
|
| 3 |
+
size 37815276
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_2.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 128,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1953,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_2.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2fc8b03defc611cbc58da01cfb67e1a84203e0675f1e9bf4fa5ffcac34851b4b
|
| 3 |
+
size 37815239
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_20.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 128,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1953,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_20.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c621c50d542d368bf1f127918020afd9823bc6d39057b79e8a0ff368a46fce1
|
| 3 |
+
size 37815276
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_21.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 128,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1953,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_21.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87a0e4f1ad4290c336f1780dd4fdaba6f6ce85a221a3111ec1d477289735ca8e
|
| 3 |
+
size 37815276
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_22.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 128,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1953,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_22.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d3be267fa3472fcac11fbd8252f3a09b8bf050945ec1481f29fca096d815d95
|
| 3 |
+
size 37815276
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_23.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 128,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1953,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_23.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e6207c7442f68832c6678a47c508dfae574c133b3915ad959a1f764eef8d547
|
| 3 |
+
size 37815276
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_24.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 128,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1953,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_24.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d382d0d1b64f303279054234fae9588bcd0ef3ab324c2bde2475d610ca99b16a
|
| 3 |
+
size 37815276
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_25.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 128,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1953,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_25.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:33161138959e0125801b74b5be397029733729938edf4715760cdde8e729af0d
|
| 3 |
+
size 37815276
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_26.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 128,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1953,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_26.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3377bdc808175f9b3d239927424481e98039d445d5622ae0281ac7a5e7734647
|
| 3 |
+
size 37815276
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_27.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 128,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1953,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_27.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ac9a6e50aa6886c7cee791b5ce6d2c3f7c806da0a88c61bf0678a34d0e9e348
|
| 3 |
+
size 37815276
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_3.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 128,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1953,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_3.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:02b1bb740e0983f75d32c680f742d96722d3c74ad3075847957647984bfe7071
|
| 3 |
+
size 37815239
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_4.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 128,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1953,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_4.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a59d32df77c5c6b88388edd833866284f8e53b557cf2c6e4cbf15db6f7909c6
|
| 3 |
+
size 37815239
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_5.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 128,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1953,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_5.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6e2a169095e309e749a22f1ebcd9dae026577b5d6ffd8a4b5685a9a3a500c04d
|
| 3 |
+
size 37815239
|