diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/config.json b/qwen3_0.6b+qwen3_4b_base_Fuser/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d9347f297f40602a2476511e795cde3723d0d909 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/config.json @@ -0,0 +1,82 @@ +model: + base_model: "Qwen/Qwen3-0.6B" + teacher_model: "Qwen/Qwen3-4B-Base" + include_response: false + is_do_alignment: false + alignment_strategy: "first" + projector: + type: "C2CProjector" + params: + hidden_dim: 1024 + intermediate_dim: 1024 + num_layers: 3 + dropout: 0.1 + initial_temperature: 1.0 + final_temperature: 0.001 + anneal_steps: 1953 + # projector: + # type: "AllInOneProjector" + # params: + # hidden_dim: 1024 + # weight_hidden_dim: 1024 + # num_layers: 3 + # dropout: 0.1 + # activation: "gelu" + # use_layer_norm: true + # use_residual: true + # use_swiglu: true + # use_concat: true + # gate_granularity: "scalar" + # gate_depends_on_input: false + # gate_input_features: "target_key" + # gate_init_value: 0.0 + # weight_granularity: "head_merged" + # weight_depends_on_input: true + # weight_input_features: "target_projected_key" + # weight_init_value: 0.0 + # use_gumbel: true + # initial_temperature: 1.0 + # final_temperature: 0.001 + # preserve_target_weight: false + # add_self: true + # anneal_steps: 1929 + # scalar_temperature: 1.0 + # max_sequence_length: 8192 + mapping: "last_aligned" + +training: + learning_rate: 0.0001 + weight_decay: 0.01 + num_epochs: 1 + max_length: 2048 + device: "cuda" + scheduler_type: "linear" + warmup_ratio: 0.1 + max_grad_norm: 1.0 + gradient_accumulation_steps: 4 + per_device_train_batch_size: 8 + num_processes: 8 + freeze: + - "teacher" + - "base" + seed: 42 + +output: + output_dir: "local/checkpoints/Q3-0.6B_Q3-4B-Base_general_500k_C2C" + save_steps: 500 + eval_steps: 100 + wandb_config: + project: "Rosetta" + mode: "online" + entity: "nics-efc" + run_name: "Q3-0.6B_Q3-4B-Base_general_500k_C2C" + +data: + type: "OpenHermesChatDataset" + # type: "DollyChatDataset" + + kwargs: + split: "train" + max_word_count: 2048 + num_samples: 500000 + train_ratio: 0.99 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/aggregator_config.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/aggregator_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/aggregator_config.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_0.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_0.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_0.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_0.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..0658068e666b0d21397db19ed458ee5c48031fcf --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ca901bc8803e85cae986c4cd656874eda980384f8956f26692b91a00930195e +size 37815239 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_1.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_1.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_1.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_1.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..440fb0a9e25b2b29bb5274c9c657eab7d7c41917 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c37b6a1bca77a587417a932c6b08ab422ef895c21adcaa013cf3423df29e5bf4 +size 37815239 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_10.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_10.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_10.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_10.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_10.pt new file mode 100644 index 0000000000000000000000000000000000000000..21c20cbf4288f9fbb51907070a7e49dc4c1274fb --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:befd49e5176b4b913597244f97da7f709edd8c344843adb1e665db2c3cd55f33 +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_11.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_11.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_11.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_11.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_11.pt new file mode 100644 index 0000000000000000000000000000000000000000..9825b032cf513ea0db6199062e493a610db2dced --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_11.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea1e072a0a736a09e0cf7a7edf36dbe7c69101a7249d8138e64b45e1a0afa354 +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_12.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_12.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_12.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_12.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_12.pt new file mode 100644 index 0000000000000000000000000000000000000000..dcb8dd4bf81570829f2ad08196fea74466ed55a7 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_12.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89bea162bff880010939806fa1c5ea6be7b19ef950448818d69528c649192033 +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_13.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_13.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_13.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_13.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_13.pt new file mode 100644 index 0000000000000000000000000000000000000000..6b4d02b856777bde63dc2eec327f6c1438d44c2f --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_13.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03b2b6856fa7d90d6b9298b584e9cdfe0b2f85da492596bde250f85c41fbb3fc +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_14.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_14.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_14.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_14.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_14.pt new file mode 100644 index 0000000000000000000000000000000000000000..94c9828df61897dba9d1a5ac81fca5c4e702e369 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bb4bf56e219a51545a4bff5c6270555de629e1616f87b9fa0f658ccf27254a9 +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_15.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_15.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_15.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_15.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_15.pt new file mode 100644 index 0000000000000000000000000000000000000000..2dc985b0c70d106864db62daff7248c0ca02f06b --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_15.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7af755b043865aa4c27148565c9d20437363e83afa1ba007a4adcbe6b33ae77 +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_16.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_16.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_16.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_16.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_16.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c4c2a938430422f0b6c163ddc662b5bb5a39f5f --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_16.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e9c310b2bc90edf68c0f0914a00897809bed4767da049b2155767a324c1325b +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_17.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_17.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_17.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_17.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_17.pt new file mode 100644 index 0000000000000000000000000000000000000000..5777272a6dd107e131c19dbb857f2cc76d06a71e --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_17.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0055c8e4aebda13f50d5bfa8f8ca0ad4badaa8079923a310c4d76b14f0037d8e +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_18.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_18.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_18.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_18.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_18.pt new file mode 100644 index 0000000000000000000000000000000000000000..91abefa6623c03a5b034cc14f30b77d5ee9dc462 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_18.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61065ef9e80df17da7ab6ee2cc810c996eeef9d8fc068eeb384240d5776164fa +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_19.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_19.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_19.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_19.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_19.pt new file mode 100644 index 0000000000000000000000000000000000000000..c762dddafde53807a0cd2879b526948a08eb8e63 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_19.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7253a544a70f2721a6072f3a2ea931a4b2af120312d052d9ba2bd7bebb450875 +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_2.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_2.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_2.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_2.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0d9b3d6f77599374d2c4629a4b8b36ff0243dd2 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fc8b03defc611cbc58da01cfb67e1a84203e0675f1e9bf4fa5ffcac34851b4b +size 37815239 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_20.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_20.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_20.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_20.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_20.pt new file mode 100644 index 0000000000000000000000000000000000000000..ac7a2c19788bcca9bce076b7595d9d482ab206e3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_20.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c621c50d542d368bf1f127918020afd9823bc6d39057b79e8a0ff368a46fce1 +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_21.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_21.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_21.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_21.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_21.pt new file mode 100644 index 0000000000000000000000000000000000000000..2204725b4278d99db570b71a239b90a993f6c69f --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_21.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87a0e4f1ad4290c336f1780dd4fdaba6f6ce85a221a3111ec1d477289735ca8e +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_22.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_22.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_22.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_22.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_22.pt new file mode 100644 index 0000000000000000000000000000000000000000..a46de7153b43b1ea2fd43aabce28cfd54986124e --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_22.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d3be267fa3472fcac11fbd8252f3a09b8bf050945ec1481f29fca096d815d95 +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_23.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_23.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_23.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_23.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_23.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b45e5d824189ad6c3985935a0a5608f15e77f1b --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_23.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e6207c7442f68832c6678a47c508dfae574c133b3915ad959a1f764eef8d547 +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_24.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_24.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_24.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_24.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_24.pt new file mode 100644 index 0000000000000000000000000000000000000000..ee206d69bedbabd32fe9130931df107dee5aec98 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_24.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d382d0d1b64f303279054234fae9588bcd0ef3ab324c2bde2475d610ca99b16a +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_25.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_25.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_25.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_25.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_25.pt new file mode 100644 index 0000000000000000000000000000000000000000..7115112294386e9f99a7f0cb1f265dc9b19a8c62 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_25.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33161138959e0125801b74b5be397029733729938edf4715760cdde8e729af0d +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_26.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_26.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_26.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_26.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_26.pt new file mode 100644 index 0000000000000000000000000000000000000000..7263655a86b562e00fd35641195832b3dad64361 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_26.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3377bdc808175f9b3d239927424481e98039d445d5622ae0281ac7a5e7734647 +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_27.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_27.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_27.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_27.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_27.pt new file mode 100644 index 0000000000000000000000000000000000000000..e429892642b338a5c514e32f0b22334e9f234e24 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_27.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ac9a6e50aa6886c7cee791b5ce6d2c3f7c806da0a88c61bf0678a34d0e9e348 +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_3.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_3.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_3.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_3.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..db232d1f3cb68f2380655131473d863fb89df1de --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02b1bb740e0983f75d32c680f742d96722d3c74ad3075847957647984bfe7071 +size 37815239 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_4.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_4.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_4.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_4.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_4.pt new file mode 100644 index 0000000000000000000000000000000000000000..3c31c54d2b932976a0de2dd0ccc8b72e4dc87a67 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a59d32df77c5c6b88388edd833866284f8e53b557cf2c6e4cbf15db6f7909c6 +size 37815239 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_5.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_5.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_5.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_5.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_5.pt new file mode 100644 index 0000000000000000000000000000000000000000..7042f2ae0b5156edd7d56ff0ea07c34a509ce1ba --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_5.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e2a169095e309e749a22f1ebcd9dae026577b5d6ffd8a4b5685a9a3a500c04d +size 37815239 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_6.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_6.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_6.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_6.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_6.pt new file mode 100644 index 0000000000000000000000000000000000000000..e5018a73924181e11cf40ba740e18287b4d3b559 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f54945bbc262b0d925a3ae29846db65a42a1058d94c75824abbf8100b1f35aa7 +size 37815239 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_7.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_7.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_7.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_7.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_7.pt new file mode 100644 index 0000000000000000000000000000000000000000..107e10e1ea282c54f687f67010588f9fdb065d08 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_7.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:338421b915b19aaa71e66641b6fce3bc3c67cecb2febbac0b1be8e824e25a273 +size 37815239 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_8.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_8.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_8.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_8.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_8.pt new file mode 100644 index 0000000000000000000000000000000000000000..8cb9394e3b3a7323cd8f9452be86c5b34eb38d5d --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:371d7aaf5b2997e8296b5423f41df4dbf1d7549f426759b0396f450937fd6d0f +size 37815239 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_9.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_9.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bd347f24f021dcde48c5842f729fd3a26c88a9 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_9.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1953, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_9.pt b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_9.pt new file mode 100644 index 0000000000000000000000000000000000000000..36be48d7db8fe99c83193a664e7b98a284130ae5 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_9.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f6fc3b237e338039d1e753862e94a793581dce19e19b09f77b3d4108047049f +size 37815239 diff --git a/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_config.json b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a4f73cb214ce023556290d4e0cb9a6bb1932da05 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_base_Fuser/final/projector_config.json @@ -0,0 +1 @@ +{"0": {"1": {"0": [[8, 0]], "1": [[9, 1]], "2": [[10, 2]], "3": [[11, 3]], "4": [[12, 4]], "5": [[13, 5]], "6": [[14, 6]], "7": [[15, 7]], "8": [[16, 8]], "9": [[17, 9]], "10": [[18, 10]], "11": [[19, 11]], "12": [[20, 12]], "13": [[21, 13]], "14": [[22, 14]], "15": [[23, 15]], "16": [[24, 16]], "17": [[25, 17]], "18": [[26, 18]], "19": [[27, 19]], "20": [[28, 20]], "21": [[29, 21]], "22": [[30, 22]], "23": [[31, 23]], "24": [[32, 24]], "25": [[33, 25]], "26": [[34, 26]], "27": [[35, 27]]}}} \ No newline at end of file