diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/config.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c56d7d3698f7dbd4d06b91537563853efeb158ab --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/config.json @@ -0,0 +1,57 @@ +{ + "model": { + "base_model": "Qwen/Qwen3-0.6B", + "teacher_model": "Qwen/Qwen2.5-Math-1.5B-Instruct", + "include_response": false, + "is_do_alignment": false, + "alignment_strategy": "first", + "projector": { + "type": "C2CProjector", + "params": { + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929 + } + }, + "mapping": "last_aligned" + }, + "training": { + "learning_rate": 1e-4, + "weight_decay": 0.01, + "num_epochs": 1, + "max_length": 2048, + "device": "cuda", + "scheduler_type": "linear", + "warmup_ratio": 0.1, + "max_grad_norm": 1.0, + "gradient_accumulation_steps": 8, + "per_device_train_batch_size": 4, + "num_processes": 8, + "freeze": ["teacher","base"], + "seed": 42 + }, + "output": { + "output_dir": "local/checkpoints/0.6+math_C2C_general", + "save_steps": 500, + "eval_steps": 100, + "wandb_config": { + "project": "Rosetta", + "mode": "online", + "entity": "nics-efc", + "run_name": "0.6B+math_C2C_general" + } + }, + "data": { + "type": "OpenHermesChatDataset", + "kwargs": { + "split": "train", + "max_word_count": 2048, + "num_samples": 500000 + }, + "train_ratio": 0.99 + } +} diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/aggregator_config.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/aggregator_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/aggregator_config.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_0.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_0.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_0.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_0.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd4d55d02733100154327128dab866c5c976b7ff --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a04cc6aaf0951e2d1c02cc65d35b227c80c1dc0b3b48fcfb1223fbe4b605d93 +size 34669511 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_1.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_1.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_1.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_1.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..1bc18215098312b7cbe858957fb1d84978bebf1a --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384693a79009a82e1814e962e16a2198f8870f1fea6632d17980ecd13948377f +size 34669511 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_10.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_10.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_10.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_10.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_10.pt new file mode 100644 index 0000000000000000000000000000000000000000..685c1e9132d31dcfc500b333385e4f94cd0fbf85 --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4719d57089eb68d187c79cd430a2cb3facf7f558e6026515eb7f551615e47c43 +size 34669548 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_11.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_11.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_11.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_11.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_11.pt new file mode 100644 index 0000000000000000000000000000000000000000..82822accb9d13f2a256bb3aec1ba85463a439233 --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_11.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e24b13c4e0913a15a95124b9cf038eec4817d12fa41942076c729504890d894c +size 34669548 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_12.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_12.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_12.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_12.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_12.pt new file mode 100644 index 0000000000000000000000000000000000000000..674ced93f9aef48a6c9025295ee7e68dc55e2469 --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_12.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fab1ff921640073009591dd1cfdb9123e6b9972ce149cc8c4ea3a6cf78170e70 +size 34669548 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_13.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_13.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_13.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_13.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_13.pt new file mode 100644 index 0000000000000000000000000000000000000000..08bb413d7642d515f502f8cb4ab4a797f3a7167b --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_13.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6740ef5c728556c99f3c80fd6be80ea4663d5933c09c18ef73750425a112f3e6 +size 34669548 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_14.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_14.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_14.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_14.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_14.pt new file mode 100644 index 0000000000000000000000000000000000000000..bcfdab0425702ba697d0d3843c85ef9b183f3300 --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b12043f37bbc5545b2f53d7cc230b346f4577754dad4bfd3d9094debd967ae1 +size 34669548 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_15.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_15.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_15.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_15.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_15.pt new file mode 100644 index 0000000000000000000000000000000000000000..e257495bb3bedccd7adc4add308bc4d59833ab2d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_15.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36e7cbc661af2b845daef58381f9e492fd01360143a43e6f1cd834d59c3fa6a6 +size 34669548 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_16.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_16.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_16.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_16.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_16.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e40b5edfa7c865bacaba4aeb6b05ff8cb55c734 --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_16.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afdacd832765cff3b1da2c2bb5260ea129f96c10178be893fa9e21522c91e6ef +size 34669548 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_17.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_17.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_17.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_17.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_17.pt new file mode 100644 index 0000000000000000000000000000000000000000..3838f7b29d6ae64014b5e09857a6ea70594262a3 --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_17.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b703b1a7942f052388e4586cc1924b1d46234fd7c7d949ce2396df5b880f35d +size 34669548 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_18.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_18.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_18.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_18.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_18.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc5eaaeb111b842c34183209659b82917d232685 --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_18.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:342950f4c5922187897fce4d377291d9c94fc56219bd780d34f35066e8195faa +size 34669548 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_19.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_19.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_19.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_19.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_19.pt new file mode 100644 index 0000000000000000000000000000000000000000..f2af89b473fb3e822cf03676f354e700f321cfb5 --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_19.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c19ffb1469f350ab999f49292c953cb46d2b92198de2e57f6c5422366b89246c +size 34669548 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_2.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_2.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_2.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_2.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..93e5d5ca6e59d1e4ea39da041433f72b58cab071 --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8687fac1be13e65e0ab9216e84dd001f80943be4be6fee376c3922a3c38b1191 +size 34669511 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_20.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_20.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_20.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_20.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_20.pt new file mode 100644 index 0000000000000000000000000000000000000000..a7d69339305fbb7c61faf6c852c93e67515fb1e9 --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_20.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7148f304c760d0eca1cec0f07567e927fb1764a21985effe60981889a2c7772b +size 34669548 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_21.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_21.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_21.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_21.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_21.pt new file mode 100644 index 0000000000000000000000000000000000000000..48de0592e56d4be95ed211d66c7c454db11ae6fc --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_21.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8239af67a58293570dc54b74e1ad48c0e3e2192593ea0c06f156fd93754a8eb +size 34669548 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_22.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_22.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_22.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_22.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_22.pt new file mode 100644 index 0000000000000000000000000000000000000000..14556a5cf34fdd765d7f0c617aec3a918d5116c2 --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_22.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6145d5a15e79876c5274976040eb99647a029ec964fb00fcf50ff582b5aeacd1 +size 34669548 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_23.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_23.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_23.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_23.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_23.pt new file mode 100644 index 0000000000000000000000000000000000000000..a4978c68911f7435aa5040bf9eba064f07a7edc1 --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_23.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38ed2217c4cdd98b4d21cd6abdba1e2f6e6f643c50e389eef51db64cf901838d +size 34669548 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_24.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_24.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_24.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_24.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_24.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b25b0639bd8a1c5b5794beba01e8141a97f3a64 --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_24.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0708dc71f5bb708316d71eafab75ba41bb4c0b74e4b269e5e3d04a7bd1531d78 +size 34669548 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_25.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_25.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_25.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_25.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_25.pt new file mode 100644 index 0000000000000000000000000000000000000000..e2f471779997bc0e5bb0c9c47e4e9148cf03bc66 --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_25.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e79a40e95aa61413085b4d92413a4e12eadc8e6c9b1d1302748e6cb9c189858 +size 34669548 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_26.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_26.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_26.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_26.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_26.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc5e0deb17015610aa39da4829455e9ce0d51e98 --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_26.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b99606b6ce97b5796b88984f81b8da0549bbf7495c34246e55031c83679ac22 +size 34669548 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_27.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_27.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_27.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_27.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_27.pt new file mode 100644 index 0000000000000000000000000000000000000000..326c0fffc33fbfc25187c2cbc7f6a4e984cfa548 --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_27.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:471d909fa34e1c955486bd001feef8b31db49b742a30a88192856da312e06932 +size 34669548 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_3.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_3.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_3.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_3.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..60f588881ab72b1984797ed7f903ed01ff702dd5 --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abe9e0e3b93edca1b8c75c8b292c595f7a7d510657119f9282a9e7bd4241dabf +size 34669511 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_4.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_4.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_4.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_4.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_4.pt new file mode 100644 index 0000000000000000000000000000000000000000..633b09893c2ad6ce5952bcf54c67f3ce4428c43d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db5c8c5eaafbe17345f95b27ca0a2d0951fdf0b4eed7d33442f3f492ce505a03 +size 34669511 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_5.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_5.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_5.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_5.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_5.pt new file mode 100644 index 0000000000000000000000000000000000000000..907cfc395ca9b474036d4ca4018bafb7727b66a1 --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_5.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa95e7be8d887ec04db835bcab9c7b8b5cae3c4fc58f7b71e8f3b25d94619bc9 +size 34669511 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_6.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_6.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_6.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_6.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_6.pt new file mode 100644 index 0000000000000000000000000000000000000000..a3a3040147498d92de79757447570442d992910b --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cb65f09b3ef3d202594008362524e4805476efc5dbe1fdef8968633427ba217 +size 34669511 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_7.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_7.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_7.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_7.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_7.pt new file mode 100644 index 0000000000000000000000000000000000000000..294d92e7d7c983fbc4aadcfd098c2ccada2e5704 --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_7.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:855c50bc27f6c8355fbf3ff936f2630fcb0de5a58be970c70c68b3d9b8daf721 +size 34669511 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_8.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_8.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_8.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_8.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_8.pt new file mode 100644 index 0000000000000000000000000000000000000000..684af0e44da5ef2f73ee09465ab04666238873a7 --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cce9688924ec44ff3c4331b318ba51f49f2ee6264289b812d5398f4bc00412d +size 34669511 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_9.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_9.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_9.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_9.pt b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_9.pt new file mode 100644 index 0000000000000000000000000000000000000000..24452e18d83a22e9218e0936f93e90fcf4197eb8 --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_9.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fe4ed986d60b18829cdd22f0a85d04c32ff34e420200b1964e122837ab1ca8a +size 34669511 diff --git a/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_config.json b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c153f34c2443eba2f2e95e37ad01b879dbab1b26 --- /dev/null +++ b/qwen3_0.6b+qwen2.5_1.5b_math_Fuser/final/projector_config.json @@ -0,0 +1 @@ +{"0": {"1": {"0": [[0, 0]], "1": [[1, 1]], "2": [[2, 2]], "3": [[3, 3]], "4": [[4, 4]], "5": [[5, 5]], "6": [[6, 6]], "7": [[7, 7]], "8": [[8, 8]], "9": [[9, 9]], "10": [[10, 10]], "11": [[11, 11]], "12": [[12, 12]], "13": [[13, 13]], "14": [[14, 14]], "15": [[15, 15]], "16": [[16, 16]], "17": [[17, 17]], "18": [[18, 18]], "19": [[19, 19]], "20": [[20, 20]], "21": [[21, 21]], "22": [[22, 22]], "23": [[23, 23]], "24": [[24, 24]], "25": [[25, 25]], "26": [[26, 26]], "27": [[27, 27]]}}} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/config.json b/qwen3_0.6b+qwen3_4b_Fuser/config.json new file mode 100755 index 0000000000000000000000000000000000000000..9dfca46c7ca6701192788a60dd81ae8eb03609d3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/config.json @@ -0,0 +1,57 @@ +{ + "model": { + "base_model": "Qwen/Qwen3-0.6B", + "teacher_model": "Qwen/Qwen3-4B", + "include_response": false, + "is_do_alignment": false, + "alignment_strategy": "first", + "projector": { + "type": "C2CProjector", + "params": { + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929 + } + }, + "mapping": "last_aligned" + }, + "training": { + "learning_rate": 1e-4, + "weight_decay": 0.01, + "num_epochs": 1, + "max_length": 2048, + "device": "cuda", + "scheduler_type": "linear", + "warmup_ratio": 0.1, + "max_grad_norm": 1.0, + "gradient_accumulation_steps": 8, + "per_device_train_batch_size": 4, + "num_processes": 8, + "freeze": ["teacher","base"], + "seed": 42 + }, + "output": { + "output_dir": "local/checkpoints/0.6+4B_C2C_general", + "save_steps": 500, + "eval_steps": 100, + "wandb_config": { + "project": "Rosetta", + "mode": "online", + "entity": "nics-efc", + "run_name": "0.6B+4B_C2C_general_OpenHermes_500k" + } + }, + "data": { + "type": "OpenHermesChatDataset", + "kwargs": { + "split": "train", + "max_word_count": 2048, + "num_samples": 500000 + }, + "train_ratio": 0.99 + } +} diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/aggregator_config.json b/qwen3_0.6b+qwen3_4b_Fuser/final/aggregator_config.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/aggregator_config.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_0.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_0.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_0.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_0.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_0.pt new file mode 100755 index 0000000000000000000000000000000000000000..a6a6211cdd0b72fa74c680b26e9d3d4dc16e3326 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f60f3b3e5fd27a96cee8d9d30de8bfdd2a88bc041ae70ade8cb9e29c06d5e4a8 +size 37815239 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_1.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_1.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_1.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_1.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_1.pt new file mode 100755 index 0000000000000000000000000000000000000000..f6a050a06413afb44ecb66e8f1093d108f11876b --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:508cecd38c2bc90679345143545698d9bc8e9395656e708e0d1e9bccd289f772 +size 37815239 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_10.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_10.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_10.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_10.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_10.pt new file mode 100755 index 0000000000000000000000000000000000000000..8f354748249855192a81ec29375860ec808b2be2 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd213bfaf675983799856c3de852458f9406f8d611f9f6bf59c8f4c30d4aadb3 +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_11.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_11.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_11.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_11.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_11.pt new file mode 100755 index 0000000000000000000000000000000000000000..6fc6a4d01adebe7ac864160e9498320da6db964e --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_11.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:868fe1db0bb391efd4f363aa5a377f24f7d4fb2d8fae7d74fdf45d5ef6915e90 +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_12.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_12.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_12.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_12.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_12.pt new file mode 100755 index 0000000000000000000000000000000000000000..d83007b931d7c16af32344706df5f0864ede548c --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_12.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abe371ec5638e14e305d01a02c264676c619aa6175722f90202ee2685f4ed787 +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_13.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_13.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_13.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_13.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_13.pt new file mode 100755 index 0000000000000000000000000000000000000000..355a2e456c9b8492fe917e8c64405a1b90902efa --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_13.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f3c1d4a3fa21db927dcb05e9134c4cb4886799b21669d5093e86d4888489bc3 +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_14.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_14.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_14.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_14.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_14.pt new file mode 100755 index 0000000000000000000000000000000000000000..3dab390a15aa78cecc44a735e6924e37c8344dad --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54a0eed347c4f79ac96ad9bd018df3324f7decc1c9967e4a4dba1f4e6b645316 +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_15.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_15.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_15.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_15.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_15.pt new file mode 100755 index 0000000000000000000000000000000000000000..210a5a46c7539a088440815588b5136596b84011 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_15.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b07e6bd209c7f26dd7b95c2c91a50e7dadc7405502eed9d8ebfb5243f0234ec +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_16.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_16.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_16.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_16.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_16.pt new file mode 100755 index 0000000000000000000000000000000000000000..1a1f016a8c47e7a21b985f07c08cbb771dbbd76b --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_16.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d295f7559b62db4b2db7687e9659b43938c1e8c7293afc621611b6f1c31c4b0c +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_17.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_17.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_17.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_17.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_17.pt new file mode 100755 index 0000000000000000000000000000000000000000..ea09682205dfd904583ccd94006b22b9951e48a2 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_17.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3509c5e35cd9e5a061c25ae681eb5709168edf24a9d653c4a1787640000365db +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_18.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_18.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_18.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_18.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_18.pt new file mode 100755 index 0000000000000000000000000000000000000000..b075d7cdaa45952d1ba4b7512082fa3f45fd9953 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_18.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ce2382cf9a80455a3df52030c59eb9f26dcb76a12dcfa723e8588bad8a59e24 +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_19.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_19.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_19.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_19.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_19.pt new file mode 100755 index 0000000000000000000000000000000000000000..33fed4a867f79117ff514f96468b72469f6804fd --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_19.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b312020a732ba6d100f6f0f45603d787344d653f539ae963660772f27f31cb2 +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_2.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_2.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_2.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_2.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_2.pt new file mode 100755 index 0000000000000000000000000000000000000000..28fdc1a71460602c9e866dda045cf454ba77aa30 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45133679e48816abe8aa583b88bb7d0489a7bc67a8ce265eb8a51a72fb083119 +size 37815239 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_20.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_20.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_20.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_20.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_20.pt new file mode 100755 index 0000000000000000000000000000000000000000..db97c9f6a091e22345d8ae35774fabebb9aac970 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_20.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97d82822b13295aade5bf45d1aa4c17e7a72b6a4cbaddbac47d0cc1b4c607698 +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_21.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_21.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_21.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_21.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_21.pt new file mode 100755 index 0000000000000000000000000000000000000000..b7184f7e0972640143d2e5eb9c95b011a2c02f61 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_21.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2abe61c562fd887f4f8bf07439e327bfb7664e3e74568a90912a08863673287 +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_22.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_22.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_22.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_22.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_22.pt new file mode 100755 index 0000000000000000000000000000000000000000..949339970a2a718ea979bfe09fa9d90670a6d4b4 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_22.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f980b0dbe6d9bc9e814754884ad422d6359154ed84d7bf08ce1cb6b825733aab +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_23.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_23.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_23.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_23.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_23.pt new file mode 100755 index 0000000000000000000000000000000000000000..e9de276c8eef86a47a1c9eb456a397d9c4a6290a --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_23.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a38d12e15c25084753292a3e678e35e5d37a245132a809584f70f2d43fb46114 +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_24.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_24.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_24.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_24.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_24.pt new file mode 100755 index 0000000000000000000000000000000000000000..98f6ead65e3727404c3875d488edcdfd9dca6614 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_24.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7e66fe4212ba168cd945e70b841ef4ed3b47a643ad310bfa967257bb1f4dd35 +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_25.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_25.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_25.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_25.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_25.pt new file mode 100755 index 0000000000000000000000000000000000000000..cb1b6e83e6e1699f444c70fb6ab9299321bfa517 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_25.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7576103b34f044c15ab80a84eaed324eb1d58b60d5ac3118b3c8d411276de3ab +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_26.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_26.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_26.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_26.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_26.pt new file mode 100755 index 0000000000000000000000000000000000000000..9a011821b5e4572425fbea0d9f757b6d198248bc --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_26.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be5af42614fc204caf43135e7f7ee44c467020454872310a5b8a84e0e0f1cf67 +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_27.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_27.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_27.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_27.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_27.pt new file mode 100755 index 0000000000000000000000000000000000000000..700365443f14927499cca3c0c46a544395c90c11 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_27.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a029c00e59bb39114a4b1b6489e897bc8a3511ca854430a61e69ed5a9cf17e3 +size 37815276 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_3.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_3.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_3.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_3.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_3.pt new file mode 100755 index 0000000000000000000000000000000000000000..8a7148d146ddb6f599a8e5bd0f8a3629b5b377fe --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52b7c8789a5368a2797262fd8aef2d473480914f3d0dae98291494f7f62a107c +size 37815239 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_4.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_4.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_4.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_4.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_4.pt new file mode 100755 index 0000000000000000000000000000000000000000..018e18dfbdddc34c8d08ed61cf77338b36671085 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:395a8d41ca5e528fc1d5e54e0efe0a59023572c83a55932df39b23135db7068f +size 37815239 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_5.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_5.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_5.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_5.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_5.pt new file mode 100755 index 0000000000000000000000000000000000000000..99279a403e064e312c4011266ed5535dd0c01b03 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_5.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dc895a79d698aceaa4d368385baa54919572496a58c644c8dc6cc62d0322647 +size 37815239 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_6.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_6.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_6.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_6.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_6.pt new file mode 100755 index 0000000000000000000000000000000000000000..50118ebda47f7d56a6d8cfb68a471b84ecf1ca2c --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecb86d9389296ec32127edee1ae92ad52ffa8a051b1b830c951f45e04b9f4286 +size 37815239 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_7.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_7.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_7.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_7.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_7.pt new file mode 100755 index 0000000000000000000000000000000000000000..aecc7a0661e81d98f6cc1c879c25bbc88414f7db --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_7.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3c16e10a36cd760f7e66b3dd3db5b26111794e24ef5c6524ead45a3ed8f022c +size 37815239 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_8.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_8.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_8.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_8.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_8.pt new file mode 100755 index 0000000000000000000000000000000000000000..21903436074901385c17778601cc99d2b7849b9b --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ae89f748988e71a56835ac2a22e0f4163ae73542f411875e345ad1731731eca +size 37815239 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_9.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_9.json new file mode 100755 index 0000000000000000000000000000000000000000..ffedd795aa2bceb000bde26948bb91a6242fe5f3 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_9.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_9.pt b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_9.pt new file mode 100755 index 0000000000000000000000000000000000000000..b07501169ca71581b8917b30b236a6cf3b2e1053 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_9.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ad72fbcd795338575fc4888ae21920236d95657201d430be0296c97dba52554 +size 37815239 diff --git a/qwen3_0.6b+qwen3_4b_Fuser/final/projector_config.json b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_config.json new file mode 100755 index 0000000000000000000000000000000000000000..a4f73cb214ce023556290d4e0cb9a6bb1932da05 --- /dev/null +++ b/qwen3_0.6b+qwen3_4b_Fuser/final/projector_config.json @@ -0,0 +1 @@ +{"0": {"1": {"0": [[8, 0]], "1": [[9, 1]], "2": [[10, 2]], "3": [[11, 3]], "4": [[12, 4]], "5": [[13, 5]], "6": [[14, 6]], "7": [[15, 7]], "8": [[16, 8]], "9": [[17, 9]], "10": [[18, 10]], "11": [[19, 11]], "12": [[20, 12]], "13": [[21, 13]], "14": [[22, 14]], "15": [[23, 15]], "16": [[24, 16]], "17": [[25, 17]], "18": [[26, 18]], "19": [[27, 19]], "20": [[28, 20]], "21": [[29, 21]], "22": [[30, 22]], "23": [[31, 23]], "24": [[32, 24]], "25": [[33, 25]], "26": [[34, 26]], "27": [[35, 27]]}}} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/config.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/config.json new file mode 100755 index 0000000000000000000000000000000000000000..919f3eadf18925cd71a4e2c4e175daa2baedd5eb --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/config.json @@ -0,0 +1,57 @@ +{ + "model": { + "base_model": "Qwen/Qwen3-1.7B", + "teacher_model": "Qwen/Qwen2.5-1.5B-Instruct", + "include_response": false, + "is_do_alignment": false, + "alignment_strategy": "first", + "projector": { + "type": "C2CProjector", + "params": { + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929 + } + }, + "mapping": "last_aligned" + }, + "training": { + "learning_rate": 1e-4, + "weight_decay": 0.01, + "num_epochs": 1, + "max_length": 2048, + "device": "cuda", + "scheduler_type": "linear", + "warmup_ratio": 0.1, + "max_grad_norm": 1.0, + "gradient_accumulation_steps": 8, + "per_device_train_batch_size": 4, + "num_processes": 8, + "freeze": ["teacher","base"], + "seed": 42 + }, + "output": { + "output_dir": "local/checkpoints/1.7B+1.5B_general", + "save_steps": 500, + "eval_steps": 100, + "wandb_config": { + "project": "Rosetta", + "mode": "online", + "entity": "nics-efc", + "run_name": "1.7B+1.5B_general_OpenHermes_500k" + } + }, + "data": { + "type": "OpenHermesChatDataset", + "kwargs": { + "split": "train", + "max_word_count": 2048, + "num_samples": 5000 + }, + "train_ratio": 0.99 + } +} diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/aggregator_config.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/aggregator_config.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/aggregator_config.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_0.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_0.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_0.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_0.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_0.pt new file mode 100755 index 0000000000000000000000000000000000000000..a4a30fa3dfb30b279c6625b04b1d2c5c0fc5269a --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd7cf4ba2f1c4acab5faaacf0fc03faf6461ce96b546eae94d28a796a766d66a +size 34669511 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_1.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_1.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_1.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_1.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_1.pt new file mode 100755 index 0000000000000000000000000000000000000000..c7ac9fab0015bc3dd4cc57ab94222d17d680555c --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:098507746afcf6408473b0962cdcacfebca06b74b214a8aa773a340b881175e6 +size 34669511 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_10.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_10.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_10.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_10.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_10.pt new file mode 100755 index 0000000000000000000000000000000000000000..cf27e8b8f38a4cb8d133c08b7ce4d83ec54349e9 --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9872fb783e666023cd8da5ca1990ff234200f325663bb4d3744f5e37143b35ad +size 34669548 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_11.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_11.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_11.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_11.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_11.pt new file mode 100755 index 0000000000000000000000000000000000000000..55bcf0da177e7954e9b856ec4fbb16ac1cc2f1c4 --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_11.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0fa9444f6c081c665246cf3570ee5fb65a187fe37030c163bf562d565e94637 +size 34669548 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_12.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_12.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_12.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_12.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_12.pt new file mode 100755 index 0000000000000000000000000000000000000000..208459cf0cd6f8bc4eccc567eaa11d82e3a7b704 --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_12.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13885b09585368ecd14edc5114173fcd2f9f71b26cf1463982fa181b43f11b65 +size 34669548 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_13.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_13.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_13.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_13.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_13.pt new file mode 100755 index 0000000000000000000000000000000000000000..b15a13c3fddcb8f37d0125b850936ea6d12a8443 --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_13.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbdf03e14612949a1ecf8d4efa202b4a6db0682bb7c8b9e3ca7c9d8427591881 +size 34669548 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_14.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_14.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_14.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_14.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_14.pt new file mode 100755 index 0000000000000000000000000000000000000000..ade2735403023e3ab5266e659b770c7d84c6c72a --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6449850c0a1615967b23bd3176b0092d79859619580c0598439e4294dd1f9284 +size 34669548 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_15.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_15.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_15.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_15.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_15.pt new file mode 100755 index 0000000000000000000000000000000000000000..85e34654a1d0502a89880b0dbe3aa204ec90a411 --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_15.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4336c5c14d0152191a73873d8f87b015b54eacd2fe800e589340bd749a05ff5 +size 34669548 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_16.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_16.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_16.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_16.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_16.pt new file mode 100755 index 0000000000000000000000000000000000000000..0de987ae844a29ee45b81342584c8d1b0c325edf --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_16.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:814a70e5f207f061621b5fee2acbf24a496f3869e22e042e7161e92b73e76ebb +size 34669548 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_17.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_17.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_17.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_17.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_17.pt new file mode 100755 index 0000000000000000000000000000000000000000..3e7f4b80e57efefe8149de3d650cab27d6f4ef3c --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_17.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9a1a759a22602dd01b4842f08aeaf68e60009a60e861c2984d900e9b125b2d0 +size 34669548 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_18.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_18.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_18.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_18.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_18.pt new file mode 100755 index 0000000000000000000000000000000000000000..8bf272b50606b68ea8bf1e170e30a0acd8889c9a --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_18.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:250c63fe891d59db8bed294d26dc10afa9051a86b8d9ab213b215361f35cb0fd +size 34669548 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_19.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_19.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_19.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_19.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_19.pt new file mode 100755 index 0000000000000000000000000000000000000000..9f0bee2a1f51f91ad8b916e2a69cb8d6548a5dfa --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_19.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:212675743a04aeea7e4f95ec6f70d6af7affc1cc4bb9d5a298cd607c60cc2a45 +size 34669548 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_2.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_2.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_2.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_2.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_2.pt new file mode 100755 index 0000000000000000000000000000000000000000..fd5bf5e52b6b3acf6a3119753ca78e1f61d53f0b --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84a9822edce3e7228e901918488de4ba7fd2a40b76cf8f25b01abdf057192347 +size 34669511 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_20.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_20.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_20.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_20.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_20.pt new file mode 100755 index 0000000000000000000000000000000000000000..2686e1a84804588f048a7f9720df1c2a050a173a --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_20.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3043888e37933a61880e31e9253b34cb885e5dfd97329621056696b1a97db44 +size 34669548 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_21.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_21.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_21.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_21.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_21.pt new file mode 100755 index 0000000000000000000000000000000000000000..e3f6e0a21ce0c33bb1f466b0085713e071f53043 --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_21.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfafd93ec8b25896677c36038dcb14d9eb8dbc64d0b92cfc91295613d1fcb772 +size 34669548 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_22.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_22.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_22.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_22.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_22.pt new file mode 100755 index 0000000000000000000000000000000000000000..1e0bb0aff964c336164b47b744e815af621cf504 --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_22.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28732c8c54df929a572cb35ceaa9790f801cb2d5894e97ba75e95f24759f81c4 +size 34669548 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_23.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_23.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_23.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_23.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_23.pt new file mode 100755 index 0000000000000000000000000000000000000000..140f64ccc7132612e6b537280d2de6d9bd78a786 --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_23.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2b2e9d2f1c89e3c5e88d442aad1204bad708ebb75e8e545fb7e9a679e60db3e +size 34669548 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_24.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_24.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_24.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_24.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_24.pt new file mode 100755 index 0000000000000000000000000000000000000000..8b47faf941d9872dc4af815425271917fc7a4d66 --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_24.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0440b9875c801d5b14a31ffc07e9efcf068355d06496f58ac9ffccfd9b16f8e9 +size 34669548 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_25.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_25.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_25.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_25.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_25.pt new file mode 100755 index 0000000000000000000000000000000000000000..9fd491a5d1de05f641cc2613c44c4f3c82d63f95 --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_25.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba7d877b2afaf0167a251b7c6aecd25fa6ec27f4c80f6dbfc3c24fdeb4066c0c +size 34669548 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_26.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_26.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_26.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_26.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_26.pt new file mode 100755 index 0000000000000000000000000000000000000000..9b629eaa4a3e2ac461a3d0a374de97c11dd5b230 --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_26.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9180b7d634c5368a84b7e7c1928d777f8f3c0d0d02e25b5a3ad586307209bf12 +size 34669548 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_27.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_27.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_27.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_27.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_27.pt new file mode 100755 index 0000000000000000000000000000000000000000..c2a6b1521c37157e7e900f0bf1bf8c7965e9686c --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_27.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b034f00a11d6a207c01016b1633ceccc81c87080f59c45214c488c06a247342e +size 34669548 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_3.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_3.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_3.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_3.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_3.pt new file mode 100755 index 0000000000000000000000000000000000000000..2edf4c5c92cdf03e70fb58168b51fe9b2e0517f3 --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eb8f20c7c52cbad60d1f21e7d3772c2d4a99b0ec1e8bb14d43bbb50fa9c2432 +size 34669511 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_4.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_4.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_4.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_4.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_4.pt new file mode 100755 index 0000000000000000000000000000000000000000..ef3c21f697be63d9aa0384dd91a3830743cf3332 --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32d2ecf8d51aa482fef37783534f009cb71eab3371049beed9641abd0efd9696 +size 34669511 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_5.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_5.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_5.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_5.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_5.pt new file mode 100755 index 0000000000000000000000000000000000000000..27a9848b8e56ca25fdeee0aa31932c4fdf885db2 --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_5.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:137865e31f3bb3e2a4860eb4aaf68e5d7134139e6b6de6109218eeb9c0464626 +size 34669511 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_6.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_6.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_6.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_6.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_6.pt new file mode 100755 index 0000000000000000000000000000000000000000..5cbb315f46bbc30696af2c20623cccaf972b3240 --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:843c0fca52b162066e68103e6e3051f3b35231c58b85b24a5c43a1eb06cd206f +size 34669511 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_7.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_7.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_7.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_7.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_7.pt new file mode 100755 index 0000000000000000000000000000000000000000..2a2c8e7d18365f5073050d97ec94f090f2af92ba --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_7.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0d7ff4571b3fb60b4f68e13bfb0abf6156d2f8dbba7e6f0cb62afa457ddc3cc +size 34669511 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_8.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_8.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_8.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_8.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_8.pt new file mode 100755 index 0000000000000000000000000000000000000000..0a9241b59307d81c512018ecf59b5ba0768ac6fb --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d7ada7ac8a9af7672cfeab93687e8f4714ded281a365ea78dd5d0d166d8b482 +size 34669511 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_9.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_9.json new file mode 100755 index 0000000000000000000000000000000000000000..fc3d7f673ebe95e77767e24533dceea80e176d7d --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_9.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 2, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_9.pt b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_9.pt new file mode 100755 index 0000000000000000000000000000000000000000..684e6efbf7487455cbe084ddbddcc409445044dc --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_9.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68cc551ba971a75cb603e65efb76ad0e92bbc6ea450d8fdf76656b624c9cd934 +size 34669511 diff --git a/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_config.json b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_config.json new file mode 100755 index 0000000000000000000000000000000000000000..c153f34c2443eba2f2e95e37ad01b879dbab1b26 --- /dev/null +++ b/qwen3_1.7b+qwen2.5_1.5b_Fuser/final/projector_config.json @@ -0,0 +1 @@ +{"0": {"1": {"0": [[0, 0]], "1": [[1, 1]], "2": [[2, 2]], "3": [[3, 3]], "4": [[4, 4]], "5": [[5, 5]], "6": [[6, 6]], "7": [[7, 7]], "8": [[8, 8]], "9": [[9, 9]], "10": [[10, 10]], "11": [[11, 11]], "12": [[12, 12]], "13": [[13, 13]], "14": [[14, 14]], "15": [[15, 15]], "16": [[16, 16]], "17": [[17, 17]], "18": [[18, 18]], "19": [[19, 19]], "20": [[20, 20]], "21": [[21, 21]], "22": [[22, 22]], "23": [[23, 23]], "24": [[24, 24]], "25": [[25, 25]], "26": [[26, 26]], "27": [[27, 27]]}}} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/config.json b/qwen3_8b+qwen2.5_7b_Fuser/config.json new file mode 100644 index 0000000000000000000000000000000000000000..68bab2b788a2df6ff2ac173fc25a91fd3c68c466 --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/config.json @@ -0,0 +1,57 @@ +{ + "model": { + "base_model": "/share/public/public_models/Qwen3-8B", + "teacher_model": "/share/public/public_models/Qwen2.5-7B-Instruct", + "include_response": false, + "is_do_alignment": false, + "alignment_strategy": "first", + "projector": { + "type": "C2CProjector", + "params": { + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894 + } + }, + "mapping": "last_aligned" + }, + "training": { + "learning_rate": 1e-4, + "weight_decay": 0.01, + "num_epochs": 1, + "max_length": 1024, + "device": "cuda", + "scheduler_type": "linear", + "warmup_ratio": 0.1, + "max_grad_norm": 1.0, + "gradient_accumulation_steps": 8, + "per_device_train_batch_size": 4, + "num_processes": 8, + "freeze": ["teacher","base"], + "seed": 42 + }, + "output": { + "output_dir": "local/checkpoints/8B+7B_C2C_general", + "save_steps": 500, + "eval_steps": 100, + "wandb_config": { + "project": "Rosetta", + "mode": "online", + "entity": "nics-efc", + "run_name": "8B+7B_C2C_general_OpenHermes_500k" + } + }, + "data": { + "type": "OpenHermesChatDataset", + "kwargs": { + "split": "train", + "max_word_count": 1024, + "num_samples": 500000 + }, + "train_ratio": 0.99 + } +} diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/aggregator_config.json b/qwen3_8b+qwen2.5_7b_Fuser/final/aggregator_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/aggregator_config.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_0.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_0.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_0.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_0.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..e448179ad5d218413443b161436c59638b22bbb6 --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe39a8e9e60727ac074344236354d1c70ed4a841e917724501ad876bc109c328 +size 35718087 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_1.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_1.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_1.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_1.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..c7bb576eaad9713c41c055cf79b67cb94c587efb --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:244c324cbd6285f71ebcf0297d8273324cc291866b64fc9f22c8cc6236fa1b6e +size 35718087 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_10.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_10.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_10.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_10.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_10.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ef31410a6c568d18f69c6190fcf20c03907e3a2 --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ba6fdf8ea07ef52a9eb0d4db9185c72618431d6f6e2a118657edd4418c1d7ff +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_11.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_11.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_11.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_11.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_11.pt new file mode 100644 index 0000000000000000000000000000000000000000..3638dfbf6376c8de145db3dea65bb38166640107 --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_11.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e843712bfff97da39e388b16a0dcd1db0131880aea98fbabb4cbb9157c07cfe9 +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_12.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_12.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_12.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_12.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_12.pt new file mode 100644 index 0000000000000000000000000000000000000000..3c88cb44a8322ebee16020629fef9afc6317af83 --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_12.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a68b5a0ee41078e96a966f26eb4f47145ef19e84d4962998fa8b64273886f4ab +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_13.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_13.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_13.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_13.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_13.pt new file mode 100644 index 0000000000000000000000000000000000000000..c482c3444b6663c27952eff62c26dd6ef9edddc9 --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_13.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8dec1e15d7c4e46ca43be02aeaf25b5956f05917abb2507a94c99c0abb49c61 +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_14.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_14.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_14.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_14.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_14.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f8487c0bc0019cdc6109ef0254f374f8454c6ec --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:055d8c24c5338fdb44233c3b576bc960dff17625a911c20396fdcd7b5cdd01fe +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_15.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_15.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_15.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_15.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_15.pt new file mode 100644 index 0000000000000000000000000000000000000000..c480fdc7085ef8edd9a0d583bc263de58e8a7de2 --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_15.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55da942a8d412896a0b0eb6ae7e8277038b4fe26978c0db120f57f7107165852 +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_16.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_16.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_16.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_16.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_16.pt new file mode 100644 index 0000000000000000000000000000000000000000..703a753a096811379216be63fdea004e732f8cfc --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_16.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df78950347c8feef5571b0744facbb96f487b7cb41ec90e696a6f4b144f240c6 +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_17.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_17.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_17.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_17.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_17.pt new file mode 100644 index 0000000000000000000000000000000000000000..41e76b272dc55d6b9dc8f68d5611872a379bffb6 --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_17.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6952d46dd43bfc08c748d3ec6d5016d750d616a487137d0fede5d2ee61b5faa8 +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_18.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_18.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_18.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_18.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_18.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb8b8139a250feea909c49397c4fd32534ade08d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_18.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10bbda67ad64451ecb99819db4de9f41b0c89e05803f3fa555f2fa354e6a82d9 +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_19.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_19.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_19.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_19.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_19.pt new file mode 100644 index 0000000000000000000000000000000000000000..40e6829343cb4387b0200def2a51d0e8a8a90fe9 --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_19.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73788a835995a3028aaa523dac095698983a4bd864fe5e139e1d154d0f0f77df +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_2.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_2.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_2.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_2.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..397eb914bfea3bf51a4fa3303c6cbc53f9af8eae --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:653b668cd029ab7675abcc7c6a4c70fb8bde61d38ef80c00224a36dc8812839a +size 35718087 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_20.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_20.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_20.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_20.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_20.pt new file mode 100644 index 0000000000000000000000000000000000000000..1fe8ff9b0a5114664616eb07e51c2097c5f5a5ba --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_20.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab33baa6fb40c717c5d91fc18e7c90751fe37363d71937110c841dbaa78546d3 +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_21.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_21.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_21.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_21.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_21.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a8f283b73ee1e5d79a1af159ca12d23f0c5511c --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_21.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:107004b912c7974bd54650ef467e840aef5338af94cd8ae0214509ff95ad5b92 +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_22.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_22.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_22.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_22.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_22.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad93bc50850c2e5266113198328a3b30a6be9b38 --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_22.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0ec72c167b7e359aa9284e9baef9eaacea9d4fa98c2d4a60199abee75c395a6 +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_23.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_23.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_23.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_23.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_23.pt new file mode 100644 index 0000000000000000000000000000000000000000..b77cd9ef2d8d6c0c9b337ffd24e92be9fdce48a1 --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_23.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f9d5f9235dcd5adc88d2a267c146a15d8a22cc1b35a35b95da14a8bc607b820 +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_24.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_24.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_24.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_24.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_24.pt new file mode 100644 index 0000000000000000000000000000000000000000..0187419aa54bf8ebc5c9bdf5053c0722521f4544 --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_24.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e281dd147a49937ba10889dd28b385ca17a9a1a5c0a2a2a7ddbda189325bf72 +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_25.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_25.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_25.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_25.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_25.pt new file mode 100644 index 0000000000000000000000000000000000000000..ed187d7e7deb87a98f7aecc639e681e40aa938e6 --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_25.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04ffb4ab8c3407109cf2a400ea189368797bb34eb84086819807cd3882d098fa +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_26.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_26.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_26.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_26.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_26.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f5a6bbf95ba617e3bf7ad30d845e2a420ce4e4a --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_26.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06e62b0fd6cb2aeaf8f5f626535ea97242df260138afbda0b9ca7fce37ce58c4 +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_27.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_27.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_27.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_27.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_27.pt new file mode 100644 index 0000000000000000000000000000000000000000..aeec6d03d952037792a4d245b47041c21ece5cbe --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_27.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b95d8380f1b39848218c33f0ff99fd3d97168eb08d35d3ae159cafae4ed7f0f +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_28.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_28.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_28.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_28.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_28.pt new file mode 100644 index 0000000000000000000000000000000000000000..7ac4f2186112fc9b86ecfbe22adbfd7614ba3aa7 --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_28.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a27a01fc641bc9819085441ffedc7a7105e2c10849f3d39f7bb664ab7a5866e5 +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_29.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_29.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_29.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_29.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_29.pt new file mode 100644 index 0000000000000000000000000000000000000000..7836e38a72b37f46d7fa435fdae04c9881f12ffa --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_29.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3719886bb59c1cf039ded4d31330d04ca155acf73e10592b0f0973910c184740 +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_3.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_3.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_3.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_3.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f28b7617f5818d394806d851bd50517629efa29 --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38f4d25fd433d7cd1aabe1903697d81449002a67af47eda307739aef2ec04b42 +size 35718087 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_30.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_30.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_30.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_30.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_30.pt new file mode 100644 index 0000000000000000000000000000000000000000..b84f6654d2ce2ad8b1669c117b5b35481d5a8924 --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_30.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30c5ae191549f80b56f67dda61716060ebdbf6c19753c1d512c81b6846a98507 +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_31.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_31.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_31.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_31.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_31.pt new file mode 100644 index 0000000000000000000000000000000000000000..f5db2e782cbfe6de8800490f4d54c35539fa4875 --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_31.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e6b0dc9c41c30539b97130c1f53afe03e063d50f2aa7bf0ccd6eb3639a8ada5 +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_32.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_32.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_32.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_32.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ecb2147f3b6dfea731f6ed2ec307a35ddf0332f --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8aa1865a72ea515af24a159b3d3361036835ad5fe776785c6125ff5957ff317b +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_33.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_33.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_33.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_33.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_33.pt new file mode 100644 index 0000000000000000000000000000000000000000..553733b37aeb38b25894ca04e21e95115fe8848b --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_33.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f46c5fd97b3f3b6366e4e713487666c4268472ffc4df4395b67fdada2ea890e +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_34.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_34.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_34.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_34.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_34.pt new file mode 100644 index 0000000000000000000000000000000000000000..87c76dd9550945666a8f6c8edfec1df5e4533e71 --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_34.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00142757d273de0cd3778e9102e0290a4f02a120318d80cc4f3f88ffa5108adc +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_35.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_35.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_35.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_35.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_35.pt new file mode 100644 index 0000000000000000000000000000000000000000..6b7c6bab5e9759cae6a8a1ec4c42223a3dc432da --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_35.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42a3f38457cba63485f817ff47159eae3d07116996a93d6b159b618bdebd09b6 +size 35718124 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_4.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_4.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_4.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_4.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_4.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4dfcc992e1a9327bac2ad3a7ca6f345b44755da --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:427180f4ecc47dc45fa2c45574ef009216195227dea3cf8f51f2d87f395619ca +size 35718087 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_5.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_5.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_5.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_5.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_5.pt new file mode 100644 index 0000000000000000000000000000000000000000..9eb0ab9beafcd08651466f29c7aecbbf8a26d0ac --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_5.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e14b22c5ae850cbbad637537cf4a0a7d558d420fbdeb95caa702a22b3db25e05 +size 35718087 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_6.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_6.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_6.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_6.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_6.pt new file mode 100644 index 0000000000000000000000000000000000000000..575fb0fe613d3edf22d610bd871a70c91a78076f --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c362e6a2c6e5b5dcacb9f3f9758a7f7ec8d120aa0f927a5c25256d2f08094f4e +size 35718087 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_7.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_7.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_7.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_7.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_7.pt new file mode 100644 index 0000000000000000000000000000000000000000..d7e691d09338a6c04c1287e33caff43392020cf2 --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_7.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75b9405e489313a72ae3e249b25d09def19f8ed2eefbc387b651e6ee3e407c22 +size 35718087 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_8.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_8.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_8.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_8.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_8.pt new file mode 100644 index 0000000000000000000000000000000000000000..0078878c7ef024a8da14285d0a4250e0559eecb1 --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:358797ed8a9fabce00e6a38a472c81242862994da717915c54e4311ca8dac098 +size 35718087 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_9.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_9.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdd4a57834934f737458dc24a21202b2e841d --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_9.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 128, + "target_dim": 128, + "source_num_heads": 4, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1894, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_9.pt b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_9.pt new file mode 100644 index 0000000000000000000000000000000000000000..014e935f67f45d8eb76a12a31fdb7db2cb83b6d7 --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_9.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:569b8ce27bd784fa1447f60040f8c1da7245d6b4af2aeb983dd36833ac3d374c +size 35718087 diff --git a/qwen3_8b+qwen2.5_7b_Fuser/final/projector_config.json b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6182a1047584c50c6cc8b40e34372a67375c67db --- /dev/null +++ b/qwen3_8b+qwen2.5_7b_Fuser/final/projector_config.json @@ -0,0 +1 @@ +{"0": {"1": {"0": [[0, 0]], "1": [[0, 1]], "2": [[0, 2]], "3": [[0, 3]], "4": [[0, 4]], "5": [[0, 5]], "6": [[0, 6]], "7": [[0, 7]], "8": [[0, 8]], "9": [[1, 9]], "10": [[2, 10]], "11": [[3, 11]], "12": [[4, 12]], "13": [[5, 13]], "14": [[6, 14]], "15": [[7, 15]], "16": [[8, 16]], "17": [[9, 17]], "18": [[10, 18]], "19": [[11, 19]], "20": [[12, 20]], "21": [[13, 21]], "22": [[14, 22]], "23": [[15, 23]], "24": [[16, 24]], "25": [[17, 25]], "26": [[18, 26]], "27": [[19, 27]], "28": [[20, 28]], "29": [[21, 29]], "30": [[22, 30]], "31": [[23, 31]], "32": [[24, 32]], "33": [[25, 33]], "34": [[26, 34]], "35": [[27, 35]]}}} \ No newline at end of file