| { | |
| "format_version": "1.0", | |
| "created": "2026-06-19 11:20:51", | |
| "model_name": "Qwen/Qwen3.5-4B", | |
| "dtype": "bfloat16", | |
| "quantization": "nf4", | |
| "device": "cuda", | |
| "target_layers": [ | |
| 21, | |
| 22, | |
| 23, | |
| 24, | |
| 25, | |
| 26, | |
| 27, | |
| 28, | |
| 29, | |
| 30, | |
| 31 | |
| ], | |
| "cross_attn_layers": [ | |
| 21, | |
| 22, | |
| 23, | |
| 24, | |
| 25, | |
| 26, | |
| 27, | |
| 28, | |
| 29, | |
| 30, | |
| 31 | |
| ], | |
| "encoder_type": "selective", | |
| "prompt_format": "auto", | |
| "attn_implementation": null, | |
| "quantize_lm_head": false, | |
| "answer_suffix": "\n\nAnswer with ONLY the letter of the correct option (A, B, C, or D). Do not explain.", | |
| "chat_template_kwargs": { | |
| "enable_thinking": false | |
| }, | |
| "dataset": "mmlu", | |
| "train_size": 250, | |
| "val_size": 50, | |
| "test_size": 50, | |
| "hidden_dim": 2560, | |
| "num_layers": 32 | |
| } |