Meta-Qwen3.5-4B / run.json
Imperius's picture
Upload run.json with huggingface_hub
11e1661 verified
Raw
History Blame Contribute Delete
855 Bytes
{
"format_version": "1.0",
"created": "2026-06-19 11:20:51",
"model_name": "Qwen/Qwen3.5-4B",
"dtype": "bfloat16",
"quantization": "nf4",
"device": "cuda",
"target_layers": [
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31
],
"cross_attn_layers": [
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31
],
"encoder_type": "selective",
"prompt_format": "auto",
"attn_implementation": null,
"quantize_lm_head": false,
"answer_suffix": "\n\nAnswer with ONLY the letter of the correct option (A, B, C, or D). Do not explain.",
"chat_template_kwargs": {
"enable_thinking": false
},
"dataset": "mmlu",
"train_size": 250,
"val_size": 50,
"test_size": 50,
"hidden_dim": 2560,
"num_layers": 32
}