Imperius
/

Meta-Qwen3.5-4B

Text Generation

selective-prediction

Model card Files Files and versions

Meta-Qwen3.5-4B / run.json

Imperius's picture

Upload run.json with huggingface_hub

11e1661 verified 14 days ago

History Blame Contribute Delete

855 Bytes

	{
	"format_version": "1.0",
	"created": "2026-06-19 11:20:51",
	"model_name": "Qwen/Qwen3.5-4B",
	"dtype": "bfloat16",
	"quantization": "nf4",
	"device": "cuda",
	"target_layers": [
	21,
	22,
	23,
	24,
	25,
	26,
	27,
	28,
	29,
	30,
	31
	],
	"cross_attn_layers": [
	21,
	22,
	23,
	24,
	25,
	26,
	27,
	28,
	29,
	30,
	31
	],
	"encoder_type": "selective",
	"prompt_format": "auto",
	"attn_implementation": null,
	"quantize_lm_head": false,
	"answer_suffix": "\n\nAnswer with ONLY the letter of the correct option (A, B, C, or D). Do not explain.",
	"chat_template_kwargs": {
	"enable_thinking": false
	},
	"dataset": "mmlu",
	"train_size": 250,
	"val_size": 50,
	"test_size": 50,
	"hidden_dim": 2560,
	"num_layers": 32
	}