default_stage:
  default_modifiers:
    AWQModifier:
      config_groups:
        group_0:
          targets: [Linear]
          weights:
            num_bits: 4
            type: int
            symmetric: true
            group_size: 32
            strategy: group
            block_structure: null
            dynamic: false
            actorder: null
            scale_dtype: null
            zp_dtype: null
            observer: mse
            observer_kwargs: {}
            input_activations: null
            output_activations: null
            format: null
      targets: [Linear]
      ignore: ['re:.*embed_tokens', 're:.*linear_attn[.]conv1d', 're:.*linear_attn[.]in_proj_a',
        're:.*linear_attn[.]in_proj_b', 're:model[.]visual.*', 're:mtp.*', lm_head]
      bypass_divisibility_checks: false
      mappings:
      - smooth_layer: re:model.*layers[.](3|7|11|15|19|23|27|31)[.]input_layernorm
        balance_layers: ['re:model.*layers[.](3|7|11|15|19|23|27|31)[.]self_attn[.]q_proj',
          're:model.*layers[.](3|7|11|15|19|23|27|31)[.]self_attn[.]k_proj',
          're:model.*layers[.](3|7|11|15|19|23|27|31)[.]self_attn[.]v_proj']
        activation_hook_target: null
      - smooth_layer: re:model.*layers[.](3|7|11|15|19|23|27|31)[.]self_attn[.]v_proj
        balance_layers: ['re:model.*layers[.](3|7|11|15|19|23|27|31)[.]self_attn[.]o_proj']
        activation_hook_target: null
      - smooth_layer: re:model.*layers[.](0|1|2|4|5|6|8|9|10|12|13|14|16|17|18|20|21|22|24|25|26|28|29|30)[.]input_layernorm
        balance_layers: ['re:model.*layers[.](0|1|2|4|5|6|8|9|10|12|13|14|16|17|18|20|21|22|24|25|26|28|29|30)[.]linear_attn[.]in_proj_qkv',
          're:model.*layers[.](0|1|2|4|5|6|8|9|10|12|13|14|16|17|18|20|21|22|24|25|26|28|29|30)[.]linear_attn[.]in_proj_z',
          're:model.*layers[.](0|1|2|4|5|6|8|9|10|12|13|14|16|17|18|20|21|22|24|25|26|28|29|30)[.]linear_attn[.]in_proj_b',
          're:model.*layers[.](0|1|2|4|5|6|8|9|10|12|13|14|16|17|18|20|21|22|24|25|26|28|29|30)[.]linear_attn[.]in_proj_a']
        activation_hook_target: null
      - smooth_layer: re:model.*post_attention_layernorm
        balance_layers: ['re:model.*mlp[.]gate_proj', 're:model.*mlp[.]up_proj']
        activation_hook_target: null
      # NOTE(review): this python-specific tag is rejected by yaml.safe_load and
      # needs a full/unsafe loader — confirm the consumer loads recipes that way.
      offload_device: !!python/object/apply:torch.device [cuda]
      duo_scaling: true
      n_grid: 20