nitky
/

Swallow-70b-RP

Text Generation

text-generation-inference

Model card Files Files and versions

Swallow-70b-RP / mergekit_config.yml

nitky's picture

Upload 23 files

1e744ea verified about 2 years ago

history blame contribute delete

1.45 kB

	# Intermediate merge "Swallow-70b-RP-base" (referenced by name in the
	# slerp document of this file).
	# Merges nitky/Swallow-70b-NVE-RP onto the Swallow-70b-instruct-hf base
	# using merge_method dare_ties with one scalar weight per tensor filter.
	models:
	  - model: tokyotech-llm/Swallow-70b-instruct-hf
	    # no parameters necessary for base model
	  - model: nitky/Swallow-70b-NVE-RP
	    parameters:
	      density: 1
	      # Per-filter weights; the entry without `filter` is the default.
	      weight:
	        - filter: mlp
	          value: 0.1
	        - filter: self_attn
	          value: 0.4
	        - value: 0  # fallback for rest of tensors.
	merge_method: dare_ties
	base_model: tokyotech-llm/Swallow-70b-instruct-hf
	dtype: bfloat16
	tokenizer_source: union
	name: Swallow-70b-RP-base
	---
	# Intermediate merge "Swallow-70b-RP-flavor" (referenced by name in the
	# slerp document of this file).
	# Same two models and merge_method (dare_ties) as the "-base" document,
	# but the mlp / self_attn weights are 9-element lists instead of scalars.
	models:
	  - model: tokyotech-llm/Swallow-70b-instruct-hf
	    # no parameters necessary for base model
	  - model: nitky/Swallow-70b-NVE-RP
	    parameters:
	      density: 1
	      # NOTE(review): list values are presumably treated by mergekit as a
	      # gradient interpolated across layer depth - confirm in mergekit docs.
	      weight:
	        - filter: mlp
	          value: [0.4, 0.1, 0.4, 0.1, 0.4, 0.1, 0.4, 0.1, 0.1]
	        - filter: self_attn
	          value: [0.4, 0.4, 0.1, 0.4, 0.1, 0.4, 0.1, 0.4, 0.4]
	        - value: 0  # fallback for rest of tensors.
	merge_method: dare_ties
	base_model: tokyotech-llm/Swallow-70b-instruct-hf
	dtype: bfloat16
	tokenizer_source: union
	name: Swallow-70b-RP-flavor
	---
	# Final merge "Swallow-70b-RP": slerp between the two intermediate merges
	# named Swallow-70b-RP-base and Swallow-70b-RP-flavor, over layers [0, 80]
	# of each.
	slices:
	  - sources:
	      - model: Swallow-70b-RP-base
	        layer_range: [0, 80]
	      - model: Swallow-70b-RP-flavor
	        layer_range: [0, 80]
	merge_method: slerp
	base_model: Swallow-70b-RP-base
	parameters:
	  # model stabilization
	  # NOTE(review): in mergekit slerp, t presumably blends from base_model
	  # (t = 0) toward the other source (t = 1) - confirm in mergekit docs.
	  t:
	    - filter: self_attn
	      value: [0, 0.5, 0.3, 0.7, 1]
	    - filter: mlp
	      value: [1, 0.5, 0.7, 0.3, 0]
	    - value: 0.5  # fallback for rest of tensors
	dtype: bfloat16
	name: Swallow-70b-RP