Spaces:
Running on Zero
model:
  base_learning_rate: 1.0e-6
  target: refnet.models.v2-colorizerXL.InferenceWrapperXL
  params:
    linear_start: 0.00085
    linear_end: 0.0120
    timesteps: 1000
    image_size: 128
    channels: 4
    scale_factor: 0.13025
    controller: true

    unet_config:
      target: refnet.modules.unet.DualCondUNetXL
      params:
        use_checkpoint: True
        in_channels: 4
        in_channels_fg: 4
        out_channels: 4
        model_channels: 320
        adm_in_channels: 512
        num_classes: sequential
        attention_resolutions: [4, 2]
        num_res_blocks: 2
        channel_mult: [1, 2, 4]
        num_head_channels: 64
        use_spatial_transformer: true
        use_linear_in_transformer: true
        transformer_depth: [1, 2, 10]
        context_dim: 2048
        map_module: false
        warp_module: false
        style_modulation: false

    bg_encoder_config:
      target: refnet.modules.unet.ReferenceNet
      params:
        use_checkpoint: True
        in_channels: 6
        model_channels: 320
        adm_in_channels: 1024
        num_classes: sequential
        attention_resolutions: [ 4, 2 ]
        num_res_blocks: 2
        channel_mult: [ 1, 2, 4 ]
        num_head_channels: 64
        use_spatial_transformer: true
        use_linear_in_transformer: true
        disable_cross_attentions: true
        context_dim: 2048
        transformer_depth: [ 1, 2, 10 ]

    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        ddconfig:
          double_z: true
          z_channels: 4
          resolution: 512
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4, 4]
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0

    cond_stage_config:
      target: refnet.modules.embedder.HFCLIPVisionModel
      params:
        arch: ViT-bigG-14

    img_embedder_config:
      target: refnet.modules.embedder.WDv14SwinTransformerV2

    control_encoder_config:
      target: refnet.modules.encoder.MultiScaleAttentionEncoder
      params:
        in_ch: 3
        model_channels: 320
        ch_mults: [1, 2, 4]

    proj_config:
      target: refnet.modules.proj.ClusterConcat
      # target: refnet.modules.proj.RecoveryClusterConcat
      params:
        input_dim: 1280
        c_dim: 1024
        output_dim: 2048
        token_length: 196
        dim_head: 128

    scalar_embedder_config:
      target: refnet.modules.embedder.TimestepEmbedding
      params:
        embed_dim: 256

    lora_config:
      lora_params: [
        {
          label: background,
          root_module: model.diffusion_model,
          target_keys: [ attn2.to_q, attn2.to_k, attn2.to_v ],
          r: 4,
        }
      ]