ohicarip committed on
Commit
dd43415
·
1 Parent(s): 5f40b89

upload model

Browse files
Files changed (2) hide show
  1. epoch-1.safetensors +3 -0
  2. model_config.yaml +104 -0
epoch-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3065e97da149313df8a4c8856c98b37034e1bd34ab55d72c1e5cab06aa22798c
3
+ size 12270650212
model_config.yaml ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ base_learning_rate: 1.0e-6
3
+ target: refnet.models.animator.Animator
4
+ params:
5
+ linear_start: 0.00085
6
+ linear_end: 0.0120
7
+ num_timesteps_cond: 1
8
+ log_every_t: 200
9
+ timesteps: 1000
10
+ first_stage_key: image
11
+ cond_stage_key: reference
12
+ control_key: control
13
+ image_size: 64
14
+ channels: 4
15
+ cond_stage_trainable: false
16
+ conditioning_key: inject
17
+ monitor: val/loss_simple_ema
18
+ scale_factor: 0.18215
19
+ use_ema: true
20
+ ucg_rate: 0.1
21
+ use_token: true
22
+ offset_noise_level: 0.05
23
+ is_first_stage: False
24
+
25
+ unet_config:
26
+ target: refnet.modules.unet.InferenceDualCondUNet
27
+ params:
28
+ image_size: 32 # unused
29
+ in_channels: 4
30
+ c_channels: 3
31
+ out_channels: 4
32
+ model_channels: 320
33
+ attention_resolutions: [ 4, 2, 1 ]
34
+ num_res_blocks: 2
35
+ channel_mult: [ 1, 2, 4, 4 ]
36
+ num_head_channels: 64
37
+ use_spatial_transformer: True
38
+ transformer_depth: 1
39
+ context_dim: 768
40
+ use_checkpoint: True
41
+ legacy: False
42
+ use_temporal: true
43
+ frames_t: 8
44
+
45
+ refnet_config:
46
+ target: refnet.modules.unet.InferenceReferenceNet
47
+ params:
48
+ image_size: 32 # unused
49
+ in_channels: 4
50
+ out_channels: 4
51
+ model_channels: 320
52
+ attention_resolutions: [ 4, 2, 1 ]
53
+ num_res_blocks: 2
54
+ channel_mult: [ 1, 2, 4, 4 ]
55
+ num_head_channels: 64
56
+ use_spatial_transformer: True
57
+ transformer_depth: 1
58
+ context_dim: 768
59
+ use_checkpoint: True
60
+ legacy: False
61
+
62
+ first_stage_config:
63
+ target: ldm.models.autoencoder.AutoencoderKL
64
+ params:
65
+ embed_dim: 4
66
+ monitor: val/rec_loss
67
+ ddconfig:
68
+ double_z: true
69
+ z_channels: 4
70
+ resolution: 512
71
+ in_channels: 3
72
+ out_ch: 3
73
+ ch: 128
74
+ ch_mult: [1, 2, 4, 4]
75
+ num_res_blocks: 2
76
+ attn_resolutions: []
77
+ dropout: 0.0
78
+ lossconfig:
79
+ target: torch.nn.Identity
80
+ is_first_stage: False
81
+
82
+ cond_stage_config:
83
+ target: refnet.modules.encoders.FrozenOpenCLIPImageEmbedder
84
+ params:
85
+ arch: ViT-L-14
86
+ output_tokens: true
87
+ is_first_stage: False
88
+
89
+ dataloader:
90
+ class: AnimateLoader
91
+ params:
92
+ transform_list:
93
+ flip: true
94
+ rotate: false
95
+ resize: true
96
+ jitter: False
97
+ rotate_range: 45
98
+ refset_key: reference # only use deformation training in ColorizeDiffusion v2
99
+ load_size: 576
100
+ crop_size: 512 # images are cropped to (crop_size, crop_size); the crop position is random when crop_size < load_size
101
+ keep_ratio: false
102
+ inverse_grayscale: true
103
+ is_first_stage: False
104
+ shuffle: true