ohicarip
/

animateanyone

Model card Files Files and versions

ohicarip committed on Jan 16, 2024

Commit

dd43415

·

1 Parent(s): 5f40b89

upload model

Files changed (2) hide show

epoch-1.safetensors +3 -0
model_config.yaml +104 -0

epoch-1.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3065e97da149313df8a4c8856c98b37034e1bd34ab55d72c1e5cab06aa22798c
+size 12270650212

model_config.yaml ADDED Viewed

	@@ -0,0 +1,104 @@

+model:  # model section: learning rate, target class path, and the params passed with it
+  base_learning_rate: 1.0e-6
+  target: refnet.models.animator.Animator  # dotted class path; presumably built with `params` — confirm in loader
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: image
+    cond_stage_key: reference
+    control_key: control
+    image_size: 64
+    channels: 4  # same value as z_channels / embed_dim under first_stage_config
+    cond_stage_trainable: false
+    conditioning_key: inject
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: true
+    ucg_rate: 0.1
+    use_token: true
+    offset_noise_level: 0.05
+    is_first_stage: false  # canonical lowercase boolean, consistent with the keys above
+    unet_config:  # UNet sub-config: target class path plus its params
+      target: refnet.modules.unet.InferenceDualCondUNet
+      params:
+        image_size: 32  # unused
+        in_channels: 4
+        c_channels: 3  # presumably channel count of the `control` input — TODO confirm
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [4, 2, 1]
+        num_res_blocks: 2
+        channel_mult: [1, 2, 4, 4]
+        num_head_channels: 64
+        use_spatial_transformer: true
+        transformer_depth: 1
+        context_dim: 768
+        use_checkpoint: true
+        legacy: false
+        use_temporal: true
+        frames_t: 8  # presumably frames per clip for the temporal layers — TODO confirm
+    refnet_config:  # reference-network sub-config: target class path plus its params
+      target: refnet.modules.unet.InferenceReferenceNet
+      params:
+        image_size: 32  # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [4, 2, 1]
+        num_res_blocks: 2
+        channel_mult: [1, 2, 4, 4]
+        num_head_channels: 64
+        use_spatial_transformer: true
+        transformer_depth: 1
+        context_dim: 768
+        use_checkpoint: true
+        legacy: false  # no c_channels / use_temporal / frames_t keys in this block
+    first_stage_config:  # first-stage autoencoder sub-config: target class path plus its params
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          double_z: true
+          z_channels: 4
+          resolution: 512
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult: [1, 2, 4, 4]
+          num_res_blocks: 2
+          attn_resolutions: []  # explicit empty list
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity  # Identity module as placeholder; presumably no loss needed here
+        # NOTE(review): is_first_stage also appears in other params blocks; confirm AutoencoderKL accepts it
+        is_first_stage: false
+    cond_stage_config:  # conditioning-encoder sub-config: target class path plus its params
+      target: refnet.modules.encoders.FrozenOpenCLIPImageEmbedder
+      params:
+        arch: ViT-L-14
+        output_tokens: true
+        is_first_stage: false  # canonical lowercase boolean, consistent with output_tokens
+dataloader:  # data-loading section: loader class, its params, and shuffle flag
+  class: AnimateLoader
+  params:
+    transform_list:
+      flip: true
+      rotate: false
+      resize: true
+      jitter: false
+      rotate_range: 45  # presumably only applies when rotate is true — TODO confirm
+    refset_key: reference  # only used by deformation training in ColorizeDiffusion v2
+    load_size: 576
+    crop_size: 512  # crop to (crop_size, crop_size); crops are random when crop_size < load_size
+    keep_ratio: false
+    inverse_grayscale: true
+    is_first_stage: false
+  shuffle: true  # sits beside `params`, not inside it