Upload folder using huggingface_hub
Browse files- ae/ckpt.pt +3 -0
- ae/config.yaml +79 -0
- config.yaml +40 -0
- model.safetensors +3 -0
- owl_wms.commit +1 -0
ae/ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a1b558adab097e601598e7bb781e0f26035f5ac4a8fcfbe6eaa2b0f1eee3a07
|
| 3 |
+
size 6811962067
|
ae/config.yaml
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
model_id: dcae
|
| 3 |
+
sample_size: [360, 640]
|
| 4 |
+
channels: 3
|
| 5 |
+
latent_size: 32
|
| 6 |
+
latent_channels: 16
|
| 7 |
+
|
| 8 |
+
ch_0: 256
|
| 9 |
+
ch_max: 2048
|
| 10 |
+
|
| 11 |
+
encoder_blocks_per_stage: [4, 4, 4, 8]
|
| 12 |
+
decoder_blocks_per_stage: [4, 4, 4, 8]
|
| 13 |
+
|
| 14 |
+
use_middle_block: false
|
| 15 |
+
do_channel_mask: false
|
| 16 |
+
|
| 17 |
+
train:
|
| 18 |
+
trainer_id: rec
|
| 19 |
+
data_id: video_dir_loader
|
| 20 |
+
data_kwargs:
|
| 21 |
+
source:
|
| 22 |
+
- /mnt/data/datasets/extracted_tars/kbm/fps/*/*.mp4
|
| 23 |
+
- /mnt/data/datasets/extracted_tars/kbm/3ps/*/*.mp4
|
| 24 |
+
- /mnt/data/datasets/extracted_tars/kbm/other/*/*.mp4
|
| 25 |
+
target_size: [360, 640]
|
| 26 |
+
|
| 27 |
+
target_batch_size: 32
|
| 28 |
+
batch_size: 4
|
| 29 |
+
|
| 30 |
+
epochs: 200
|
| 31 |
+
|
| 32 |
+
#opt: AdamW
|
| 33 |
+
#opt_kwargs:
|
| 34 |
+
# lr: 3.0e-5
|
| 35 |
+
# weight_decay: 1.0e-4
|
| 36 |
+
# betas: [0.9, 0.95]
|
| 37 |
+
# eps: 1.0e-15
|
| 38 |
+
|
| 39 |
+
opt: Muon
|
| 40 |
+
opt_kwargs:
|
| 41 |
+
lr: 1.0e-3
|
| 42 |
+
momentum: 0.95
|
| 43 |
+
adamw_lr: 1.0e-5
|
| 44 |
+
adamw_wd: 1.0e-2
|
| 45 |
+
adamw_eps: 1.0e-6
|
| 46 |
+
adamw_betas: [0.9, 0.95]
|
| 47 |
+
adamw_keys:
|
| 48 |
+
- encoder.conv_in
|
| 49 |
+
- encoder.conv_out
|
| 50 |
+
- encoder.conv_out_logvar
|
| 51 |
+
- decoder.conv_in
|
| 52 |
+
- decoder.conv_out
|
| 53 |
+
- .up.
|
| 54 |
+
- .down.
|
| 55 |
+
- .residuals.
|
| 56 |
+
|
| 57 |
+
lpips_type: convnext
|
| 58 |
+
loss_weights:
|
| 59 |
+
kl: 3.0e-7
|
| 60 |
+
lpips: 12.0
|
| 61 |
+
l2: 1.0
|
| 62 |
+
dwt: 0.25
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
scheduler: LinearWarmup
|
| 66 |
+
scheduler_kwargs:
|
| 67 |
+
warmup_steps: 3000
|
| 68 |
+
min_lr: 1.0e-5
|
| 69 |
+
|
| 70 |
+
checkpoint_dir: checkpoints/waypoint_1_vae_owlc_f16_c16
|
| 71 |
+
resume_ckpt: checkpoints/waypoint_1_vae_owlc_f16_c16/step_200000.pt
|
| 72 |
+
|
| 73 |
+
sample_interval: 1000
|
| 74 |
+
save_interval: 5000
|
| 75 |
+
|
| 76 |
+
wandb:
|
| 77 |
+
name: shahbuland
|
| 78 |
+
project: new_vaes_v2
|
| 79 |
+
run_name: waypoint_1_vae_owlc_f16_c16_highres
|
config.yaml
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
channels: 16
|
| 2 |
+
n_layers: 22
|
| 3 |
+
n_heads: 40
|
| 4 |
+
n_kv_heads: 20
|
| 5 |
+
d_model: 2560
|
| 6 |
+
mlp_ratio: 5
|
| 7 |
+
mlp_gradient_checkpointing: true
|
| 8 |
+
causal: true
|
| 9 |
+
n_buttons: 256
|
| 10 |
+
tokens_per_frame: 256
|
| 11 |
+
height: 16
|
| 12 |
+
width: 16
|
| 13 |
+
patch:
|
| 14 |
+
- 2
|
| 15 |
+
- 2
|
| 16 |
+
base_fps: 60
|
| 17 |
+
local_window: 16
|
| 18 |
+
global_window: 128
|
| 19 |
+
global_pinned_dilation: 8
|
| 20 |
+
global_attn_period: 4
|
| 21 |
+
global_attn_offset: 0
|
| 22 |
+
value_residual: false
|
| 23 |
+
n_frames: 800
|
| 24 |
+
rope_impl: ortho
|
| 25 |
+
gated_attn: true
|
| 26 |
+
noise_conditioning: wan
|
| 27 |
+
ctrl_conditioning: mlp_fusion
|
| 28 |
+
ctrl_cond_dropout: 0.0
|
| 29 |
+
ctrl_conditioning_period: 3
|
| 30 |
+
prompt_conditioning: cross_attention
|
| 31 |
+
prompt_cond_dropout: 0.0
|
| 32 |
+
prompt_embedding_dim: 2048
|
| 33 |
+
prompt_conditioning_period: 3
|
| 34 |
+
block_gradient_checkpointing: true
|
| 35 |
+
scheduler_sigmas:
|
| 36 |
+
- 1.0
|
| 37 |
+
- 0.8609585762023926
|
| 38 |
+
- 0.729332447052002
|
| 39 |
+
- 0.3205108940601349
|
| 40 |
+
- 0.0
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14356db9229453850f9ad650f31c3e1c4744066abd43562f6fbee161fb36c9e6
|
| 3 |
+
size 12515075376
|
owl_wms.commit
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
da4ed5ea709dd8418ebb2ac1082083758a458bf4
|