# owl-idm-v0-tiny / config.yml
# Uploaded by shahbuland ("Upload folder using huggingface_hub")
# Commit 8942ffb (verified)
# Model definition: which model variant to build and the encoder's size.
model:
  model_id: simple
  # Simple encoder config (spatial conv + temporal transformer)
  encoder:
    d_model: 512
    n_heads: 8
    n_layers: 12
    max_seq_len: 64  # Maximum temporal sequence length
# Training run settings: trainer selection, data, optimizer, checkpointing.
train:
  model_id: simple
  trainer_id: basic

  # Training data.
  data_dir: /mnt/data/waypoint_1/owl_control_1.1.x/kbm/fps
  target_size: [256, 256]
  window_length: 8
  batch_size: 16

  # Sampling data; sample_data_dir is the same path as data_dir.
  sample_data_dir: /mnt/data/waypoint_1/owl_control_1.1.x/kbm/fps
  n_samples: 8
  # NOTE(review): 128 is larger than the encoder's max_seq_len (64) --
  # confirm the sampling path is meant to handle windows this long.
  sample_window_length: 128

  epochs: 1000

  # Optimizer. Floats keep a decimal point and a signed exponent so that
  # YAML 1.1 loaders (e.g. PyYAML) read them as numbers, not strings.
  opt: AdamW
  opt_kwargs:
    lr: 1.0e-4
    betas: [0.9, 0.95]
    eps: 1.0e-15
    weight_decay: 1.0e-2

  # Checkpoint locations and resume behaviour.
  checkpoint_dir: ./checkpoints/simple
  output_path: ./checkpoints/simple/ema
  resume_ckpt: latest

  # Sampling / saving cadence (units are not stated here -- presumably
  # training steps; confirm against the trainer).
  sample_interval: 100
  save_interval: 100

  # Use log1p scaling for mouse inputs
  use_log1p_scaling: true
# Experiment-logging identifiers.
# NOTE(review): the original indentation was lost; this section is assumed
# to be top-level rather than nested under another key -- confirm against
# the code that loads this config.
logging:
  name: shahbuland
  project: owl-idm-v3
  run_name: simple-v0