#!/usr/bin/env bash


# ***** global config *****
# Use EGL as the GL backend for both MuJoCo and PyOpenGL.
export MUJOCO_GL='egl' PYOPENGL_PLATFORM='egl'

# PyTorch CUDA allocator configuration.
export PYTORCH_CUDA_ALLOC_CONF='expandable_segments:True'

# Launch time formatted as yymmdd-HH:MM:SS; used below to build the output
# directory and the job name.
TIMESTAMP="$(date '+%y%m%d-%H:%M:%S')"
# Account name used for model repo ids.
USERNAME='jedeka30'

# # ***** run lerobot act training *****
  # --dataset.root=datasets/grasp_box_local_260304-15_03_1772610893 \
  # --dataset.root=datasets/fast \
  # --steps=100000 \
  # --policy.repo_id=$USERNAME/${1}_model \


# export HF_HOME=/path/to/your/hf_cache
# Set the Hugging Face offline switches for the transformers and datasets
# libraries (the hub-wide HF_HUB_OFFLINE switch is left commented out).
export TRANSFORMERS_OFFLINE='1' HF_DATASETS_OFFLINE='1'
# export HF_HUB_OFFLINE=1

# echo $HF_HUB_OFFLINE
# ***** tmp *****

######
# DiT - Diffusion
######
# Launches lerobot-train with the diffusion objective on GPU index 1.
#
# Positional arguments of this script:
#   $1  policy type; passed to --policy.type and used in the output
#       directory and job name
#   $2  dataset root; passed to --dataset.root
#
# Reads TIMESTAMP and USERNAME set in the global config section.
if (( $# < 2 )); then
  # Fail early instead of starting a run with an empty policy type or
  # dataset root.
  printf 'Usage: %s POLICY_TYPE DATASET_ROOT\n' "${0##*/}" >&2
  exit 2
fi
policy_type=$1
dataset_root=$2

# Arguments are collected in an array so that values containing spaces or
# glob characters reach lerobot-train as single, unmodified words.
train_args=(
  # --- dataset and output location ---
  "--dataset.root=${dataset_root}"
  --dataset.repo_id=YOUR_DATASET
  "--output_dir=outputs/train/${policy_type}_model/${TIMESTAMP}"

  # --- policy ---
  "--policy.type=${policy_type}"
  --policy.device=cuda
  --policy.horizon=32
  --policy.n_action_steps=24
  --policy.objective=diffusion
  --policy.noise_scheduler_type=DDIM
  --policy.num_train_timesteps=100
  --policy.clip_sample=true
  --policy.clip_sample_range=1.0
  # NOTE(review): this flag used to be passed twice (10, then 20). Only the
  # later value is kept, presumably the one a last-wins argument parser
  # would have applied -- confirm 20 is the intended setting.
  --policy.num_inference_steps=20

  # --- logging, checkpointing, schedule ---
  "--policy.repo_id=${USERNAME}/grasp_box"
  --wandb.enable=true
  --wandb.project=act
  "--job_name=${policy_type}_${TIMESTAMP}"
  --save_checkpoint=true
  --steps=100000
  --save_freq=5000
  --batch_size=24
  --eval.batch_size=1
  --eval.n_episodes=1
  --eval_freq=2000
  --num_workers=8
)

CUDA_VISIBLE_DEVICES=1 lerobot-train "${train_args[@]}"


######
# DiT - Flow Matching
######

# CUDA_VISIBLE_DEVICES=0 lerobot-train \
#   --dataset.root=$2 \
#   --dataset.repo_id=YOUR_DATASET \
#   --output_dir=outputs/train/${1}_model/$TIMESTAMP \
#   \
#   --policy.type=$1 \
#   --policy.device=cuda \
#   --policy.horizon=32 \
#   --policy.n_action_steps=24 \
#   --policy.objective=flow_matching \
#   --policy.timestep_sampling_strategy=beta \
#   --policy.timestep_sampling_alpha=1.5 \
#   --policy.timestep_sampling_beta=1.0 \
#   --policy.timestep_sampling_s=0.999 \
#   --policy.num_integration_steps=100 \
#   --policy.integration_method=euler \
#   --policy.sigma_min=0.0 \
#   --policy.hidden_dim=512 \
#   --policy.dropout=0.1 \
#   --policy.timestep_embed_dim=256 \
#   --policy.use_rope=true \
#   --policy.rope_base=10000.0 \
#   --policy.optimizer_lr=2e-5 \
#   --policy.vision_encoder_lr_multiplier=0.1 \
#   --policy.num_train_timesteps=100 \
#   --policy.num_inference_steps=20 \
#   \
#   --policy.repo_id="jedeka30/grasp_box" \
#   --wandb.enable=true \
#   --wandb.project=act \
#   --job_name=${1}_${TIMESTAMP} \
#   --save_checkpoint=true \
#   --steps=100000 \
#   --save_freq=5000 \
#   --batch_size=24 \
#   --eval.batch_size=1 \
#   --eval.n_episodes=1 \
#   --eval_freq=2000 \
#   --num_workers=8


  # Optional extra policy flags (commented out; not passed to any command above):
  # --policy.num_layers=4 \
  # --policy.num_heads=8 \
  # --policy.image_resize_shape=[320,240] \
  # --policy.image_crop_shape=[224,224] \
  # --policy.image_crop_is_random=true \


######
# REFERENCES: OTHER ARGS
######

# # Small datasets (< 100 examples)
# --policy.num_layers=4 \
# --policy.hidden_dim=512 \
# --policy.num_heads=8  # should ideally be hidden_dim // 64

# # Medium datasets (100-5k examples) - default
# --policy.num_layers=6 \
# --policy.hidden_dim=512 \
# --policy.num_heads=8  # should ideally be hidden_dim // 64

# # Large datasets (> 5k examples)
# --policy.num_layers=8 \
# --policy.hidden_dim=512 \
# --policy.num_heads=8   # should ideally be hidden_dim // 64

# # Rotary Position Embedding (RoPE) - default, recommended
# --policy.use_rope=true \
# --policy.rope_base=10000.0  # Base frequency for RoPE

# # Absolute positional encoding
# --policy.use_positional_encoding=true  # Disables RoPE when true


# Remove the wandb artifacts directory under the user's cache once the
# training command has returned (runs regardless of its exit status).
wandb_artifacts_dir=~/.cache/wandb/artifacts/
rm -rf "${wandb_artifacts_dir}"