# Source: basketball_code/config/train_PEBBLE.yaml
# Uploaded by youqiwong using huggingface_hub (revision 0c51b93, verified).
# Hydra defaults list: composes the `sac` option of the `agent` config group
# into this config.
defaults:
- agent: sac
# this needs to be specified manually
# NOTE(review): the comment above presumably refers to `experiment` — confirm.
# `experiment` is embedded in the output-directory template near the end of
# this file.
experiment: PEBBLE
# reward learning
# Every key in this section except `reset_update` and `topK` is embedded in
# the output-directory template near the end of this file, so changing one of
# them sends results to a different directory.
segment: 50
activation: tanh
num_seed_steps: 1000
num_unsup_steps: 5000
# Also reused as `save_interval` through interpolation.
num_interact: 5000
reward_lr: 0.0003
reward_batch: 128
# The commented-out line below records an alternative value; the active
# setting is the one that follows it.
# reward_update: 200
reward_update: 5 # for soccer
# Appears in the output directory as `sample${feed_type}`.
feed_type: 0
reset_update: 100
topK: 5
ensemble_size: 3
max_feedback: 1400
large_batch: 10
# Appears in the output directory as `label_smooth_${label_margin}`.
label_margin: 0.0
# teacher_* values appear in the output directory as
# `teacher_b…_g…_m…_s…_e…` (beta, gamma, eps_mistake, eps_skip, eps_equal).
teacher_beta: -1
teacher_gamma: 1
teacher_eps_mistake: 0
teacher_eps_skip: 0
teacher_eps_equal: 0
# scheduling
# Appears in the output directory as `schedule_${reward_schedule}`.
reward_schedule: 0
# Written with an explicit decimal point and a signed exponent so that every
# YAML loader reads a float. The bare form `1e6` is a float only for loaders
# that accept a dot-less exponent; under YAML 1.1 rules (e.g. PyYAML) it is
# the string "1e6".
num_train_steps: 1.0e+6
# Interpolated: follows `num_train_steps` unless overridden explicitly.
replay_buffer_capacity: ${num_train_steps}
# evaluation config
eval_frequency: 10000
num_eval_episodes: 10
# Device string; presumably handed to the training framework — TODO confirm.
device: cuda
# logger
log_frequency: 10000
log_save_tb: false
# Interpolated: follows `num_interact` unless overridden explicitly.
save_interval: ${num_interact}
# video recorder
save_video: false
# setups
# Appears in the output directory as `_seed${seed}`.
seed: 1
# Environment
# `env` is interpolated into `name` and into the output-directory template
# near the end of this file.
env: dog_stand
gradient_update: 1
# vlm label
# NOTE(review): switches in this section mix 0/1 integers (`vlm_label`,
# `flip_vlm_label`, `image_reward`, ...) with a YAML boolean
# (`sum_segment_score`). Keep the existing types unless the consuming code has
# been checked.
# `vlm_label` and `vlm` appear in the output directory as
# `vlm_${vlm_label}${vlm}`.
vlm_label: 0
vlm: bard
flip_vlm_label: 0
sum_segment_score: false
collect_data_interval: 0
max_image_difference: 0
use_first_and_last: 0
image_reward: 0
resnet: 0 # default
# Three parallel lists of four entries each; presumably one entry per
# convolution layer — TODO confirm against the model code.
conv_kernel_sizes: [5, 3, 3, 3] # default
conv_n_channels: [16, 32, 64, 128] # default
conv_strides: [3, 2, 2, 2] # default
image_size: 300
# YAML null: no path is set by default.
cached_label_path: null
# exp_name
# First path component under ./exp/ in the output-directory template.
exp_name: gemini_template_1
# `???` is OmegaConf's mandatory-value marker: a value has to be supplied
# (for example as a command-line override) before this key is read.
prompt: ???
# NOTE(review): this text describes a drawer while `env` defaults to
# dog_stand — confirm it is intended for this environment.
clip_prompt: "The green drawer is completely opened."
# Appears in the output directory as `_reward${reward}`.
reward: learn_from_preference
# load pretrained models
# These values are the quoted string "None", not YAML null. Presumably the
# loading code compares against that literal — TODO confirm before changing.
reward_model_load_dir: "None"
reward_model_score_load_dir: "None"
agent_model_load_dir: "None"
mode: train
save_images: false
# hydra configuration
# `name` and `run.dir` are settings of the `hydra` node and must be nested
# under it. Written at column 0 they parse as unrelated top-level keys and
# `hydra` itself becomes null.
hydra:
  # Follows the selected environment.
  name: ${env}
  run:
    # Output directory for a run. It is assembled from a timestamp
    # (`${now:...}`) and from the hyperparameters defined above, so each
    # distinct configuration writes to its own directory.
    # `${diag_gaussian_actor...}` and `${agent.params.actor_lr}` are not
    # defined in this file; presumably they come from the agent config
    # selected in `defaults` — TODO confirm.
    # The value is kept on one line because folding a plain or `>` scalar
    # would insert spaces into the path.
    dir: ./exp/${exp_name}/${env}/${now:%Y-%m-%d}-${now:%H-%M-%S}/vlm_${vlm_label}${vlm}_reward${reward}_H${diag_gaussian_actor.params.hidden_dim}_L${diag_gaussian_actor.params.hidden_depth}_lr${agent.params.actor_lr}/teacher_b${teacher_beta}_g${teacher_gamma}_m${teacher_eps_mistake}_s${teacher_eps_skip}_e${teacher_eps_equal}/label_smooth_${label_margin}/schedule_${reward_schedule}/${experiment}_init${num_seed_steps}_unsup${num_unsup_steps}_inter${num_interact}_maxfeed${max_feedback}_seg${segment}_act${activation}_Rlr${reward_lr}_Rbatch${reward_batch}_Rupdate${reward_update}_en${ensemble_size}_sample${feed_type}_large_batch${large_batch}_seed${seed}