# Source: Hugging Face upload by lukewys ("Upload folder using huggingface_hub", commit 9e5d280, verified)
---
AdamW.lr: 0.0001
AdamW.weight_decay: 0.0
DecoderTransformer.depth: 8
DecoderTransformer.dim: 512
DecoderTransformer.dropout: 0.1
DecoderTransformer.heads: 8
DecoderTransformer.max_seq_len: 514
accelerator: auto
actor_init_on_gpu: false
actor_learning_rate: 5.0e-06
adam_betas:
- 0.9
- 0.95
adam_offload: false
advantage_estimator: gae_interleave
anchor_model_path: logs/enc_dec_base_chord_3_datasets/step=13000.ckpt
args.debug: 0
args.load: configs/single_agent_rl/gapt.yml
args.save: null
args.unknown: []
aux_loss_coef: 0.0
batch_size: 64
bf16: true
buffer_cpu_offload: false
cache_dir: data/cache
checkpoint_interval: 1000
checkpoint_metric: val/loss
checkpoint_mode: min
checkpoint_top_k: -1
chord_names_path: data/hooktheory/chord_names.json
compile: true
contrastive_reward_model_path:
- logs/contrastive_reward_3_datasets/step=8000.ckpt
- logs/contrastive_reward_2_3_datasets/step=8000.ckpt
contrastive_reward_rhythm_model_path:
- logs/contrastive_reward_no_augmentation_rhythm_3_datasets/step=2500.ckpt
- logs/contrastive_reward_no_augmentation_rhythm_2_3_datasets/step=2500.ckpt
contrastive_reward_rhythm_weight: 1.0
counterpart_vram_swap: false
critic_learning_rate: 9.0e-05
data_augmentation: true
data_path: data/hooktheory/Hooktheory.json.gz
dataloader_pin_memory: false
datasets:
- hooktheory
- pop909
- nottingham
devices: auto
disable_trace_cache: false
discriminative_reward_model_path:
- logs/discriminative_reward_128_bs_3_datasets/step=3000.ckpt
- logs/discriminative_reward_128_bs_2_3_datasets/step=3000.ckpt
discriminative_reward_rhythm_model_path:
- logs/discriminative_reward_no_augmentation_rhythm_3_datasets/step=3000.ckpt
- logs/discriminative_reward_no_augmentation_rhythm_2_3_datasets/step=3000.ckpt
discriminative_reward_rhythm_weight: 1.0
enable_reward_label_smoothing: true
entropy_loss_coef: 0.01
eps_clip: 0.2
eval_steps: 200
flash_attn: true
freezing_actor_steps: 0
gail_discriminative_model_configs:
depth: 8
dim: 512
dropout: 0.1
heads: 8
gail_reward_formulation: logits_prob_log
gail_reward_learning_rate: 9.0e-05
gamma: 1
grad_accum_dtype: null
gradient_checkpointing: false
gradient_checkpointing_use_reentrant: false
init_kl_coef: 0.001
invalid_output_penalty_weight: 1.0
kl_estimator: k3
kl_horizon: 10000
kl_target: null
l2: 0.0
lambd: 0.95
limit_eval_batches: 4
lit_module_override_args:
chord_names_path: data/cache/chord_names_augmented.json
data_path: data/hooktheory/Hooktheory.json.gz
num_workers: 4
local_rank: -1
log_every_n_steps: 1
logging_steps: 1
logits_vram_swap: true
max_epochs: 1
max_len: 512
max_log_examples: 8
max_norm: 1.0
max_samples: 1000000
micro_rollout_batch_size: 384
micro_train_batch_size: 48
model_part: chord
model_type: decoder_only
n_samples_per_prompt: 1
normalize_reward: true
num_nodes: 1
num_steps: 1000
num_workers: 8
overfit_batches: 0
packing_samples: false
precision: bf16-mixed
pretrain_data: null
pretrain_model_path: logs/decoder_only_online_chord_3_datasets/step=11000.ckpt
repetition_penalty_threshold: 4
repetition_penalty_weight: 1.0
reward_apply_threshold_after_steps: 200
reward_average_steps: 3
reward_clip_range: null
reward_update_early_stop_steps: 500
reward_update_steps: 5
reward_update_strategy: average
reward_update_threshold: 1.0
reward_vram_swap: false
rollout_batch_size: 384
sample_interval: 5000
save_dir: logs/gapt
save_eval_gen: false
save_steps: -1
save_value_network: false
seed: 42
strategy: auto
temperature: 0.99
top_p: 1.0
train_batch_size: 48
train_steps: 30000
trainer_empty_cache: false
use_full_kl: true
use_kl_estimator_k3: false
use_kl_loss: false
use_reverse_kl: false
use_tensorboard: false
use_wandb: true
val_interval: 1000
value_clip: null
wandb_group: null
wandb_org: null
wandb_project: realchords
wandb_run_name: null
warmup_steps: 100
weights:
- 0.6
- 0.3
- 0.1
zero_stage: 0
zpg: 1