Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- contrastive/20260123_1403_tune_mulan_transformer/config copy.yaml +163 -0
- contrastive/20260123_1403_tune_mulan_transformer/config.yaml +163 -0
- contrastive/20260123_1403_tune_mulan_transformer/contrastive_learning/1769148247.0608797/events.out.tfevents.1769148247.MACLAB-S004.302904.1 +3 -0
- contrastive/20260123_1403_tune_mulan_transformer/contrastive_learning/1769148247.062121/hparams.yml +5 -0
- contrastive/20260123_1403_tune_mulan_transformer/contrastive_learning/events.out.tfevents.1769148247.MACLAB-S004.302904.0 +3 -0
- contrastive/20260123_1403_tune_mulan_transformer/logs/train.log +15 -0
- contrastive/train.20260123_1403_tune_mulan_transformer.log +0 -0
- downstream/20260121_1942/config.yaml +56 -0
- downstream/20260121_1942/train.log +113 -0
- downstream/20260121_2108/config.yaml +56 -0
- downstream/20260121_2108/train.log +26 -0
- downstream/20260121_2112/config.yaml +56 -0
- downstream/20260121_2112/train.log +41 -0
- downstream/20260121_2116/config.yaml +56 -0
- downstream/20260121_2116/train.log +41 -0
- downstream/20260121_2117/config.yaml +56 -0
- downstream/20260121_2117/train.log +114 -0
- downstream/20260121_2145/config.yaml +56 -0
- downstream/20260121_2145/train.log +128 -0
- downstream/20260121_2200/config.yaml +59 -0
- downstream/20260121_2200/train.log +8 -0
- downstream/20260121_2202/config.yaml +59 -0
- downstream/20260121_2202/train.log +34 -0
- downstream/20260121_2203/config.yaml +55 -0
- downstream/20260121_2203/train.log +94 -0
- downstream/20260121_2243/config.yaml +55 -0
- downstream/20260121_2243/train.log +112 -0
- downstream/20260121_2300/config.yaml +56 -0
- downstream/20260121_2300/train.log +7 -0
- downstream/20260121_2319/config.yaml +55 -0
- downstream/20260121_2319/train.log +45 -0
- downstream/20260121_2327/config.yaml +56 -0
- downstream/20260121_2327/train.log +156 -0
- downstream/20260123_0028/downstream_config.yaml +54 -0
- downstream/20260123_0028/train.log +71 -0
- downstream_mixed/20260122_1200/config.yaml +56 -0
- downstream_mixed/20260122_1200/train.log +152 -0
- downstream_mixed/20260122_1955/config.yaml +69 -0
- downstream_mixed/20260122_1955/downstream_config.yaml +58 -0
- downstream_mixed/20260122_1955/train.log +153 -0
- downstream_mixed_linear/20260122_1143/config.yaml +144 -0
- downstream_mixed_linear/20260122_1143/downstream_config.yaml +56 -0
- downstream_mixed_linear/20260122_1143/predicted_0122_1533.jsonl +0 -0
- downstream_mixed_linear/20260122_1143/train.log +111 -0
- finetune_human/20260124_2143/config.yaml +142 -0
- finetune_human/20260124_2143/reward_model/1769262210.5061178/events.out.tfevents.1769262210.MACLAB-S004.2626926.1 +3 -0
- finetune_human/20260124_2143/reward_model/1769262210.5078583/hparams.yml +4 -0
- finetune_human/20260124_2143/reward_model/events.out.tfevents.1769262210.MACLAB-S004.2626926.0 +3 -0
- finetune_human/20260124_2143/train.20260124_2143.log +803 -0
- finetune_human/20260124_2354/config.yaml +142 -0
contrastive/20260123_1403_tune_mulan_transformer/config copy.yaml
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model: #* this is the actual model config used in training
|
| 2 |
+
name: reward
|
| 3 |
+
model_name: OpenMuQ/MuQ-MuLan-large
|
| 4 |
+
dim: 768
|
| 5 |
+
mode: concat_text_late
|
| 6 |
+
attention_mode: CA
|
| 7 |
+
mlp_dim: 768
|
| 8 |
+
output_dim: 2
|
| 9 |
+
sr: 24000
|
| 10 |
+
prompt_tf_depth: 1
|
| 11 |
+
joint_tf_depth: 1
|
| 12 |
+
dim_head: 64
|
| 13 |
+
heads: 8
|
| 14 |
+
attn_dropout: 0.0
|
| 15 |
+
ff_dropout: 0.0
|
| 16 |
+
ff_mult: 4
|
| 17 |
+
use_layer_idx: -1
|
| 18 |
+
freeze_audio: true
|
| 19 |
+
freeze_text: true
|
| 20 |
+
train_muq_depth: 0
|
| 21 |
+
text_encoder:
|
| 22 |
+
name: muq_mulan
|
| 23 |
+
tune: transformer
|
| 24 |
+
model_name: google/flan-t5-base
|
| 25 |
+
max_seq_len: 512
|
| 26 |
+
tune_last_n_layers: 6
|
| 27 |
+
use_lora: false
|
| 28 |
+
lora_r: 64
|
| 29 |
+
lora_alpha: 64
|
| 30 |
+
lora_dropout: 0.1
|
| 31 |
+
lora_target_modules: null
|
| 32 |
+
gradient_checkpointing: true
|
| 33 |
+
downsample:
|
| 34 |
+
eval: mean_4x
|
| 35 |
+
ref: null
|
| 36 |
+
text: mlp
|
| 37 |
+
configs:
|
| 38 |
+
none:
|
| 39 |
+
kind: none
|
| 40 |
+
factor: 1
|
| 41 |
+
mean:
|
| 42 |
+
kind: mean
|
| 43 |
+
factor: 10
|
| 44 |
+
mean_4x:
|
| 45 |
+
kind: mean+mlp
|
| 46 |
+
factor: 4
|
| 47 |
+
mlp_ratio: 2.0
|
| 48 |
+
dropout: 0.0
|
| 49 |
+
conv_4x:
|
| 50 |
+
kind: conv
|
| 51 |
+
factor: 4
|
| 52 |
+
stage: 1
|
| 53 |
+
kernel_size: 5
|
| 54 |
+
use_layernorm: true
|
| 55 |
+
conv2_4x:
|
| 56 |
+
kind: conv*2
|
| 57 |
+
factor: 4
|
| 58 |
+
kernel_size: 5
|
| 59 |
+
use_layernorm: true
|
| 60 |
+
glu_4x:
|
| 61 |
+
kind: gluconv*2+pw
|
| 62 |
+
factor: 4
|
| 63 |
+
kernel_size: 5
|
| 64 |
+
use_layernorm: true
|
| 65 |
+
mlp:
|
| 66 |
+
kind: mean
|
| 67 |
+
factor: 1
|
| 68 |
+
mean_10x:
|
| 69 |
+
kind: mean+mlp
|
| 70 |
+
factor: 10
|
| 71 |
+
mlp_ratio: 2.0
|
| 72 |
+
dropout: 0.0
|
| 73 |
+
mean_30x:
|
| 74 |
+
kind: mean+mlp
|
| 75 |
+
factor: 30
|
| 76 |
+
mlp_ratio: 2.0
|
| 77 |
+
dropout: 0.0
|
| 78 |
+
text_lora_config: null
|
| 79 |
+
null_embedding:
|
| 80 |
+
text:
|
| 81 |
+
dropout: 0.2
|
| 82 |
+
length: 10
|
| 83 |
+
lyrics:
|
| 84 |
+
dropout: 0.3
|
| 85 |
+
length: 10
|
| 86 |
+
audio:
|
| 87 |
+
dropout: 0.5
|
| 88 |
+
length: 10
|
| 89 |
+
category_embeddings: null
|
| 90 |
+
load_config:
|
| 91 |
+
checkpoint_path: null
|
| 92 |
+
frozen_from_pretrained: true
|
| 93 |
+
pretrained_name: OpenMuQ/MuQ-MuLan-large
|
| 94 |
+
strict: false
|
| 95 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 96 |
+
run_name: tune_mulan_transformer
|
| 97 |
+
basics:
|
| 98 |
+
save_dir: ${project_root}/experiments/contrastive
|
| 99 |
+
random_seed: 42
|
| 100 |
+
data:
|
| 101 |
+
mode: raw_text_frozen_audio
|
| 102 |
+
dataset_dir: ${project_root}/CMI-Training/contrastive_training
|
| 103 |
+
generation_index: null
|
| 104 |
+
max_samples: null
|
| 105 |
+
require_embeddings: true
|
| 106 |
+
require_scores: false
|
| 107 |
+
muq_mulan_model: OpenMuQ/MuQ-MuLan-large
|
| 108 |
+
sample_rate: 24000
|
| 109 |
+
gen_max_duration: 600.0
|
| 110 |
+
prompt_max_duration: 600.0
|
| 111 |
+
audio_trick:
|
| 112 |
+
enabled: true
|
| 113 |
+
prob: 0.3
|
| 114 |
+
duration: 10.0
|
| 115 |
+
audio_dropout:
|
| 116 |
+
enabled: true
|
| 117 |
+
min_duration: 500
|
| 118 |
+
max_duration: 1200
|
| 119 |
+
apply_to_eval: true
|
| 120 |
+
apply_to_ref: true
|
| 121 |
+
eval_only_on_training: true
|
| 122 |
+
val_frac: 0.01
|
| 123 |
+
loss:
|
| 124 |
+
loss_schedule:
|
| 125 |
+
use_matching_loss: 0
|
| 126 |
+
use_classification_loss: 0
|
| 127 |
+
use_hard_negatives: 0
|
| 128 |
+
classification_temp: 1.0
|
| 129 |
+
pos_scale: 2.0
|
| 130 |
+
threshold_mode: momentum
|
| 131 |
+
threshold_momentum: 0.99
|
| 132 |
+
global_threshold_path: null
|
| 133 |
+
threshold_percentile: 0.5
|
| 134 |
+
hard_negative_ratio: 0.8
|
| 135 |
+
similarity_fn: matching
|
| 136 |
+
matching_temperature: 0.07
|
| 137 |
+
use_queue: false
|
| 138 |
+
queue_size: 4096
|
| 139 |
+
train:
|
| 140 |
+
num_epochs: 100
|
| 141 |
+
output_dir: experiments/contrastive_learning
|
| 142 |
+
optimizer:
|
| 143 |
+
lr: 0.0005
|
| 144 |
+
muqmulan_lr: 0.0001
|
| 145 |
+
weight_decay: 0.01
|
| 146 |
+
beta1: 0.9
|
| 147 |
+
beta2: 0.999
|
| 148 |
+
scheduler:
|
| 149 |
+
type: cosine
|
| 150 |
+
warmup_steps: 1000
|
| 151 |
+
min_lr: 1.0e-05
|
| 152 |
+
gradient_accumulation_steps: 3
|
| 153 |
+
max_grad_norm: 10.0
|
| 154 |
+
mixed_precision: bf16
|
| 155 |
+
log_with: tensorboard
|
| 156 |
+
log_interval: 10
|
| 157 |
+
val_interval: 1
|
| 158 |
+
save_interval: 5
|
| 159 |
+
resume_from_checkpoint: null
|
| 160 |
+
batch_size: 40
|
| 161 |
+
matching_only_batch_size: 48
|
| 162 |
+
num_workers: 4
|
| 163 |
+
DEVICES: 6,7
|
contrastive/20260123_1403_tune_mulan_transformer/config.yaml
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model: #* this might be used for downstream tasks
|
| 2 |
+
name: reward
|
| 3 |
+
model_name: OpenMuQ/MuQ-MuLan-large
|
| 4 |
+
dim: 768
|
| 5 |
+
mode: concat_text_late
|
| 6 |
+
attention_mode: CA
|
| 7 |
+
mlp_dim: 768
|
| 8 |
+
output_dim: 2
|
| 9 |
+
sr: 24000
|
| 10 |
+
prompt_tf_depth: 1
|
| 11 |
+
joint_tf_depth: 1
|
| 12 |
+
dim_head: 64
|
| 13 |
+
heads: 8
|
| 14 |
+
attn_dropout: 0.0
|
| 15 |
+
ff_dropout: 0.0
|
| 16 |
+
ff_mult: 4
|
| 17 |
+
use_layer_idx: -1
|
| 18 |
+
freeze_audio: true
|
| 19 |
+
freeze_text: true
|
| 20 |
+
train_muq_depth: 0
|
| 21 |
+
text_encoder:
|
| 22 |
+
name: muq_mulan
|
| 23 |
+
tune: null
|
| 24 |
+
model_name: google/flan-t5-base
|
| 25 |
+
max_seq_len: 512
|
| 26 |
+
tune_last_n_layers: 6
|
| 27 |
+
use_lora: false
|
| 28 |
+
lora_r: 64
|
| 29 |
+
lora_alpha: 64
|
| 30 |
+
lora_dropout: 0.1
|
| 31 |
+
lora_target_modules: null
|
| 32 |
+
gradient_checkpointing: true
|
| 33 |
+
downsample:
|
| 34 |
+
eval: mean_4x
|
| 35 |
+
ref: null
|
| 36 |
+
text: mlp
|
| 37 |
+
configs:
|
| 38 |
+
none:
|
| 39 |
+
kind: none
|
| 40 |
+
factor: 1
|
| 41 |
+
mean:
|
| 42 |
+
kind: mean
|
| 43 |
+
factor: 10
|
| 44 |
+
mean_4x:
|
| 45 |
+
kind: mean+mlp
|
| 46 |
+
factor: 4
|
| 47 |
+
mlp_ratio: 2.0
|
| 48 |
+
dropout: 0.0
|
| 49 |
+
conv_4x:
|
| 50 |
+
kind: conv
|
| 51 |
+
factor: 4
|
| 52 |
+
stage: 1
|
| 53 |
+
kernel_size: 5
|
| 54 |
+
use_layernorm: true
|
| 55 |
+
conv2_4x:
|
| 56 |
+
kind: conv*2
|
| 57 |
+
factor: 4
|
| 58 |
+
kernel_size: 5
|
| 59 |
+
use_layernorm: true
|
| 60 |
+
glu_4x:
|
| 61 |
+
kind: gluconv*2+pw
|
| 62 |
+
factor: 4
|
| 63 |
+
kernel_size: 5
|
| 64 |
+
use_layernorm: true
|
| 65 |
+
mlp:
|
| 66 |
+
kind: mean
|
| 67 |
+
factor: 1
|
| 68 |
+
mean_10x:
|
| 69 |
+
kind: mean+mlp
|
| 70 |
+
factor: 10
|
| 71 |
+
mlp_ratio: 2.0
|
| 72 |
+
dropout: 0.0
|
| 73 |
+
mean_30x:
|
| 74 |
+
kind: mean+mlp
|
| 75 |
+
factor: 30
|
| 76 |
+
mlp_ratio: 2.0
|
| 77 |
+
dropout: 0.0
|
| 78 |
+
text_lora_config: null
|
| 79 |
+
null_embedding:
|
| 80 |
+
text:
|
| 81 |
+
dropout: 0.2
|
| 82 |
+
length: 10
|
| 83 |
+
lyrics:
|
| 84 |
+
dropout: 0.3
|
| 85 |
+
length: 10
|
| 86 |
+
audio:
|
| 87 |
+
dropout: 0.5
|
| 88 |
+
length: 10
|
| 89 |
+
category_embeddings: null
|
| 90 |
+
load_config:
|
| 91 |
+
checkpoint_path: null
|
| 92 |
+
frozen_from_pretrained: true
|
| 93 |
+
pretrained_name: OpenMuQ/MuQ-MuLan-large
|
| 94 |
+
strict: false
|
| 95 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 96 |
+
run_name: tune_mulan_transformer
|
| 97 |
+
basics:
|
| 98 |
+
save_dir: ${project_root}/experiments/contrastive
|
| 99 |
+
random_seed: 42
|
| 100 |
+
data:
|
| 101 |
+
mode: raw_text_frozen_audio
|
| 102 |
+
dataset_dir: ${project_root}/CMI-Training/contrastive_training
|
| 103 |
+
generation_index: null
|
| 104 |
+
max_samples: null
|
| 105 |
+
require_embeddings: true
|
| 106 |
+
require_scores: false
|
| 107 |
+
muq_mulan_model: OpenMuQ/MuQ-MuLan-large
|
| 108 |
+
sample_rate: 24000
|
| 109 |
+
gen_max_duration: 600.0
|
| 110 |
+
prompt_max_duration: 600.0
|
| 111 |
+
audio_trick:
|
| 112 |
+
enabled: true
|
| 113 |
+
prob: 0.3
|
| 114 |
+
duration: 10.0
|
| 115 |
+
audio_dropout:
|
| 116 |
+
enabled: true
|
| 117 |
+
min_duration: 500
|
| 118 |
+
max_duration: 1200
|
| 119 |
+
apply_to_eval: true
|
| 120 |
+
apply_to_ref: true
|
| 121 |
+
eval_only_on_training: true
|
| 122 |
+
val_frac: 0.01
|
| 123 |
+
loss:
|
| 124 |
+
loss_schedule:
|
| 125 |
+
use_matching_loss: 0
|
| 126 |
+
use_classification_loss: 0
|
| 127 |
+
use_hard_negatives: 0
|
| 128 |
+
classification_temp: 1.0
|
| 129 |
+
pos_scale: 2.0
|
| 130 |
+
threshold_mode: momentum
|
| 131 |
+
threshold_momentum: 0.99
|
| 132 |
+
global_threshold_path: null
|
| 133 |
+
threshold_percentile: 0.5
|
| 134 |
+
hard_negative_ratio: 0.8
|
| 135 |
+
similarity_fn: matching
|
| 136 |
+
matching_temperature: 0.07
|
| 137 |
+
use_queue: false
|
| 138 |
+
queue_size: 4096
|
| 139 |
+
train:
|
| 140 |
+
num_epochs: 100
|
| 141 |
+
output_dir: experiments/contrastive_learning
|
| 142 |
+
optimizer:
|
| 143 |
+
lr: 0.0005
|
| 144 |
+
muqmulan_lr: 0.0001
|
| 145 |
+
weight_decay: 0.01
|
| 146 |
+
beta1: 0.9
|
| 147 |
+
beta2: 0.999
|
| 148 |
+
scheduler:
|
| 149 |
+
type: cosine
|
| 150 |
+
warmup_steps: 1000
|
| 151 |
+
min_lr: 1.0e-05
|
| 152 |
+
gradient_accumulation_steps: 3
|
| 153 |
+
max_grad_norm: 10.0
|
| 154 |
+
mixed_precision: bf16
|
| 155 |
+
log_with: tensorboard
|
| 156 |
+
log_interval: 10
|
| 157 |
+
val_interval: 1
|
| 158 |
+
save_interval: 5
|
| 159 |
+
resume_from_checkpoint: null
|
| 160 |
+
batch_size: 40
|
| 161 |
+
matching_only_batch_size: 48
|
| 162 |
+
num_workers: 4
|
| 163 |
+
DEVICES: 6,7
|
contrastive/20260123_1403_tune_mulan_transformer/contrastive_learning/1769148247.0608797/events.out.tfevents.1769148247.MACLAB-S004.302904.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a00f486274e8f7144a0e8f471355f64b41223095199a68f18fe63bcdc3730550
|
| 3 |
+
size 539
|
contrastive/20260123_1403_tune_mulan_transformer/contrastive_learning/1769148247.062121/hparams.yml
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
batch_size: 40
|
| 2 |
+
grad_accum_steps: 3
|
| 3 |
+
learning_rate: 0.0005
|
| 4 |
+
mode: raw_text_frozen_audio
|
| 5 |
+
num_epochs: 100
|
contrastive/20260123_1403_tune_mulan_transformer/contrastive_learning/events.out.tfevents.1769148247.MACLAB-S004.302904.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2fa0b781f4514bb879e7966e7b64a7d314194abd176a69d93d0fdc13b4faf225
|
| 3 |
+
size 10147608
|
contrastive/20260123_1403_tune_mulan_transformer/logs/train.log
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-23 14:04:07 | INFO | EMA enabled: decay=0.9999, update_every=1
|
| 2 |
+
2026-01-23 14:04:07 | INFO | Created model: concat_text_late mode, CA attention
|
| 3 |
+
2026-01-23 14:04:07 | INFO | Gradient checkpointing enabled
|
| 4 |
+
2026-01-23 14:04:07 | INFO | Created ContrastiveLoss: threshold_mode=momentum, percentile=0.5
|
| 5 |
+
2026-01-23 14:04:07 | INFO | Optimizer: AdamW (lr=0.0005, muqmulan_lr=0.0001)
|
| 6 |
+
2026-01-23 14:04:07 | INFO | Scheduler: Warmup(1000) + CosineAnnealing
|
| 7 |
+
2026-01-23 14:04:07 | INFO | Train: 1514 batches, Val: 8 batches
|
| 8 |
+
2026-01-23 14:04:07 | INFO | ✓ Audio cropping enabled: min=500, max=1200
|
| 9 |
+
2026-01-23 14:04:07 | INFO | Apply to eval: True, ref: True
|
| 10 |
+
2026-01-23 14:04:07 | INFO | Modes: train=random, val=center
|
| 11 |
+
2026-01-23 14:04:07 | INFO | Batch sizes: normal=40, matching_only=48
|
| 12 |
+
2026-01-23 14:04:07 | INFO | Parameters: 683.856M total, 71.623M trainable
|
| 13 |
+
2026-01-23 14:04:07 | INFO | Trainable modules: null_text_embedding, null_lyrics_embedding, null_audio_embedding, prompt_transformer, joint_transformer, score_projector, single_score_projector, alignment_head, quality_head, text_module, eval_downsampler, ref_downsampler
|
| 14 |
+
2026-01-23 14:04:07 | INFO | Trainer initialized on 2 GPU(s)
|
| 15 |
+
2026-01-23 14:04:07 | INFO | Mixed precision: bf16
|
contrastive/train.20260123_1403_tune_mulan_transformer.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
downstream/20260121_1942/config.yaml
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 2 |
+
basics:
|
| 3 |
+
random_seed: 42
|
| 4 |
+
save_dir: ${project_root}/experiments/downstream
|
| 5 |
+
run_name: null
|
| 6 |
+
tasks:
|
| 7 |
+
- musicality
|
| 8 |
+
- alignment
|
| 9 |
+
- preference
|
| 10 |
+
backbone:
|
| 11 |
+
checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 12 |
+
freeze: true
|
| 13 |
+
from_ema: false
|
| 14 |
+
dataset:
|
| 15 |
+
train_file: ${project_root}/train_multitask.jsonl
|
| 16 |
+
test_file: ${project_root}/test_multitask.jsonl
|
| 17 |
+
heads:
|
| 18 |
+
hidden_dim: 768
|
| 19 |
+
init_from: null
|
| 20 |
+
musicality:
|
| 21 |
+
use_mlp: false
|
| 22 |
+
ordinal: false
|
| 23 |
+
dropout: 0.1
|
| 24 |
+
num_categories: 9
|
| 25 |
+
y_min: 1.0
|
| 26 |
+
y_max: 5.0
|
| 27 |
+
step: 0.5
|
| 28 |
+
alignment:
|
| 29 |
+
use_mlp: false
|
| 30 |
+
ordinal: false
|
| 31 |
+
dropout: 0.1
|
| 32 |
+
num_categories: 9
|
| 33 |
+
y_min: 1.0
|
| 34 |
+
y_max: 5.0
|
| 35 |
+
step: 0.5
|
| 36 |
+
preference:
|
| 37 |
+
use_mlp: false
|
| 38 |
+
dropout: 0.1
|
| 39 |
+
train:
|
| 40 |
+
num_epochs: 10
|
| 41 |
+
num_train_steps: 2000
|
| 42 |
+
batch_size: 48
|
| 43 |
+
learning_rate: 0.001
|
| 44 |
+
weight_decay: 0.01
|
| 45 |
+
max_grad_norm: 1.0
|
| 46 |
+
warmup_steps: 100
|
| 47 |
+
schedule_type: cosine
|
| 48 |
+
min_lr_ratio: 0.01
|
| 49 |
+
dataset_mode: sequential
|
| 50 |
+
steps_per_task: 1000
|
| 51 |
+
log_interval: 50
|
| 52 |
+
val_interval: 1000
|
| 53 |
+
save_interval: 1000
|
| 54 |
+
num_workers: 8
|
| 55 |
+
resume: null
|
| 56 |
+
device: cuda:0
|
downstream/20260121_1942/train.log
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-21 19:42:29 | INFO | Starting downstream training: 20260121_1942
|
| 2 |
+
2026-01-21 19:42:29 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_1942
|
| 3 |
+
2026-01-21 19:42:29 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_1942/config.yaml
|
| 4 |
+
2026-01-21 19:42:29 | INFO | Training tasks: ['musicality', 'alignment', 'preference']
|
| 5 |
+
2026-01-21 19:42:29 | INFO | Dataset mode: sequential
|
| 6 |
+
2026-01-21 19:42:32 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 7 |
+
2026-01-21 19:42:32 | INFO | Using checkpoint config for model
|
| 8 |
+
2026-01-21 19:42:38 | WARNING | Missing keys: 283
|
| 9 |
+
2026-01-21 19:42:38 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 10 |
+
2026-01-21 19:42:38 | INFO | Added linear head for task 'musicality'
|
| 11 |
+
2026-01-21 19:42:38 | INFO | Added linear head for task 'alignment'
|
| 12 |
+
2026-01-21 19:42:38 | INFO | Added linear head for task 'preference'
|
| 13 |
+
2026-01-21 19:42:38 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
|
| 14 |
+
2026-01-21 19:42:38 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
|
| 15 |
+
2026-01-21 19:42:38 | INFO | Task 'musicality': train=4322, test=913
|
| 16 |
+
2026-01-21 19:42:38 | INFO | Task 'alignment': train=1923, test=913
|
| 17 |
+
2026-01-21 19:42:38 | INFO | Task 'preference': train=1065, test=275
|
| 18 |
+
2026-01-21 19:42:38 | INFO | [SEQUENTIAL MODE] Training 3 tasks, 1000 steps each
|
| 19 |
+
2026-01-21 19:42:38 | INFO |
|
| 20 |
+
============================================================
|
| 21 |
+
2026-01-21 19:42:38 | INFO | Starting Task 1/3: musicality
|
| 22 |
+
2026-01-21 19:42:38 | INFO | ============================================================
|
| 23 |
+
2026-01-21 19:42:38 | INFO | Task 'musicality' trainable parameters: 769
|
| 24 |
+
2026-01-21 19:42:54 | INFO | [Task musicality][Step 50/1000] loss=2.4938 | mae=2.4938
|
| 25 |
+
2026-01-21 19:43:10 | INFO | [Task musicality][Step 100/1000] loss=1.0211 | mae=1.0211
|
| 26 |
+
2026-01-21 19:43:19 | INFO | [Task musicality][Step 150/1000] loss=0.8535 | mae=0.8535
|
| 27 |
+
2026-01-21 19:43:32 | INFO | [Task musicality][Step 200/1000] loss=0.7434 | mae=0.7434
|
| 28 |
+
2026-01-21 19:43:43 | INFO | [Task musicality][Step 250/1000] loss=0.7352 | mae=0.7352
|
| 29 |
+
2026-01-21 19:43:56 | INFO | [Task musicality][Step 300/1000] loss=0.6973 | mae=0.6973
|
| 30 |
+
2026-01-21 19:44:06 | INFO | [Task musicality][Step 350/1000] loss=0.6773 | mae=0.6773
|
| 31 |
+
2026-01-21 19:44:19 | INFO | [Task musicality][Step 400/1000] loss=0.6354 | mae=0.6354
|
| 32 |
+
2026-01-21 19:44:30 | INFO | [Task musicality][Step 450/1000] loss=0.6352 | mae=0.6352
|
| 33 |
+
2026-01-21 19:44:43 | INFO | [Task musicality][Step 500/1000] loss=0.6213 | mae=0.6213
|
| 34 |
+
2026-01-21 19:44:56 | INFO | [Task musicality][Step 550/1000] loss=0.6211 | mae=0.6211
|
| 35 |
+
2026-01-21 19:45:08 | INFO | [Task musicality][Step 600/1000] loss=0.5835 | mae=0.5835
|
| 36 |
+
2026-01-21 19:45:21 | INFO | [Task musicality][Step 650/1000] loss=0.5852 | mae=0.5852
|
| 37 |
+
2026-01-21 19:45:31 | INFO | [Task musicality][Step 700/1000] loss=0.5859 | mae=0.5859
|
| 38 |
+
2026-01-21 19:45:45 | INFO | [Task musicality][Step 750/1000] loss=0.5658 | mae=0.5658
|
| 39 |
+
2026-01-21 19:45:55 | INFO | [Task musicality][Step 800/1000] loss=0.5481 | mae=0.5481
|
| 40 |
+
2026-01-21 19:46:09 | INFO | [Task musicality][Step 850/1000] loss=0.5400 | mae=0.5400
|
| 41 |
+
2026-01-21 19:46:19 | INFO | [Task musicality][Step 900/1000] loss=0.5486 | mae=0.5486
|
| 42 |
+
2026-01-21 19:46:32 | INFO | [Task musicality][Step 950/1000] loss=0.5365 | mae=0.5365
|
| 43 |
+
2026-01-21 19:46:46 | INFO | [Task musicality][Step 1000/1000] loss=0.5278 | mae=0.5278
|
| 44 |
+
2026-01-21 19:46:52 | INFO | [Val] musicality: loss=1.4280 | mae=1.4280
|
| 45 |
+
2026-01-21 19:46:52 | INFO | Task 'musicality' complete. Running validation...
|
| 46 |
+
2026-01-21 19:46:57 | INFO | [Final Val for musicality] loss=1.4280 | mae=1.4280
|
| 47 |
+
2026-01-21 19:46:57 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_1942/ckpt/task_musicality_final.pt
|
| 48 |
+
2026-01-21 19:46:57 | INFO |
|
| 49 |
+
============================================================
|
| 50 |
+
2026-01-21 19:46:57 | INFO | Starting Task 2/3: alignment
|
| 51 |
+
2026-01-21 19:46:57 | INFO | ============================================================
|
| 52 |
+
2026-01-21 19:46:57 | INFO | Task 'alignment' trainable parameters: 769
|
| 53 |
+
2026-01-21 19:47:05 | INFO | [Task alignment][Step 50/1000] loss=2.4024 | mae=2.4024
|
| 54 |
+
2026-01-21 19:47:11 | INFO | [Task alignment][Step 100/1000] loss=0.9916 | mae=0.9916
|
| 55 |
+
2026-01-21 19:47:18 | INFO | [Task alignment][Step 150/1000] loss=0.8597 | mae=0.8597
|
| 56 |
+
2026-01-21 19:47:24 | INFO | [Task alignment][Step 200/1000] loss=0.7863 | mae=0.7863
|
| 57 |
+
2026-01-21 19:47:33 | INFO | [Task alignment][Step 250/1000] loss=0.7473 | mae=0.7473
|
| 58 |
+
2026-01-21 19:47:40 | INFO | [Task alignment][Step 300/1000] loss=0.7150 | mae=0.7150
|
| 59 |
+
2026-01-21 19:47:46 | INFO | [Task alignment][Step 350/1000] loss=0.7370 | mae=0.7370
|
| 60 |
+
2026-01-21 19:47:54 | INFO | [Task alignment][Step 400/1000] loss=0.7159 | mae=0.7159
|
| 61 |
+
2026-01-21 19:48:02 | INFO | [Task alignment][Step 450/1000] loss=0.6949 | mae=0.6949
|
| 62 |
+
2026-01-21 19:48:10 | INFO | [Task alignment][Step 500/1000] loss=0.6663 | mae=0.6663
|
| 63 |
+
2026-01-21 19:48:18 | INFO | [Task alignment][Step 550/1000] loss=0.6535 | mae=0.6535
|
| 64 |
+
2026-01-21 19:48:24 | INFO | [Task alignment][Step 600/1000] loss=0.6601 | mae=0.6601
|
| 65 |
+
2026-01-21 19:48:32 | INFO | [Task alignment][Step 650/1000] loss=0.6296 | mae=0.6296
|
| 66 |
+
2026-01-21 19:48:39 | INFO | [Task alignment][Step 700/1000] loss=0.5975 | mae=0.5975
|
| 67 |
+
2026-01-21 19:48:45 | INFO | [Task alignment][Step 750/1000] loss=0.5953 | mae=0.5953
|
| 68 |
+
2026-01-21 19:48:51 | INFO | [Task alignment][Step 800/1000] loss=0.5881 | mae=0.5881
|
| 69 |
+
2026-01-21 19:49:00 | INFO | [Task alignment][Step 850/1000] loss=0.5822 | mae=0.5822
|
| 70 |
+
2026-01-21 19:49:06 | INFO | [Task alignment][Step 900/1000] loss=0.5667 | mae=0.5667
|
| 71 |
+
2026-01-21 19:49:12 | INFO | [Task alignment][Step 950/1000] loss=0.5752 | mae=0.5752
|
| 72 |
+
2026-01-21 19:49:20 | INFO | [Task alignment][Step 1000/1000] loss=0.5615 | mae=0.5615
|
| 73 |
+
2026-01-21 19:49:23 | INFO | [Val] alignment: loss=1.3554 | mae=1.3554
|
| 74 |
+
2026-01-21 19:49:23 | INFO | Task 'alignment' complete. Running validation...
|
| 75 |
+
2026-01-21 19:49:28 | INFO | [Final Val for alignment] loss=1.3554 | mae=1.3554
|
| 76 |
+
2026-01-21 19:49:28 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_1942/ckpt/task_alignment_final.pt
|
| 77 |
+
2026-01-21 19:49:28 | INFO |
|
| 78 |
+
============================================================
|
| 79 |
+
2026-01-21 19:49:28 | INFO | Starting Task 3/3: preference
|
| 80 |
+
2026-01-21 19:49:28 | INFO | ============================================================
|
| 81 |
+
2026-01-21 19:49:28 | INFO | Task 'preference' trainable parameters: 769
|
| 82 |
+
2026-01-21 19:50:48 | INFO | [Task preference][Step 50/1000] loss=0.9186 | accuracy=0.5758
|
| 83 |
+
2026-01-21 19:51:56 | INFO | [Task preference][Step 100/1000] loss=0.5793 | accuracy=0.7038
|
| 84 |
+
2026-01-21 19:53:04 | INFO | [Task preference][Step 150/1000] loss=0.5304 | accuracy=0.7379
|
| 85 |
+
2026-01-21 19:54:19 | INFO | [Task preference][Step 200/1000] loss=0.5223 | accuracy=0.7467
|
| 86 |
+
2026-01-21 19:55:29 | INFO | [Task preference][Step 250/1000] loss=0.4683 | accuracy=0.7692
|
| 87 |
+
2026-01-21 19:56:38 | INFO | [Task preference][Step 300/1000] loss=0.4672 | accuracy=0.7746
|
| 88 |
+
2026-01-21 19:57:49 | INFO | [Task preference][Step 350/1000] loss=0.4507 | accuracy=0.7767
|
| 89 |
+
2026-01-21 19:59:03 | INFO | [Task preference][Step 400/1000] loss=0.4282 | accuracy=0.7896
|
| 90 |
+
2026-01-21 20:00:12 | INFO | [Task preference][Step 450/1000] loss=0.4177 | accuracy=0.8004
|
| 91 |
+
2026-01-21 20:01:21 | INFO | [Task preference][Step 500/1000] loss=0.4111 | accuracy=0.8008
|
| 92 |
+
2026-01-21 20:02:29 | INFO | [Task preference][Step 550/1000] loss=0.3968 | accuracy=0.8163
|
| 93 |
+
2026-01-21 20:03:42 | INFO | [Task preference][Step 600/1000] loss=0.3853 | accuracy=0.8217
|
| 94 |
+
2026-01-21 20:04:56 | INFO | [Task preference][Step 650/1000] loss=0.3910 | accuracy=0.8088
|
| 95 |
+
2026-01-21 20:06:09 | INFO | [Task preference][Step 700/1000] loss=0.3718 | accuracy=0.8300
|
| 96 |
+
2026-01-21 20:07:32 | INFO | [Task preference][Step 750/1000] loss=0.3781 | accuracy=0.8221
|
| 97 |
+
2026-01-21 20:08:42 | INFO | [Task preference][Step 800/1000] loss=0.3685 | accuracy=0.8263
|
| 98 |
+
2026-01-21 20:09:56 | INFO | [Task preference][Step 850/1000] loss=0.3704 | accuracy=0.8304
|
| 99 |
+
2026-01-21 20:11:07 | INFO | [Task preference][Step 900/1000] loss=0.3680 | accuracy=0.8279
|
| 100 |
+
2026-01-21 20:12:22 | INFO | [Task preference][Step 950/1000] loss=0.3623 | accuracy=0.8358
|
| 101 |
+
2026-01-21 20:13:32 | INFO | [Task preference][Step 1000/1000] loss=0.3644 | accuracy=0.8296
|
| 102 |
+
2026-01-21 20:13:44 | INFO | [Val] preference: loss=0.5676 | accuracy=0.7223
|
| 103 |
+
2026-01-21 20:13:44 | INFO | Task 'preference' complete. Running validation...
|
| 104 |
+
2026-01-21 20:13:52 | INFO | [Final Val for preference] loss=0.5676 | accuracy=0.7223
|
| 105 |
+
2026-01-21 20:13:52 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_1942/ckpt/task_preference_final.pt
|
| 106 |
+
2026-01-21 20:13:52 | INFO |
|
| 107 |
+
============================================================
|
| 108 |
+
2026-01-21 20:13:52 | INFO | All tasks complete. Running final validation for all tasks...
|
| 109 |
+
2026-01-21 20:13:57 | INFO | [Final Val] musicality: loss=1.4280 | mae=1.4280
|
| 110 |
+
2026-01-21 20:14:01 | INFO | [Final Val] alignment: loss=1.3554 | mae=1.3554
|
| 111 |
+
2026-01-21 20:14:09 | INFO | [Final Val] preference: loss=0.5676 | accuracy=0.7223
|
| 112 |
+
2026-01-21 20:14:09 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_1942/ckpt/downstream_final.pt
|
| 113 |
+
2026-01-21 20:14:09 | INFO | Done! Checkpoint saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_1942/ckpt/downstream_final.pt
|
downstream/20260121_2108/config.yaml
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 2 |
+
basics:
|
| 3 |
+
random_seed: 42
|
| 4 |
+
save_dir: ${project_root}/experiments/downstream
|
| 5 |
+
run_name: null
|
| 6 |
+
tasks:
|
| 7 |
+
- musicality
|
| 8 |
+
- alignment
|
| 9 |
+
- preference
|
| 10 |
+
backbone:
|
| 11 |
+
checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 12 |
+
freeze: true
|
| 13 |
+
from_ema: false
|
| 14 |
+
dataset:
|
| 15 |
+
train_file: ${project_root}/train_multitask.jsonl
|
| 16 |
+
test_file: ${project_root}/test_multitask.jsonl
|
| 17 |
+
heads:
|
| 18 |
+
hidden_dim: 768
|
| 19 |
+
init_from: score_projector
|
| 20 |
+
musicality:
|
| 21 |
+
use_mlp: false
|
| 22 |
+
ordinal: false
|
| 23 |
+
dropout: 0.1
|
| 24 |
+
num_categories: 9
|
| 25 |
+
y_min: 1.0
|
| 26 |
+
y_max: 5.0
|
| 27 |
+
step: 0.5
|
| 28 |
+
alignment:
|
| 29 |
+
use_mlp: false
|
| 30 |
+
ordinal: false
|
| 31 |
+
dropout: 0.1
|
| 32 |
+
num_categories: 9
|
| 33 |
+
y_min: 1.0
|
| 34 |
+
y_max: 5.0
|
| 35 |
+
step: 0.5
|
| 36 |
+
preference:
|
| 37 |
+
use_mlp: false
|
| 38 |
+
dropout: 0.1
|
| 39 |
+
train:
|
| 40 |
+
num_epochs: 10
|
| 41 |
+
num_train_steps: 2000
|
| 42 |
+
batch_size: 48
|
| 43 |
+
learning_rate: 0.001
|
| 44 |
+
weight_decay: 0.01
|
| 45 |
+
max_grad_norm: 1.0
|
| 46 |
+
warmup_steps: 100
|
| 47 |
+
schedule_type: cosine
|
| 48 |
+
min_lr_ratio: 0.01
|
| 49 |
+
dataset_mode: sequential
|
| 50 |
+
steps_per_task: 1000
|
| 51 |
+
log_interval: 50
|
| 52 |
+
val_interval: 1000
|
| 53 |
+
save_interval: 1000
|
| 54 |
+
num_workers: 8
|
| 55 |
+
resume: null
|
| 56 |
+
device: cuda:0
|
downstream/20260121_2108/train.log
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-21 21:08:36 | INFO | Starting downstream training: 20260121_2108
|
| 2 |
+
2026-01-21 21:08:36 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2108
|
| 3 |
+
2026-01-21 21:08:36 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2108/config.yaml
|
| 4 |
+
2026-01-21 21:08:36 | INFO | Training tasks: ['musicality', 'alignment', 'preference']
|
| 5 |
+
2026-01-21 21:08:36 | INFO | Dataset mode: sequential
|
| 6 |
+
2026-01-21 21:08:39 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 7 |
+
2026-01-21 21:08:39 | INFO | Using checkpoint config for model
|
| 8 |
+
2026-01-21 21:08:44 | WARNING | Missing keys: 283
|
| 9 |
+
2026-01-21 21:08:45 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 10 |
+
2026-01-21 21:08:45 | INFO | Added linear head for task 'musicality'
|
| 11 |
+
2026-01-21 21:08:45 | INFO | Added linear head for task 'alignment'
|
| 12 |
+
2026-01-21 21:08:45 | INFO | Added linear head for task 'preference'
|
| 13 |
+
2026-01-21 21:08:45 | INFO | Initializing heads from backbone 'score_projector'
|
| 14 |
+
2026-01-21 21:08:45 | INFO | Initializing 3 heads from 'score_projector'
|
| 15 |
+
2026-01-21 21:08:45 | INFO | Task 'musicality': type=linear, ordinal=False
|
| 16 |
+
2026-01-21 21:08:45 | INFO | Initializing head from 'score_projector' (type=linear, from_ema=False)
|
| 17 |
+
2026-01-21 21:08:45 | INFO | Warning: output dim mismatch (2 vs 1), using first 1 dims
|
| 18 |
+
2026-01-21 21:08:45 | INFO | Initialized final linear layer from source
|
| 19 |
+
2026-01-21 21:08:45 | INFO | ✓ Head initialized from 'score_projector'
|
| 20 |
+
2026-01-21 21:08:45 | INFO | Task 'alignment': type=linear, ordinal=False
|
| 21 |
+
2026-01-21 21:08:45 | INFO | Initializing head from 'score_projector' (type=linear, from_ema=False)
|
| 22 |
+
2026-01-21 21:08:45 | INFO | Warning: output dim mismatch (2 vs 1), using first 1 dims
|
| 23 |
+
2026-01-21 21:08:45 | INFO | Initialized final linear layer from source
|
| 24 |
+
2026-01-21 21:08:45 | INFO | ✓ Head initialized from 'score_projector'
|
| 25 |
+
2026-01-21 21:08:45 | INFO | Task 'preference': type=linear, ordinal=False
|
| 26 |
+
2026-01-21 21:08:45 | INFO | Initializing head from 'score_projector' (type=linear, from_ema=False)
|
downstream/20260121_2112/config.yaml
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 2 |
+
basics:
|
| 3 |
+
random_seed: 42
|
| 4 |
+
save_dir: ${project_root}/experiments/downstream
|
| 5 |
+
run_name: null
|
| 6 |
+
tasks:
|
| 7 |
+
- musicality
|
| 8 |
+
- alignment
|
| 9 |
+
- preference
|
| 10 |
+
backbone:
|
| 11 |
+
checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 12 |
+
freeze: true
|
| 13 |
+
from_ema: false
|
| 14 |
+
dataset:
|
| 15 |
+
train_file: ${project_root}/train_multitask.jsonl
|
| 16 |
+
test_file: ${project_root}/test_multitask.jsonl
|
| 17 |
+
heads:
|
| 18 |
+
hidden_dim: 768
|
| 19 |
+
init_from: score_projector
|
| 20 |
+
musicality:
|
| 21 |
+
use_mlp: false
|
| 22 |
+
ordinal: false
|
| 23 |
+
dropout: 0.1
|
| 24 |
+
num_categories: 9
|
| 25 |
+
y_min: 1.0
|
| 26 |
+
y_max: 5.0
|
| 27 |
+
step: 0.5
|
| 28 |
+
alignment:
|
| 29 |
+
use_mlp: false
|
| 30 |
+
ordinal: false
|
| 31 |
+
dropout: 0.1
|
| 32 |
+
num_categories: 9
|
| 33 |
+
y_min: 1.0
|
| 34 |
+
y_max: 5.0
|
| 35 |
+
step: 0.5
|
| 36 |
+
preference:
|
| 37 |
+
use_mlp: false
|
| 38 |
+
dropout: 0.1
|
| 39 |
+
train:
|
| 40 |
+
num_epochs: 10
|
| 41 |
+
num_train_steps: 2000
|
| 42 |
+
batch_size: 48
|
| 43 |
+
learning_rate: 0.001
|
| 44 |
+
weight_decay: 0.01
|
| 45 |
+
max_grad_norm: 1.0
|
| 46 |
+
warmup_steps: 100
|
| 47 |
+
schedule_type: cosine
|
| 48 |
+
min_lr_ratio: 0.01
|
| 49 |
+
dataset_mode: sequential
|
| 50 |
+
steps_per_task: 1000
|
| 51 |
+
log_interval: 50
|
| 52 |
+
val_interval: 1000
|
| 53 |
+
save_interval: 1000
|
| 54 |
+
num_workers: 8
|
| 55 |
+
resume: null
|
| 56 |
+
device: cuda:0
|
downstream/20260121_2112/train.log
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-21 21:12:23 | INFO | Starting downstream training: 20260121_2112
|
| 2 |
+
2026-01-21 21:12:23 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2112
|
| 3 |
+
2026-01-21 21:12:23 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2112/config.yaml
|
| 4 |
+
2026-01-21 21:12:23 | INFO | Training tasks: ['musicality', 'alignment', 'preference']
|
| 5 |
+
2026-01-21 21:12:23 | INFO | Dataset mode: sequential
|
| 6 |
+
2026-01-21 21:12:26 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 7 |
+
2026-01-21 21:12:26 | INFO | Using checkpoint config for model
|
| 8 |
+
2026-01-21 21:12:31 | WARNING | Missing keys: 283
|
| 9 |
+
2026-01-21 21:12:32 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 10 |
+
2026-01-21 21:12:32 | INFO | Added linear head for task 'musicality'
|
| 11 |
+
2026-01-21 21:12:32 | INFO | Added linear head for task 'alignment'
|
| 12 |
+
2026-01-21 21:12:32 | INFO | Added linear head for task 'preference'
|
| 13 |
+
2026-01-21 21:12:32 | INFO | Initializing heads from backbone 'score_projector'
|
| 14 |
+
2026-01-21 21:12:32 | INFO | Initializing 3 heads from 'score_projector'
|
| 15 |
+
2026-01-21 21:12:32 | INFO | Task 'musicality': type=linear, ordinal=False
|
| 16 |
+
2026-01-21 21:12:32 | INFO | Initializing head from 'score_projector' (type=linear, from_ema=False)
|
| 17 |
+
2026-01-21 21:12:32 | INFO | Warning: output dim mismatch (2 vs 1), using first 1 dims
|
| 18 |
+
2026-01-21 21:12:32 | INFO | Initialized final linear layer from source
|
| 19 |
+
2026-01-21 21:12:32 | INFO | ✓ Head initialized from 'score_projector'
|
| 20 |
+
2026-01-21 21:12:32 | INFO | Task 'alignment': type=linear, ordinal=False
|
| 21 |
+
2026-01-21 21:12:32 | INFO | Initializing head from 'score_projector' (type=linear, from_ema=False)
|
| 22 |
+
2026-01-21 21:12:32 | INFO | Warning: output dim mismatch (2 vs 1), using first 1 dims
|
| 23 |
+
2026-01-21 21:12:32 | INFO | Initialized final linear layer from source
|
| 24 |
+
2026-01-21 21:12:32 | INFO | ✓ Head initialized from 'score_projector'
|
| 25 |
+
2026-01-21 21:12:32 | INFO | Task 'preference': type=linear, ordinal=False
|
| 26 |
+
2026-01-21 21:12:32 | INFO | Initializing head from 'score_projector' (type=linear, from_ema=False)
|
| 27 |
+
2026-01-21 21:12:32 | INFO | Warning: output dim mismatch (2 vs 1), using first 1 dims
|
| 28 |
+
2026-01-21 21:12:32 | INFO | Initialized final linear layer from source
|
| 29 |
+
2026-01-21 21:12:32 | INFO | ✓ Head initialized from 'score_projector'
|
| 30 |
+
2026-01-21 21:12:32 | INFO | ✓ All heads initialized
|
| 31 |
+
2026-01-21 21:12:32 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
|
| 32 |
+
2026-01-21 21:12:32 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
|
| 33 |
+
2026-01-21 21:12:32 | INFO | Task 'musicality': train=4322, test=913
|
| 34 |
+
2026-01-21 21:12:32 | INFO | Task 'alignment': train=1923, test=913
|
| 35 |
+
2026-01-21 21:12:32 | INFO | Task 'preference': train=1065, test=275
|
| 36 |
+
2026-01-21 21:12:32 | INFO | [SEQUENTIAL MODE] Training 3 tasks, 1000 steps each
|
| 37 |
+
2026-01-21 21:12:32 | INFO |
|
| 38 |
+
============================================================
|
| 39 |
+
2026-01-21 21:12:32 | INFO | Starting Task 1/3: musicality
|
| 40 |
+
2026-01-21 21:12:32 | INFO | ============================================================
|
| 41 |
+
2026-01-21 21:12:32 | INFO | Task 'musicality' trainable parameters: 769
|
downstream/20260121_2116/config.yaml
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 2 |
+
basics:
|
| 3 |
+
random_seed: 42
|
| 4 |
+
save_dir: ${project_root}/experiments/downstream
|
| 5 |
+
run_name: null
|
| 6 |
+
tasks:
|
| 7 |
+
- musicality
|
| 8 |
+
- alignment
|
| 9 |
+
- preference
|
| 10 |
+
backbone:
|
| 11 |
+
checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 12 |
+
freeze: true
|
| 13 |
+
from_ema: false
|
| 14 |
+
dataset:
|
| 15 |
+
train_file: ${project_root}/train_multitask.jsonl
|
| 16 |
+
test_file: ${project_root}/test_multitask.jsonl
|
| 17 |
+
heads:
|
| 18 |
+
hidden_dim: 768
|
| 19 |
+
init_from: score_projector
|
| 20 |
+
musicality:
|
| 21 |
+
use_mlp: false
|
| 22 |
+
ordinal: false
|
| 23 |
+
dropout: 0.1
|
| 24 |
+
num_categories: 9
|
| 25 |
+
y_min: 1.0
|
| 26 |
+
y_max: 5.0
|
| 27 |
+
step: 0.5
|
| 28 |
+
alignment:
|
| 29 |
+
use_mlp: false
|
| 30 |
+
ordinal: false
|
| 31 |
+
dropout: 0.1
|
| 32 |
+
num_categories: 9
|
| 33 |
+
y_min: 1.0
|
| 34 |
+
y_max: 5.0
|
| 35 |
+
step: 0.5
|
| 36 |
+
preference:
|
| 37 |
+
use_mlp: false
|
| 38 |
+
dropout: 0.1
|
| 39 |
+
train:
|
| 40 |
+
num_epochs: 10
|
| 41 |
+
num_train_steps: 2000
|
| 42 |
+
batch_size: 48
|
| 43 |
+
learning_rate: 0.001
|
| 44 |
+
weight_decay: 0.01
|
| 45 |
+
max_grad_norm: 1.0
|
| 46 |
+
warmup_steps: 100
|
| 47 |
+
schedule_type: cosine
|
| 48 |
+
min_lr_ratio: 0.01
|
| 49 |
+
dataset_mode: sequential
|
| 50 |
+
steps_per_task: 1000
|
| 51 |
+
log_interval: 50
|
| 52 |
+
val_interval: 1000
|
| 53 |
+
save_interval: 1000
|
| 54 |
+
num_workers: 8
|
| 55 |
+
resume: null
|
| 56 |
+
device: cuda:0
|
downstream/20260121_2116/train.log
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-21 21:16:13 | INFO | Starting downstream training: 20260121_2116
|
| 2 |
+
2026-01-21 21:16:13 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2116
|
| 3 |
+
2026-01-21 21:16:13 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2116/config.yaml
|
| 4 |
+
2026-01-21 21:16:13 | INFO | Training tasks: ['musicality', 'alignment', 'preference']
|
| 5 |
+
2026-01-21 21:16:13 | INFO | Dataset mode: sequential
|
| 6 |
+
2026-01-21 21:16:16 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 7 |
+
2026-01-21 21:16:16 | INFO | Using checkpoint config for model
|
| 8 |
+
2026-01-21 21:16:22 | WARNING | Missing keys: 283
|
| 9 |
+
2026-01-21 21:16:22 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 10 |
+
2026-01-21 21:16:22 | INFO | Added linear head for task 'musicality'
|
| 11 |
+
2026-01-21 21:16:22 | INFO | Added linear head for task 'alignment'
|
| 12 |
+
2026-01-21 21:16:22 | INFO | Added linear head for task 'preference'
|
| 13 |
+
2026-01-21 21:16:22 | INFO | Initializing heads from backbone 'score_projector'
|
| 14 |
+
2026-01-21 21:16:22 | INFO | Initializing 3 heads from 'score_projector'
|
| 15 |
+
2026-01-21 21:16:22 | INFO | Task 'musicality': type=linear, ordinal=False
|
| 16 |
+
2026-01-21 21:16:22 | INFO | Initializing head from 'score_projector' (type=linear, from_ema=False)
|
| 17 |
+
2026-01-21 21:16:23 | INFO | Warning: output dim mismatch (2 vs 1), using first 1 dims
|
| 18 |
+
2026-01-21 21:16:23 | INFO | Initialized final linear layer from source
|
| 19 |
+
2026-01-21 21:16:23 | INFO | ✓ Head initialized from 'score_projector'
|
| 20 |
+
2026-01-21 21:16:23 | INFO | Task 'alignment': type=linear, ordinal=False
|
| 21 |
+
2026-01-21 21:16:23 | INFO | Initializing head from 'score_projector' (type=linear, from_ema=False)
|
| 22 |
+
2026-01-21 21:16:23 | INFO | Warning: output dim mismatch (2 vs 1), using first 1 dims
|
| 23 |
+
2026-01-21 21:16:23 | INFO | Initialized final linear layer from source
|
| 24 |
+
2026-01-21 21:16:23 | INFO | ✓ Head initialized from 'score_projector'
|
| 25 |
+
2026-01-21 21:16:23 | INFO | Task 'preference': type=linear, ordinal=False
|
| 26 |
+
2026-01-21 21:16:23 | INFO | Initializing head from 'score_projector' (type=linear, from_ema=False)
|
| 27 |
+
2026-01-21 21:16:23 | INFO | Warning: output dim mismatch (2 vs 1), using first 1 dims
|
| 28 |
+
2026-01-21 21:16:23 | INFO | Initialized final linear layer from source
|
| 29 |
+
2026-01-21 21:16:23 | INFO | ✓ Head initialized from 'score_projector'
|
| 30 |
+
2026-01-21 21:16:23 | INFO | ✓ All heads initialized
|
| 31 |
+
2026-01-21 21:16:23 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
|
| 32 |
+
2026-01-21 21:16:23 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
|
| 33 |
+
2026-01-21 21:16:23 | INFO | Task 'musicality': train=4322, test=913
|
| 34 |
+
2026-01-21 21:16:23 | INFO | Task 'alignment': train=1923, test=913
|
| 35 |
+
2026-01-21 21:16:23 | INFO | Task 'preference': train=1065, test=275
|
| 36 |
+
2026-01-21 21:16:23 | INFO | [SEQUENTIAL MODE] Training 3 tasks, 1000 steps each
|
| 37 |
+
2026-01-21 21:16:23 | INFO |
|
| 38 |
+
============================================================
|
| 39 |
+
2026-01-21 21:16:23 | INFO | Starting Task 1/3: musicality
|
| 40 |
+
2026-01-21 21:16:23 | INFO | ============================================================
|
| 41 |
+
2026-01-21 21:16:23 | INFO | Task 'musicality' trainable parameters: 769
|
downstream/20260121_2117/config.yaml
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 2 |
+
basics:
|
| 3 |
+
random_seed: 42
|
| 4 |
+
save_dir: ${project_root}/experiments/downstream
|
| 5 |
+
run_name: null
|
| 6 |
+
tasks:
|
| 7 |
+
- musicality
|
| 8 |
+
- alignment
|
| 9 |
+
- preference
|
| 10 |
+
backbone:
|
| 11 |
+
checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 12 |
+
freeze: true
|
| 13 |
+
from_ema: false
|
| 14 |
+
dataset:
|
| 15 |
+
train_file: ${project_root}/train_multitask.jsonl
|
| 16 |
+
test_file: ${project_root}/test_multitask.jsonl
|
| 17 |
+
heads:
|
| 18 |
+
hidden_dim: 768
|
| 19 |
+
init_from: score_projector
|
| 20 |
+
musicality:
|
| 21 |
+
use_mlp: false
|
| 22 |
+
ordinal: false
|
| 23 |
+
dropout: 0.1
|
| 24 |
+
num_categories: 9
|
| 25 |
+
y_min: 1.0
|
| 26 |
+
y_max: 5.0
|
| 27 |
+
step: 0.5
|
| 28 |
+
alignment:
|
| 29 |
+
use_mlp: false
|
| 30 |
+
ordinal: false
|
| 31 |
+
dropout: 0.1
|
| 32 |
+
num_categories: 9
|
| 33 |
+
y_min: 1.0
|
| 34 |
+
y_max: 5.0
|
| 35 |
+
step: 0.5
|
| 36 |
+
preference:
|
| 37 |
+
use_mlp: false
|
| 38 |
+
dropout: 0.1
|
| 39 |
+
train:
|
| 40 |
+
num_epochs: 10
|
| 41 |
+
num_train_steps: 2000
|
| 42 |
+
batch_size: 48
|
| 43 |
+
learning_rate: 0.001
|
| 44 |
+
weight_decay: 0.01
|
| 45 |
+
max_grad_norm: 1.0
|
| 46 |
+
warmup_steps: 100
|
| 47 |
+
schedule_type: cosine
|
| 48 |
+
min_lr_ratio: 0.01
|
| 49 |
+
dataset_mode: sequential
|
| 50 |
+
steps_per_task: 1000
|
| 51 |
+
log_interval: 50
|
| 52 |
+
val_interval: 1000
|
| 53 |
+
save_interval: 1000
|
| 54 |
+
num_workers: 8
|
| 55 |
+
resume: null
|
| 56 |
+
device: cuda:1
|
downstream/20260121_2117/train.log
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-21 21:17:16 | INFO | Starting downstream training: 20260121_2117
|
| 2 |
+
2026-01-21 21:17:16 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2117
|
| 3 |
+
2026-01-21 21:17:16 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2117/config.yaml
|
| 4 |
+
2026-01-21 21:17:16 | INFO | Training tasks: ['musicality', 'alignment', 'preference']
|
| 5 |
+
2026-01-21 21:17:16 | INFO | Dataset mode: sequential
|
| 6 |
+
2026-01-21 21:17:19 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 7 |
+
2026-01-21 21:17:19 | INFO | Using checkpoint config for model
|
| 8 |
+
2026-01-21 21:17:24 | WARNING | Missing keys: 283
|
| 9 |
+
2026-01-21 21:17:25 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 10 |
+
2026-01-21 21:17:25 | INFO | Added linear head for task 'musicality'
|
| 11 |
+
2026-01-21 21:17:25 | INFO | Added linear head for task 'alignment'
|
| 12 |
+
2026-01-21 21:17:25 | INFO | Added linear head for task 'preference'
|
| 13 |
+
2026-01-21 21:17:25 | INFO | Initializing heads from backbone 'score_projector'
|
| 14 |
+
2026-01-21 21:17:25 | INFO | Initializing 3 heads from 'score_projector'
|
| 15 |
+
2026-01-21 21:17:25 | INFO | Task 'musicality': type=linear, ordinal=False
|
| 16 |
+
2026-01-21 21:17:25 | INFO | Initializing head from 'score_projector' (type=linear, from_ema=False)
|
| 17 |
+
2026-01-21 21:17:25 | INFO | Warning: output dim mismatch (2 vs 1), using first 1 dims
|
| 18 |
+
2026-01-21 21:17:25 | INFO | Initialized final linear layer from source
|
| 19 |
+
2026-01-21 21:17:25 | INFO | ✓ Head initialized from 'score_projector'
|
| 20 |
+
2026-01-21 21:17:25 | INFO | Task 'alignment': type=linear, ordinal=False
|
| 21 |
+
2026-01-21 21:17:25 | INFO | Initializing head from 'score_projector' (type=linear, from_ema=False)
|
| 22 |
+
2026-01-21 21:17:25 | INFO | Warning: output dim mismatch (2 vs 1), using first 1 dims
|
| 23 |
+
2026-01-21 21:17:25 | INFO | Initialized final linear layer from source
|
| 24 |
+
2026-01-21 21:17:25 | INFO | ✓ Head initialized from 'score_projector'
|
| 25 |
+
2026-01-21 21:17:25 | INFO | Task 'preference': type=linear, ordinal=False
|
| 26 |
+
2026-01-21 21:17:25 | INFO | Initializing head from 'score_projector' (type=linear, from_ema=False)
|
| 27 |
+
2026-01-21 21:17:25 | INFO | Warning: output dim mismatch (2 vs 1), using first 1 dims
|
| 28 |
+
2026-01-21 21:17:25 | INFO | Initialized final linear layer from source
|
| 29 |
+
2026-01-21 21:17:25 | INFO | ✓ Head initialized from 'score_projector'
|
| 30 |
+
2026-01-21 21:17:25 | INFO | ✓ All heads initialized
|
| 31 |
+
2026-01-21 21:17:25 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
|
| 32 |
+
2026-01-21 21:17:25 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
|
| 33 |
+
2026-01-21 21:17:25 | INFO | Task 'musicality': train=4322, test=913
|
| 34 |
+
2026-01-21 21:17:25 | INFO | Task 'alignment': train=1923, test=913
|
| 35 |
+
2026-01-21 21:17:25 | INFO | Task 'preference': train=1065, test=275
|
| 36 |
+
2026-01-21 21:17:25 | INFO | [SEQUENTIAL MODE] Training 3 tasks, 1000 steps each
|
| 37 |
+
2026-01-21 21:17:25 | INFO |
|
| 38 |
+
============================================================
|
| 39 |
+
2026-01-21 21:17:25 | INFO | Starting Task 1/3: musicality
|
| 40 |
+
2026-01-21 21:17:25 | INFO | ============================================================
|
| 41 |
+
2026-01-21 21:17:25 | INFO | Task 'musicality' trainable parameters: 769
|
| 42 |
+
2026-01-21 21:17:43 | INFO | [Task musicality][Step 50/1000] loss=1.9747 | mae=1.9747
|
| 43 |
+
2026-01-21 21:17:57 | INFO | [Task musicality][Step 100/1000] loss=0.9926 | mae=0.9926
|
| 44 |
+
2026-01-21 21:18:07 | INFO | [Task musicality][Step 150/1000] loss=0.8076 | mae=0.8076
|
| 45 |
+
2026-01-21 21:18:21 | INFO | [Task musicality][Step 200/1000] loss=0.7166 | mae=0.7166
|
| 46 |
+
2026-01-21 21:18:32 | INFO | [Task musicality][Step 250/1000] loss=0.7024 | mae=0.7024
|
| 47 |
+
2026-01-21 21:18:47 | INFO | [Task musicality][Step 300/1000] loss=0.6726 | mae=0.6726
|
| 48 |
+
2026-01-21 21:18:57 | INFO | [Task musicality][Step 350/1000] loss=0.6763 | mae=0.6763
|
| 49 |
+
2026-01-21 21:19:11 | INFO | [Task musicality][Step 400/1000] loss=0.6693 | mae=0.6693
|
| 50 |
+
2026-01-21 21:19:23 | INFO | [Task musicality][Step 450/1000] loss=0.6402 | mae=0.6402
|
| 51 |
+
2026-01-21 21:19:38 | INFO | [Task musicality][Step 500/1000] loss=0.5858 | mae=0.5858
|
| 52 |
+
2026-01-21 21:19:51 | INFO | [Task musicality][Step 550/1000] loss=0.6195 | mae=0.6195
|
| 53 |
+
2026-01-21 21:20:02 | INFO | [Task musicality][Step 600/1000] loss=0.5754 | mae=0.5754
|
| 54 |
+
2026-01-21 21:20:17 | INFO | [Task musicality][Step 650/1000] loss=0.5761 | mae=0.5761
|
| 55 |
+
2026-01-21 21:20:27 | INFO | [Task musicality][Step 700/1000] loss=0.5701 | mae=0.5701
|
| 56 |
+
2026-01-21 21:20:40 | INFO | [Task musicality][Step 750/1000] loss=0.5714 | mae=0.5714
|
| 57 |
+
2026-01-21 21:20:50 | INFO | [Task musicality][Step 800/1000] loss=0.5381 | mae=0.5381
|
| 58 |
+
2026-01-21 21:21:04 | INFO | [Task musicality][Step 850/1000] loss=0.5339 | mae=0.5339
|
| 59 |
+
2026-01-21 21:21:15 | INFO | [Task musicality][Step 900/1000] loss=0.5365 | mae=0.5365
|
| 60 |
+
2026-01-21 21:21:28 | INFO | [Task musicality][Step 950/1000] loss=0.5234 | mae=0.5234
|
| 61 |
+
2026-01-21 21:21:44 | INFO | [Task musicality][Step 1000/1000] loss=0.5216 | mae=0.5216
|
| 62 |
+
2026-01-21 21:21:52 | INFO | [Val] musicality: loss=1.1082 | mae=1.1082
|
| 63 |
+
2026-01-21 21:21:52 | INFO | Task 'musicality' complete. Running validation...
|
| 64 |
+
2026-01-21 21:21:56 | INFO | [Final Val for musicality] loss=1.1082 | mae=1.1082
|
| 65 |
+
2026-01-21 21:21:56 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2117/ckpt/task_musicality_final.pt
|
| 66 |
+
2026-01-21 21:21:56 | INFO |
|
| 67 |
+
============================================================
|
| 68 |
+
2026-01-21 21:21:56 | INFO | Starting Task 2/3: alignment
|
| 69 |
+
2026-01-21 21:21:56 | INFO | ============================================================
|
| 70 |
+
2026-01-21 21:21:56 | INFO | Task 'alignment' trainable parameters: 769
|
| 71 |
+
2026-01-21 21:22:07 | INFO | [Task alignment][Step 50/1000] loss=2.4289 | mae=2.4289
|
| 72 |
+
2026-01-21 21:22:18 | INFO | [Task alignment][Step 100/1000] loss=1.0527 | mae=1.0527
|
| 73 |
+
2026-01-21 21:22:32 | INFO | [Task alignment][Step 150/1000] loss=0.8799 | mae=0.8799
|
| 74 |
+
2026-01-21 21:22:44 | INFO | [Task alignment][Step 200/1000] loss=0.7955 | mae=0.7955
|
| 75 |
+
2026-01-21 21:22:56 | INFO | [Task alignment][Step 250/1000] loss=0.7785 | mae=0.7785
|
| 76 |
+
2026-01-21 21:23:04 | INFO | [Task alignment][Step 300/1000] loss=0.7468 | mae=0.7468
|
| 77 |
+
2026-01-21 21:23:11 | INFO | [Task alignment][Step 350/1000] loss=0.7138 | mae=0.7138
|
| 78 |
+
2026-01-21 21:23:19 | INFO | [Task alignment][Step 400/1000] loss=0.6950 | mae=0.6950
|
| 79 |
+
2026-01-21 21:23:34 | INFO | [Task alignment][Step 450/1000] loss=0.6641 | mae=0.6641
|
| 80 |
+
2026-01-21 21:23:47 | INFO | [Task alignment][Step 500/1000] loss=0.6494 | mae=0.6494
|
| 81 |
+
2026-01-21 21:23:55 | INFO | [Task alignment][Step 550/1000] loss=0.6224 | mae=0.6224
|
| 82 |
+
2026-01-21 21:24:08 | INFO | [Task alignment][Step 600/1000] loss=0.6417 | mae=0.6417
|
| 83 |
+
2026-01-21 21:24:19 | INFO | [Task alignment][Step 650/1000] loss=0.6137 | mae=0.6137
|
| 84 |
+
2026-01-21 21:24:28 | INFO | [Task alignment][Step 700/1000] loss=0.5973 | mae=0.5973
|
| 85 |
+
2026-01-21 21:24:37 | INFO | [Task alignment][Step 750/1000] loss=0.5893 | mae=0.5893
|
| 86 |
+
2026-01-21 21:24:47 | INFO | [Task alignment][Step 800/1000] loss=0.5758 | mae=0.5758
|
| 87 |
+
2026-01-21 21:25:02 | INFO | [Task alignment][Step 850/1000] loss=0.5727 | mae=0.5727
|
| 88 |
+
2026-01-21 21:25:13 | INFO | [Task alignment][Step 900/1000] loss=0.5572 | mae=0.5572
|
| 89 |
+
2026-01-21 21:25:23 | INFO | [Task alignment][Step 950/1000] loss=0.5710 | mae=0.5710
|
| 90 |
+
2026-01-21 21:25:34 | INFO | [Task alignment][Step 1000/1000] loss=0.5488 | mae=0.5488
|
| 91 |
+
2026-01-21 21:25:38 | INFO | [Val] alignment: loss=1.2893 | mae=1.2893
|
| 92 |
+
2026-01-21 21:25:38 | INFO | Task 'alignment' complete. Running validation...
|
| 93 |
+
2026-01-21 21:25:43 | INFO | [Final Val for alignment] loss=1.2893 | mae=1.2893
|
| 94 |
+
2026-01-21 21:25:43 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2117/ckpt/task_alignment_final.pt
|
| 95 |
+
2026-01-21 21:25:43 | INFO |
|
| 96 |
+
============================================================
|
| 97 |
+
2026-01-21 21:25:43 | INFO | Starting Task 3/3: preference
|
| 98 |
+
2026-01-21 21:25:43 | INFO | ============================================================
|
| 99 |
+
2026-01-21 21:25:43 | INFO | Task 'preference' trainable parameters: 769
|
| 100 |
+
2026-01-21 21:27:09 | INFO | [Task preference][Step 50/1000] loss=0.7531 | accuracy=0.6267
|
| 101 |
+
2026-01-21 21:28:26 | INFO | [Task preference][Step 100/1000] loss=0.5513 | accuracy=0.7275
|
| 102 |
+
2026-01-21 21:29:43 | INFO | [Task preference][Step 150/1000] loss=0.5132 | accuracy=0.7400
|
| 103 |
+
2026-01-21 21:31:03 | INFO | [Task preference][Step 200/1000] loss=0.5026 | accuracy=0.7579
|
| 104 |
+
2026-01-21 21:32:17 | INFO | [Task preference][Step 250/1000] loss=0.4554 | accuracy=0.7738
|
| 105 |
+
2026-01-21 21:33:30 | INFO | [Task preference][Step 300/1000] loss=0.4522 | accuracy=0.7754
|
| 106 |
+
2026-01-21 21:34:44 | INFO | [Task preference][Step 350/1000] loss=0.4400 | accuracy=0.7821
|
| 107 |
+
2026-01-21 21:36:04 | INFO | [Task preference][Step 400/1000] loss=0.4237 | accuracy=0.7988
|
| 108 |
+
2026-01-21 21:37:13 | INFO | [Task preference][Step 450/1000] loss=0.4104 | accuracy=0.8054
|
| 109 |
+
2026-01-21 21:38:22 | INFO | [Task preference][Step 500/1000] loss=0.4061 | accuracy=0.8067
|
| 110 |
+
2026-01-21 21:39:32 | INFO | [Task preference][Step 550/1000] loss=0.3888 | accuracy=0.8225
|
| 111 |
+
2026-01-21 21:40:46 | INFO | [Task preference][Step 600/1000] loss=0.3797 | accuracy=0.8213
|
| 112 |
+
2026-01-21 21:41:55 | INFO | [Task preference][Step 650/1000] loss=0.3849 | accuracy=0.8208
|
| 113 |
+
2026-01-21 21:43:08 | INFO | [Task preference][Step 700/1000] loss=0.3678 | accuracy=0.8329
|
| 114 |
+
2026-01-21 21:44:35 | INFO | [Task preference][Step 750/1000] loss=0.3720 | accuracy=0.8308
|
downstream/20260121_2145/config.yaml
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 2 |
+
basics:
|
| 3 |
+
random_seed: 42
|
| 4 |
+
save_dir: ${project_root}/experiments/downstream
|
| 5 |
+
run_name: null
|
| 6 |
+
tasks:
|
| 7 |
+
- musicality
|
| 8 |
+
- alignment
|
| 9 |
+
- preference
|
| 10 |
+
backbone:
|
| 11 |
+
checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 12 |
+
freeze: true
|
| 13 |
+
from_ema: false
|
| 14 |
+
dataset:
|
| 15 |
+
train_file: ${project_root}/train_multitask.jsonl
|
| 16 |
+
test_file: ${project_root}/test_multitask.jsonl
|
| 17 |
+
heads:
|
| 18 |
+
hidden_dim: 768
|
| 19 |
+
init_from: score_projector
|
| 20 |
+
musicality:
|
| 21 |
+
use_mlp: true
|
| 22 |
+
ordinal: false
|
| 23 |
+
dropout: 0.1
|
| 24 |
+
num_categories: 9
|
| 25 |
+
y_min: 1.0
|
| 26 |
+
y_max: 5.0
|
| 27 |
+
step: 0.5
|
| 28 |
+
alignment:
|
| 29 |
+
use_mlp: true
|
| 30 |
+
ordinal: false
|
| 31 |
+
dropout: 0.1
|
| 32 |
+
num_categories: 9
|
| 33 |
+
y_min: 1.0
|
| 34 |
+
y_max: 5.0
|
| 35 |
+
step: 0.5
|
| 36 |
+
preference:
|
| 37 |
+
use_mlp: true
|
| 38 |
+
dropout: 0.1
|
| 39 |
+
train:
|
| 40 |
+
num_epochs: 10
|
| 41 |
+
num_train_steps: 2000
|
| 42 |
+
batch_size: 48
|
| 43 |
+
learning_rate: 0.001
|
| 44 |
+
weight_decay: 0.01
|
| 45 |
+
max_grad_norm: 1.0
|
| 46 |
+
warmup_steps: 100
|
| 47 |
+
schedule_type: cosine
|
| 48 |
+
min_lr_ratio: 0.01
|
| 49 |
+
dataset_mode: sequential
|
| 50 |
+
steps_per_task: 1000
|
| 51 |
+
log_interval: 50
|
| 52 |
+
val_interval: 1000
|
| 53 |
+
save_interval: 1000
|
| 54 |
+
num_workers: 8
|
| 55 |
+
resume: null
|
| 56 |
+
device: cuda:2
|
downstream/20260121_2145/train.log
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-21 21:45:31 | INFO | Starting downstream training: 20260121_2145
|
| 2 |
+
2026-01-21 21:45:31 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2145
|
| 3 |
+
2026-01-21 21:45:31 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2145/config.yaml
|
| 4 |
+
2026-01-21 21:45:31 | INFO | Training tasks: ['musicality', 'alignment', 'preference']
|
| 5 |
+
2026-01-21 21:45:31 | INFO | Dataset mode: sequential
|
| 6 |
+
2026-01-21 21:45:34 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 7 |
+
2026-01-21 21:45:34 | INFO | Using checkpoint config for model
|
| 8 |
+
2026-01-21 21:45:39 | WARNING | Missing keys: 283
|
| 9 |
+
2026-01-21 21:45:40 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 10 |
+
2026-01-21 21:45:40 | INFO | Added mlp head for task 'musicality'
|
| 11 |
+
2026-01-21 21:45:40 | INFO | Added mlp head for task 'alignment'
|
| 12 |
+
2026-01-21 21:45:40 | INFO | Added mlp head for task 'preference'
|
| 13 |
+
2026-01-21 21:45:40 | INFO | Initializing heads from backbone 'score_projector'
|
| 14 |
+
2026-01-21 21:45:40 | INFO | Initializing 3 heads from 'score_projector'
|
| 15 |
+
2026-01-21 21:45:40 | INFO | Task 'musicality': type=mlp, ordinal=False
|
| 16 |
+
2026-01-21 21:45:40 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
|
| 17 |
+
2026-01-21 21:45:40 | INFO | Loaded 4 parameters, 2 missing
|
| 18 |
+
2026-01-21 21:45:40 | INFO | ✓ Head initialized from 'score_projector'
|
| 19 |
+
2026-01-21 21:45:40 | INFO | Task 'alignment': type=mlp, ordinal=False
|
| 20 |
+
2026-01-21 21:45:40 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
|
| 21 |
+
2026-01-21 21:45:40 | INFO | Loaded 4 parameters, 2 missing
|
| 22 |
+
2026-01-21 21:45:40 | INFO | ✓ Head initialized from 'score_projector'
|
| 23 |
+
2026-01-21 21:45:40 | INFO | Task 'preference': type=mlp, ordinal=False
|
| 24 |
+
2026-01-21 21:45:40 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
|
| 25 |
+
2026-01-21 21:45:40 | INFO | Loaded 4 parameters, 2 missing
|
| 26 |
+
2026-01-21 21:45:40 | INFO | ✓ Head initialized from 'score_projector'
|
| 27 |
+
2026-01-21 21:45:40 | INFO | ✓ All heads initialized
|
| 28 |
+
2026-01-21 21:45:40 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
|
| 29 |
+
2026-01-21 21:45:40 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
|
| 30 |
+
2026-01-21 21:45:40 | INFO | Task 'musicality': train=4322, test=913
|
| 31 |
+
2026-01-21 21:45:40 | INFO | Task 'alignment': train=1923, test=913
|
| 32 |
+
2026-01-21 21:45:40 | INFO | Task 'preference': train=1065, test=275
|
| 33 |
+
2026-01-21 21:45:40 | INFO | [SEQUENTIAL MODE] Training 3 tasks, 1000 steps each
|
| 34 |
+
2026-01-21 21:45:40 | INFO |
|
| 35 |
+
============================================================
|
| 36 |
+
2026-01-21 21:45:40 | INFO | Starting Task 1/3: musicality
|
| 37 |
+
2026-01-21 21:45:40 | INFO | ============================================================
|
| 38 |
+
2026-01-21 21:45:40 | INFO | Task 'musicality' trainable parameters: 592,897
|
| 39 |
+
2026-01-21 21:46:00 | INFO | [Task musicality][Step 50/1000] loss=1.7786 | mae=1.7786
|
| 40 |
+
2026-01-21 21:46:18 | INFO | [Task musicality][Step 100/1000] loss=0.7543 | mae=0.7543
|
| 41 |
+
2026-01-21 21:46:30 | INFO | [Task musicality][Step 150/1000] loss=0.6762 | mae=0.6762
|
| 42 |
+
2026-01-21 21:46:45 | INFO | [Task musicality][Step 200/1000] loss=0.6404 | mae=0.6404
|
| 43 |
+
2026-01-21 21:46:59 | INFO | [Task musicality][Step 250/1000] loss=0.5936 | mae=0.5936
|
| 44 |
+
2026-01-21 21:47:15 | INFO | [Task musicality][Step 300/1000] loss=0.5754 | mae=0.5754
|
| 45 |
+
2026-01-21 21:47:30 | INFO | [Task musicality][Step 350/1000] loss=0.5510 | mae=0.5510
|
| 46 |
+
2026-01-21 21:47:43 | INFO | [Task musicality][Step 400/1000] loss=0.5132 | mae=0.5132
|
| 47 |
+
2026-01-21 21:47:54 | INFO | [Task musicality][Step 450/1000] loss=0.5287 | mae=0.5287
|
| 48 |
+
2026-01-21 21:48:12 | INFO | [Task musicality][Step 500/1000] loss=0.4958 | mae=0.4958
|
| 49 |
+
2026-01-21 21:48:30 | INFO | [Task musicality][Step 550/1000] loss=0.4961 | mae=0.4961
|
| 50 |
+
2026-01-21 21:48:43 | INFO | [Task musicality][Step 600/1000] loss=0.4833 | mae=0.4833
|
| 51 |
+
2026-01-21 21:48:59 | INFO | [Task musicality][Step 650/1000] loss=0.4410 | mae=0.4410
|
| 52 |
+
2026-01-21 21:49:14 | INFO | [Task musicality][Step 700/1000] loss=0.4480 | mae=0.4480
|
| 53 |
+
2026-01-21 21:49:34 | INFO | [Task musicality][Step 750/1000] loss=0.4393 | mae=0.4393
|
| 54 |
+
2026-01-21 21:49:49 | INFO | [Task musicality][Step 800/1000] loss=0.4295 | mae=0.4295
|
| 55 |
+
2026-01-21 21:50:07 | INFO | [Task musicality][Step 850/1000] loss=0.4026 | mae=0.4026
|
| 56 |
+
2026-01-21 21:50:23 | INFO | [Task musicality][Step 900/1000] loss=0.4080 | mae=0.4080
|
| 57 |
+
2026-01-21 21:50:41 | INFO | [Task musicality][Step 950/1000] loss=0.3985 | mae=0.3985
|
| 58 |
+
2026-01-21 21:50:58 | INFO | [Task musicality][Step 1000/1000] loss=0.4006 | mae=0.4006
|
| 59 |
+
2026-01-21 21:51:05 | INFO | [Val] musicality: loss=0.6058 | mae=0.6058
|
| 60 |
+
2026-01-21 21:51:05 | INFO | Task 'musicality' complete. Running validation...
|
| 61 |
+
2026-01-21 21:51:10 | INFO | [Final Val for musicality] loss=0.6058 | mae=0.6058
|
| 62 |
+
2026-01-21 21:51:10 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2145/ckpt/task_musicality_final.pt
|
| 63 |
+
2026-01-21 21:51:10 | INFO |
|
| 64 |
+
============================================================
|
| 65 |
+
2026-01-21 21:51:10 | INFO | Starting Task 2/3: alignment
|
| 66 |
+
2026-01-21 21:51:10 | INFO | ============================================================
|
| 67 |
+
2026-01-21 21:51:10 | INFO | Task 'alignment' trainable parameters: 592,897
|
| 68 |
+
2026-01-21 21:51:26 | INFO | [Task alignment][Step 50/1000] loss=1.8262 | mae=1.8262
|
| 69 |
+
2026-01-21 21:51:38 | INFO | [Task alignment][Step 100/1000] loss=0.7283 | mae=0.7283
|
| 70 |
+
2026-01-21 21:51:50 | INFO | [Task alignment][Step 150/1000] loss=0.6792 | mae=0.6792
|
| 71 |
+
2026-01-21 21:52:03 | INFO | [Task alignment][Step 200/1000] loss=0.5979 | mae=0.5979
|
| 72 |
+
2026-01-21 21:52:14 | INFO | [Task alignment][Step 250/1000] loss=0.5766 | mae=0.5766
|
| 73 |
+
2026-01-21 21:52:24 | INFO | [Task alignment][Step 300/1000] loss=0.5427 | mae=0.5427
|
| 74 |
+
2026-01-21 21:52:34 | INFO | [Task alignment][Step 350/1000] loss=0.5330 | mae=0.5330
|
| 75 |
+
2026-01-21 21:52:40 | INFO | [Task alignment][Step 400/1000] loss=0.5115 | mae=0.5115
|
| 76 |
+
2026-01-21 21:52:48 | INFO | [Task alignment][Step 450/1000] loss=0.4688 | mae=0.4688
|
| 77 |
+
2026-01-21 21:52:54 | INFO | [Task alignment][Step 500/1000] loss=0.4563 | mae=0.4563
|
| 78 |
+
2026-01-21 21:53:00 | INFO | [Task alignment][Step 550/1000] loss=0.4392 | mae=0.4392
|
| 79 |
+
2026-01-21 21:53:10 | INFO | [Task alignment][Step 600/1000] loss=0.4127 | mae=0.4127
|
| 80 |
+
2026-01-21 21:53:20 | INFO | [Task alignment][Step 650/1000] loss=0.3947 | mae=0.3947
|
| 81 |
+
2026-01-21 21:53:28 | INFO | [Task alignment][Step 700/1000] loss=0.3776 | mae=0.3776
|
| 82 |
+
2026-01-21 21:53:37 | INFO | [Task alignment][Step 750/1000] loss=0.3523 | mae=0.3523
|
| 83 |
+
2026-01-21 21:53:45 | INFO | [Task alignment][Step 800/1000] loss=0.3439 | mae=0.3439
|
| 84 |
+
2026-01-21 21:53:55 | INFO | [Task alignment][Step 850/1000] loss=0.3254 | mae=0.3254
|
| 85 |
+
2026-01-21 21:54:08 | INFO | [Task alignment][Step 900/1000] loss=0.3240 | mae=0.3240
|
| 86 |
+
2026-01-21 21:54:21 | INFO | [Task alignment][Step 950/1000] loss=0.3232 | mae=0.3232
|
| 87 |
+
2026-01-21 21:54:33 | INFO | [Task alignment][Step 1000/1000] loss=0.3232 | mae=0.3232
|
| 88 |
+
2026-01-21 21:54:39 | INFO | [Val] alignment: loss=0.6060 | mae=0.6060
|
| 89 |
+
2026-01-21 21:54:39 | INFO | Task 'alignment' complete. Running validation...
|
| 90 |
+
2026-01-21 21:54:44 | INFO | [Final Val for alignment] loss=0.6060 | mae=0.6060
|
| 91 |
+
2026-01-21 21:54:44 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2145/ckpt/task_alignment_final.pt
|
| 92 |
+
2026-01-21 21:54:44 | INFO |
|
| 93 |
+
============================================================
|
| 94 |
+
2026-01-21 21:54:44 | INFO | Starting Task 3/3: preference
|
| 95 |
+
2026-01-21 21:54:44 | INFO | ============================================================
|
| 96 |
+
2026-01-21 21:54:44 | INFO | Task 'preference' trainable parameters: 592,897
|
| 97 |
+
2026-01-21 21:56:24 | INFO | [Task preference][Step 50/1000] loss=0.5985 | accuracy=0.6663
|
| 98 |
+
2026-01-21 21:57:46 | INFO | [Task preference][Step 100/1000] loss=0.4507 | accuracy=0.7896
|
| 99 |
+
2026-01-21 21:59:02 | INFO | [Task preference][Step 150/1000] loss=0.3691 | accuracy=0.8363
|
| 100 |
+
2026-01-21 22:00:22 | INFO | [Task preference][Step 200/1000] loss=0.2699 | accuracy=0.8992
|
| 101 |
+
2026-01-21 22:01:38 | INFO | [Task preference][Step 250/1000] loss=0.1960 | accuracy=0.9267
|
| 102 |
+
2026-01-21 22:02:51 | INFO | [Task preference][Step 300/1000] loss=0.1390 | accuracy=0.9500
|
| 103 |
+
2026-01-21 22:04:05 | INFO | [Task preference][Step 350/1000] loss=0.1032 | accuracy=0.9683
|
| 104 |
+
2026-01-21 22:05:24 | INFO | [Task preference][Step 400/1000] loss=0.0626 | accuracy=0.9842
|
| 105 |
+
2026-01-21 22:06:58 | INFO | [Task preference][Step 450/1000] loss=0.0451 | accuracy=0.9908
|
| 106 |
+
2026-01-21 22:08:26 | INFO | [Task preference][Step 500/1000] loss=0.0280 | accuracy=0.9958
|
| 107 |
+
2026-01-21 22:09:49 | INFO | [Task preference][Step 550/1000] loss=0.0195 | accuracy=0.9979
|
| 108 |
+
2026-01-21 22:11:22 | INFO | [Task preference][Step 600/1000] loss=0.0139 | accuracy=0.9996
|
| 109 |
+
2026-01-21 22:12:48 | INFO | [Task preference][Step 650/1000] loss=0.0127 | accuracy=0.9988
|
| 110 |
+
2026-01-21 22:14:19 | INFO | [Task preference][Step 700/1000] loss=0.0113 | accuracy=0.9992
|
| 111 |
+
2026-01-21 22:15:52 | INFO | [Task preference][Step 750/1000] loss=0.0107 | accuracy=0.9996
|
| 112 |
+
2026-01-21 22:17:16 | INFO | [Task preference][Step 800/1000] loss=0.0084 | accuracy=1.0000
|
| 113 |
+
2026-01-21 22:18:49 | INFO | [Task preference][Step 850/1000] loss=0.0088 | accuracy=1.0000
|
| 114 |
+
2026-01-21 22:20:27 | INFO | [Task preference][Step 900/1000] loss=0.0086 | accuracy=0.9996
|
| 115 |
+
2026-01-21 22:22:09 | INFO | [Task preference][Step 950/1000] loss=0.0077 | accuracy=0.9996
|
| 116 |
+
2026-01-21 22:23:32 | INFO | [Task preference][Step 1000/1000] loss=0.0081 | accuracy=0.9996
|
| 117 |
+
2026-01-21 22:23:45 | INFO | [Val] preference: loss=1.1195 | accuracy=0.7176
|
| 118 |
+
2026-01-21 22:23:45 | INFO | Task 'preference' complete. Running validation...
|
| 119 |
+
2026-01-21 22:23:55 | INFO | [Final Val for preference] loss=1.1195 | accuracy=0.7176
|
| 120 |
+
2026-01-21 22:23:55 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2145/ckpt/task_preference_final.pt
|
| 121 |
+
2026-01-21 22:23:55 | INFO |
|
| 122 |
+
============================================================
|
| 123 |
+
2026-01-21 22:23:55 | INFO | All tasks complete. Running final validation for all tasks...
|
| 124 |
+
2026-01-21 22:24:01 | INFO | [Final Val] musicality: loss=0.6058 | mae=0.6058
|
| 125 |
+
2026-01-21 22:24:08 | INFO | [Final Val] alignment: loss=0.6060 | mae=0.6060
|
| 126 |
+
2026-01-21 22:24:16 | INFO | [Final Val] preference: loss=1.1195 | accuracy=0.7176
|
| 127 |
+
2026-01-21 22:24:16 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2145/ckpt/downstream_final.pt
|
| 128 |
+
2026-01-21 22:24:16 | INFO | Done! Checkpoint saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2145/ckpt/downstream_final.pt
|
downstream/20260121_2200/config.yaml
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 2 |
+
basics:
|
| 3 |
+
random_seed: 42
|
| 4 |
+
save_dir: ${project_root}/experiments/downstream
|
| 5 |
+
run_name: null
|
| 6 |
+
tasks:
|
| 7 |
+
- musicality
|
| 8 |
+
- alignment
|
| 9 |
+
backbone:
|
| 10 |
+
checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 11 |
+
freeze: true
|
| 12 |
+
from_ema: false
|
| 13 |
+
dataset:
|
| 14 |
+
train_file: ${project_root}/train_multitask.jsonl
|
| 15 |
+
test_file: ${project_root}/test_multitask.jsonl
|
| 16 |
+
heads:
|
| 17 |
+
hidden_dim: 768
|
| 18 |
+
init_from: score_projector
|
| 19 |
+
musicality:
|
| 20 |
+
use_mlp: true
|
| 21 |
+
ordinal: false
|
| 22 |
+
dropout: 0.1
|
| 23 |
+
num_categories: 9
|
| 24 |
+
y_min: 1.0
|
| 25 |
+
y_max: 5.0
|
| 26 |
+
step: 0.5
|
| 27 |
+
type: ordinal
|
| 28 |
+
tau: 1.0
|
| 29 |
+
alignment:
|
| 30 |
+
use_mlp: true
|
| 31 |
+
ordinal: false
|
| 32 |
+
dropout: 0.1
|
| 33 |
+
num_categories: 9
|
| 34 |
+
y_min: 1.0
|
| 35 |
+
y_max: 5.0
|
| 36 |
+
step: 0.5
|
| 37 |
+
type: ordinal
|
| 38 |
+
tau: 1.0
|
| 39 |
+
preference:
|
| 40 |
+
use_mlp: true
|
| 41 |
+
dropout: 0.1
|
| 42 |
+
train:
|
| 43 |
+
num_epochs: 10
|
| 44 |
+
num_train_steps: 2000
|
| 45 |
+
batch_size: 48
|
| 46 |
+
learning_rate: 0.001
|
| 47 |
+
weight_decay: 0.01
|
| 48 |
+
max_grad_norm: 1.0
|
| 49 |
+
warmup_steps: 100
|
| 50 |
+
schedule_type: cosine
|
| 51 |
+
min_lr_ratio: 0.01
|
| 52 |
+
dataset_mode: sequential
|
| 53 |
+
steps_per_task: 1000
|
| 54 |
+
log_interval: 50
|
| 55 |
+
val_interval: 1000
|
| 56 |
+
save_interval: 1000
|
| 57 |
+
num_workers: 8
|
| 58 |
+
resume: null
|
| 59 |
+
device: cuda:2
|
downstream/20260121_2200/train.log
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-21 22:00:33 | INFO | Starting downstream training: 20260121_2200
|
| 2 |
+
2026-01-21 22:00:33 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2200
|
| 3 |
+
2026-01-21 22:00:33 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2200/config.yaml
|
| 4 |
+
2026-01-21 22:00:33 | INFO | Training tasks: ['musicality', 'alignment']
|
| 5 |
+
2026-01-21 22:00:33 | INFO | Dataset mode: sequential
|
| 6 |
+
2026-01-21 22:00:35 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 7 |
+
2026-01-21 22:00:35 | INFO | Using checkpoint config for model
|
| 8 |
+
2026-01-21 22:00:41 | WARNING | Missing keys: 283
|
downstream/20260121_2202/config.yaml
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 2 |
+
basics:
|
| 3 |
+
random_seed: 42
|
| 4 |
+
save_dir: ${project_root}/experiments/downstream
|
| 5 |
+
run_name: null
|
| 6 |
+
tasks:
|
| 7 |
+
- musicality
|
| 8 |
+
- alignment
|
| 9 |
+
backbone:
|
| 10 |
+
checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 11 |
+
freeze: true
|
| 12 |
+
from_ema: false
|
| 13 |
+
dataset:
|
| 14 |
+
train_file: ${project_root}/train_multitask.jsonl
|
| 15 |
+
test_file: ${project_root}/test_multitask.jsonl
|
| 16 |
+
heads:
|
| 17 |
+
hidden_dim: 768
|
| 18 |
+
init_from: score_projector
|
| 19 |
+
musicality:
|
| 20 |
+
use_mlp: true
|
| 21 |
+
ordinal: false
|
| 22 |
+
dropout: 0.1
|
| 23 |
+
num_categories: 9
|
| 24 |
+
y_min: 1.0
|
| 25 |
+
y_max: 5.0
|
| 26 |
+
step: 0.5
|
| 27 |
+
type: ordinal
|
| 28 |
+
tau: 1.0
|
| 29 |
+
alignment:
|
| 30 |
+
use_mlp: true
|
| 31 |
+
ordinal: false
|
| 32 |
+
dropout: 0.1
|
| 33 |
+
num_categories: 9
|
| 34 |
+
y_min: 1.0
|
| 35 |
+
y_max: 5.0
|
| 36 |
+
step: 0.5
|
| 37 |
+
type: ordinal
|
| 38 |
+
tau: 1.0
|
| 39 |
+
preference:
|
| 40 |
+
use_mlp: true
|
| 41 |
+
dropout: 0.1
|
| 42 |
+
train:
|
| 43 |
+
num_epochs: 10
|
| 44 |
+
num_train_steps: 2000
|
| 45 |
+
batch_size: 48
|
| 46 |
+
learning_rate: 0.001
|
| 47 |
+
weight_decay: 0.01
|
| 48 |
+
max_grad_norm: 1.0
|
| 49 |
+
warmup_steps: 100
|
| 50 |
+
schedule_type: cosine
|
| 51 |
+
min_lr_ratio: 0.01
|
| 52 |
+
dataset_mode: sequential
|
| 53 |
+
steps_per_task: 1000
|
| 54 |
+
log_interval: 50
|
| 55 |
+
val_interval: 1000
|
| 56 |
+
save_interval: 1000
|
| 57 |
+
num_workers: 8
|
| 58 |
+
resume: null
|
| 59 |
+
device: cuda:3
|
downstream/20260121_2202/train.log
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-21 22:02:15 | INFO | Starting downstream training: 20260121_2202
|
| 2 |
+
2026-01-21 22:02:15 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2202
|
| 3 |
+
2026-01-21 22:02:15 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2202/config.yaml
|
| 4 |
+
2026-01-21 22:02:15 | INFO | Training tasks: ['musicality', 'alignment']
|
| 5 |
+
2026-01-21 22:02:15 | INFO | Dataset mode: sequential
|
| 6 |
+
2026-01-21 22:02:18 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 7 |
+
2026-01-21 22:02:18 | INFO | Using checkpoint config for model
|
| 8 |
+
2026-01-21 22:02:24 | WARNING | Missing keys: 283
|
| 9 |
+
2026-01-21 22:02:24 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 10 |
+
2026-01-21 22:02:24 | INFO | Added mlp head for task 'musicality'
|
| 11 |
+
2026-01-21 22:02:24 | INFO | Added mlp head for task 'alignment'
|
| 12 |
+
2026-01-21 22:02:24 | INFO | Initializing heads from backbone 'score_projector'
|
| 13 |
+
2026-01-21 22:02:24 | INFO | Initializing 2 heads from 'score_projector'
|
| 14 |
+
2026-01-21 22:02:24 | INFO | Task 'musicality': type=mlp, ordinal=False
|
| 15 |
+
2026-01-21 22:02:24 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
|
| 16 |
+
2026-01-21 22:02:24 | INFO | Loaded 4 parameters, 2 missing
|
| 17 |
+
2026-01-21 22:02:24 | INFO | ✓ Head initialized from 'score_projector'
|
| 18 |
+
2026-01-21 22:02:24 | INFO | Task 'alignment': type=mlp, ordinal=False
|
| 19 |
+
2026-01-21 22:02:24 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
|
| 20 |
+
2026-01-21 22:02:24 | INFO | Loaded 4 parameters, 2 missing
|
| 21 |
+
2026-01-21 22:02:24 | INFO | ✓ Head initialized from 'score_projector'
|
| 22 |
+
2026-01-21 22:02:24 | INFO | ✓ All heads initialized
|
| 23 |
+
2026-01-21 22:02:24 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
|
| 24 |
+
2026-01-21 22:02:24 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
|
| 25 |
+
2026-01-21 22:02:24 | INFO | Task 'musicality': train=4322, test=913
|
| 26 |
+
2026-01-21 22:02:24 | INFO | Task 'alignment': train=1923, test=913
|
| 27 |
+
2026-01-21 22:02:24 | INFO | [SEQUENTIAL MODE] Training 2 tasks, 1000 steps each
|
| 28 |
+
2026-01-21 22:02:24 | INFO |
|
| 29 |
+
============================================================
|
| 30 |
+
2026-01-21 22:02:24 | INFO | Starting Task 1/2: musicality
|
| 31 |
+
2026-01-21 22:02:24 | INFO | ============================================================
|
| 32 |
+
2026-01-21 22:02:24 | INFO | Task 'musicality' trainable parameters: 592,897
|
| 33 |
+
2026-01-21 22:02:44 | INFO | [Task musicality][Step 50/1000] loss=1.7686 | mae=1.7686
|
| 34 |
+
2026-01-21 22:02:58 | INFO | [Task musicality][Step 100/1000] loss=0.7030 | mae=0.7030
|
downstream/20260121_2203/config.yaml
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 2 |
+
basics:
|
| 3 |
+
random_seed: 42
|
| 4 |
+
save_dir: ${project_root}/experiments/downstream
|
| 5 |
+
run_name: null
|
| 6 |
+
tasks:
|
| 7 |
+
- musicality
|
| 8 |
+
- alignment
|
| 9 |
+
backbone:
|
| 10 |
+
checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 11 |
+
freeze: true
|
| 12 |
+
from_ema: false
|
| 13 |
+
dataset:
|
| 14 |
+
train_file: ${project_root}/train_multitask.jsonl
|
| 15 |
+
test_file: ${project_root}/test_multitask.jsonl
|
| 16 |
+
heads:
|
| 17 |
+
hidden_dim: 768
|
| 18 |
+
init_from: score_projector
|
| 19 |
+
musicality:
|
| 20 |
+
use_mlp: true
|
| 21 |
+
ordinal: true
|
| 22 |
+
dropout: 0.1
|
| 23 |
+
num_categories: 9
|
| 24 |
+
y_min: 1.0
|
| 25 |
+
y_max: 5.0
|
| 26 |
+
step: 0.5
|
| 27 |
+
alignment:
|
| 28 |
+
use_mlp: true
|
| 29 |
+
ordinal: true
|
| 30 |
+
dropout: 0.1
|
| 31 |
+
num_categories: 9
|
| 32 |
+
y_min: 1.0
|
| 33 |
+
y_max: 5.0
|
| 34 |
+
step: 0.5
|
| 35 |
+
preference:
|
| 36 |
+
use_mlp: true
|
| 37 |
+
dropout: 0.1
|
| 38 |
+
train:
|
| 39 |
+
num_epochs: 10
|
| 40 |
+
num_train_steps: 2000
|
| 41 |
+
batch_size: 48
|
| 42 |
+
learning_rate: 0.001
|
| 43 |
+
weight_decay: 0.01
|
| 44 |
+
max_grad_norm: 1.0
|
| 45 |
+
warmup_steps: 100
|
| 46 |
+
schedule_type: cosine
|
| 47 |
+
min_lr_ratio: 0.01
|
| 48 |
+
dataset_mode: sequential
|
| 49 |
+
steps_per_task: 1000
|
| 50 |
+
log_interval: 50
|
| 51 |
+
val_interval: 1000
|
| 52 |
+
save_interval: 1000
|
| 53 |
+
num_workers: 8
|
| 54 |
+
resume: null
|
| 55 |
+
device: cuda:3
|
downstream/20260121_2203/train.log
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-21 22:03:30 | INFO | Starting downstream training: 20260121_2203
|
| 2 |
+
2026-01-21 22:03:30 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2203
|
| 3 |
+
2026-01-21 22:03:30 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2203/config.yaml
|
| 4 |
+
2026-01-21 22:03:30 | INFO | Training tasks: ['musicality', 'alignment']
|
| 5 |
+
2026-01-21 22:03:30 | INFO | Dataset mode: sequential
|
| 6 |
+
2026-01-21 22:03:32 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 7 |
+
2026-01-21 22:03:33 | INFO | Using checkpoint config for model
|
| 8 |
+
2026-01-21 22:03:38 | WARNING | Missing keys: 283
|
| 9 |
+
2026-01-21 22:03:39 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 10 |
+
2026-01-21 22:03:39 | INFO | Added ordinal head for task 'musicality'
|
| 11 |
+
2026-01-21 22:03:39 | INFO | Added ordinal head for task 'alignment'
|
| 12 |
+
2026-01-21 22:03:39 | INFO | Initializing heads from backbone 'score_projector'
|
| 13 |
+
2026-01-21 22:03:39 | INFO | Initializing 2 heads from 'score_projector'
|
| 14 |
+
2026-01-21 22:03:39 | INFO | Task 'musicality': type=mlp, ordinal=True
|
| 15 |
+
2026-01-21 22:03:39 | INFO | Initializing Ordinal head from 'score_projector' (from_ema=False)
|
| 16 |
+
2026-01-21 22:03:39 | INFO | Loaded 4 parameters, 2 missing
|
| 17 |
+
2026-01-21 22:03:39 | INFO | ✓ Ordinal head MLP initialized from 'score_projector'
|
| 18 |
+
2026-01-21 22:03:39 | INFO | Note: Ordinal thresholds remain randomly initialized
|
| 19 |
+
2026-01-21 22:03:39 | INFO | Task 'alignment': type=mlp, ordinal=True
|
| 20 |
+
2026-01-21 22:03:39 | INFO | Initializing Ordinal head from 'score_projector' (from_ema=False)
|
| 21 |
+
2026-01-21 22:03:39 | INFO | Loaded 4 parameters, 2 missing
|
| 22 |
+
2026-01-21 22:03:39 | INFO | ✓ Ordinal head MLP initialized from 'score_projector'
|
| 23 |
+
2026-01-21 22:03:39 | INFO | Note: Ordinal thresholds remain randomly initialized
|
| 24 |
+
2026-01-21 22:03:39 | INFO | ✓ All heads initialized
|
| 25 |
+
2026-01-21 22:03:39 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
|
| 26 |
+
2026-01-21 22:03:39 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
|
| 27 |
+
2026-01-21 22:03:39 | INFO | Task 'musicality': train=4322, test=913
|
| 28 |
+
2026-01-21 22:03:39 | INFO | Task 'alignment': train=1923, test=913
|
| 29 |
+
2026-01-21 22:03:39 | INFO | [SEQUENTIAL MODE] Training 2 tasks, 1000 steps each
|
| 30 |
+
2026-01-21 22:03:39 | INFO |
|
| 31 |
+
============================================================
|
| 32 |
+
2026-01-21 22:03:39 | INFO | Starting Task 1/2: musicality
|
| 33 |
+
2026-01-21 22:03:39 | INFO | ============================================================
|
| 34 |
+
2026-01-21 22:03:39 | INFO | Task 'musicality' trainable parameters: 592,906
|
| 35 |
+
2026-01-21 22:04:02 | INFO | [Task musicality][Step 50/1000] loss=2.0734 | mae=0.7631
|
| 36 |
+
2026-01-21 22:04:18 | INFO | [Task musicality][Step 100/1000] loss=1.9539 | mae=0.5993
|
| 37 |
+
2026-01-21 22:04:30 | INFO | [Task musicality][Step 150/1000] loss=1.9282 | mae=0.5796
|
| 38 |
+
2026-01-21 22:04:46 | INFO | [Task musicality][Step 200/1000] loss=1.8968 | mae=0.5612
|
| 39 |
+
2026-01-21 22:04:58 | INFO | [Task musicality][Step 250/1000] loss=1.8660 | mae=0.5441
|
| 40 |
+
2026-01-21 22:05:13 | INFO | [Task musicality][Step 300/1000] loss=1.8148 | mae=0.5014
|
| 41 |
+
2026-01-21 22:05:26 | INFO | [Task musicality][Step 350/1000] loss=1.8131 | mae=0.5150
|
| 42 |
+
2026-01-21 22:05:43 | INFO | [Task musicality][Step 400/1000] loss=1.7905 | mae=0.5027
|
| 43 |
+
2026-01-21 22:05:57 | INFO | [Task musicality][Step 450/1000] loss=1.7558 | mae=0.4793
|
| 44 |
+
2026-01-21 22:06:14 | INFO | [Task musicality][Step 500/1000] loss=1.7351 | mae=0.4567
|
| 45 |
+
2026-01-21 22:06:31 | INFO | [Task musicality][Step 550/1000] loss=1.7292 | mae=0.4727
|
| 46 |
+
2026-01-21 22:06:44 | INFO | [Task musicality][Step 600/1000] loss=1.6897 | mae=0.4319
|
| 47 |
+
2026-01-21 22:07:02 | INFO | [Task musicality][Step 650/1000] loss=1.6883 | mae=0.4249
|
| 48 |
+
2026-01-21 22:07:16 | INFO | [Task musicality][Step 700/1000] loss=1.6641 | mae=0.4092
|
| 49 |
+
2026-01-21 22:07:35 | INFO | [Task musicality][Step 750/1000] loss=1.6474 | mae=0.3982
|
| 50 |
+
2026-01-21 22:07:48 | INFO | [Task musicality][Step 800/1000] loss=1.6376 | mae=0.3938
|
| 51 |
+
2026-01-21 22:08:06 | INFO | [Task musicality][Step 850/1000] loss=1.6277 | mae=0.3840
|
| 52 |
+
2026-01-21 22:08:19 | INFO | [Task musicality][Step 900/1000] loss=1.6306 | mae=0.3790
|
| 53 |
+
2026-01-21 22:08:35 | INFO | [Task musicality][Step 950/1000] loss=1.6176 | mae=0.3722
|
| 54 |
+
2026-01-21 22:08:53 | INFO | [Task musicality][Step 1000/1000] loss=1.6133 | mae=0.3613
|
| 55 |
+
2026-01-21 22:09:02 | INFO | [Val] musicality: loss=2.0307 | mae=0.7057
|
| 56 |
+
2026-01-21 22:09:02 | INFO | Task 'musicality' complete. Running validation...
|
| 57 |
+
2026-01-21 22:09:09 | INFO | [Final Val for musicality] loss=2.0307 | mae=0.7057
|
| 58 |
+
2026-01-21 22:09:09 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2203/ckpt/task_musicality_final.pt
|
| 59 |
+
2026-01-21 22:09:09 | INFO |
|
| 60 |
+
============================================================
|
| 61 |
+
2026-01-21 22:09:09 | INFO | Starting Task 2/2: alignment
|
| 62 |
+
2026-01-21 22:09:09 | INFO | ============================================================
|
| 63 |
+
2026-01-21 22:09:09 | INFO | Task 'alignment' trainable parameters: 592,906
|
| 64 |
+
2026-01-21 22:09:22 | INFO | [Task alignment][Step 50/1000] loss=2.0490 | mae=0.6992
|
| 65 |
+
2026-01-21 22:09:34 | INFO | [Task alignment][Step 100/1000] loss=1.9639 | mae=0.5939
|
| 66 |
+
2026-01-21 22:09:47 | INFO | [Task alignment][Step 150/1000] loss=1.9356 | mae=0.5761
|
| 67 |
+
2026-01-21 22:09:59 | INFO | [Task alignment][Step 200/1000] loss=1.8931 | mae=0.5401
|
| 68 |
+
2026-01-21 22:10:12 | INFO | [Task alignment][Step 250/1000] loss=1.8464 | mae=0.5062
|
| 69 |
+
2026-01-21 22:10:25 | INFO | [Task alignment][Step 300/1000] loss=1.8200 | mae=0.4874
|
| 70 |
+
2026-01-21 22:10:37 | INFO | [Task alignment][Step 350/1000] loss=1.7858 | mae=0.4652
|
| 71 |
+
2026-01-21 22:10:47 | INFO | [Task alignment][Step 400/1000] loss=1.7651 | mae=0.4574
|
| 72 |
+
2026-01-21 22:11:04 | INFO | [Task alignment][Step 450/1000] loss=1.7110 | mae=0.4072
|
| 73 |
+
2026-01-21 22:11:17 | INFO | [Task alignment][Step 500/1000] loss=1.6871 | mae=0.3807
|
| 74 |
+
2026-01-21 22:11:30 | INFO | [Task alignment][Step 550/1000] loss=1.6525 | mae=0.3685
|
| 75 |
+
2026-01-21 22:11:43 | INFO | [Task alignment][Step 600/1000] loss=1.6413 | mae=0.3528
|
| 76 |
+
2026-01-21 22:11:58 | INFO | [Task alignment][Step 650/1000] loss=1.6069 | mae=0.3283
|
| 77 |
+
2026-01-21 22:12:12 | INFO | [Task alignment][Step 700/1000] loss=1.5852 | mae=0.3045
|
| 78 |
+
2026-01-21 22:12:26 | INFO | [Task alignment][Step 750/1000] loss=1.5635 | mae=0.2865
|
| 79 |
+
2026-01-21 22:12:38 | INFO | [Task alignment][Step 800/1000] loss=1.5526 | mae=0.2740
|
| 80 |
+
2026-01-21 22:12:53 | INFO | [Task alignment][Step 850/1000] loss=1.5392 | mae=0.2679
|
| 81 |
+
2026-01-21 22:13:07 | INFO | [Task alignment][Step 900/1000] loss=1.5294 | mae=0.2544
|
| 82 |
+
2026-01-21 22:13:21 | INFO | [Task alignment][Step 950/1000] loss=1.5273 | mae=0.2547
|
| 83 |
+
2026-01-21 22:13:36 | INFO | [Task alignment][Step 1000/1000] loss=1.5287 | mae=0.2516
|
| 84 |
+
2026-01-21 22:13:44 | INFO | [Val] alignment: loss=1.9828 | mae=0.6563
|
| 85 |
+
2026-01-21 22:13:44 | INFO | Task 'alignment' complete. Running validation...
|
| 86 |
+
2026-01-21 22:13:52 | INFO | [Final Val for alignment] loss=1.9828 | mae=0.6563
|
| 87 |
+
2026-01-21 22:13:52 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2203/ckpt/task_alignment_final.pt
|
| 88 |
+
2026-01-21 22:13:52 | INFO |
|
| 89 |
+
============================================================
|
| 90 |
+
2026-01-21 22:13:52 | INFO | All tasks complete. Running final validation for all tasks...
|
| 91 |
+
2026-01-21 22:14:00 | INFO | [Final Val] musicality: loss=2.0307 | mae=0.7057
|
| 92 |
+
2026-01-21 22:14:07 | INFO | [Final Val] alignment: loss=1.9828 | mae=0.6563
|
| 93 |
+
2026-01-21 22:14:07 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2203/ckpt/downstream_final.pt
|
| 94 |
+
2026-01-21 22:14:07 | INFO | Done! Checkpoint saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2203/ckpt/downstream_final.pt
|
downstream/20260121_2243/config.yaml
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 2 |
+
basics:
|
| 3 |
+
random_seed: 42
|
| 4 |
+
save_dir: ${project_root}/experiments/downstream
|
| 5 |
+
run_name: null
|
| 6 |
+
tasks:
|
| 7 |
+
- musicality
|
| 8 |
+
- alignment
|
| 9 |
+
backbone:
|
| 10 |
+
checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 11 |
+
freeze: true
|
| 12 |
+
from_ema: false
|
| 13 |
+
dataset:
|
| 14 |
+
train_file: ${project_root}/train_multitask.jsonl
|
| 15 |
+
test_file: ${project_root}/test_multitask.jsonl
|
| 16 |
+
heads:
|
| 17 |
+
hidden_dim: 768
|
| 18 |
+
init_from: score_projector
|
| 19 |
+
musicality:
|
| 20 |
+
use_mlp: true
|
| 21 |
+
ordinal: false
|
| 22 |
+
dropout: 0.1
|
| 23 |
+
num_categories: 9
|
| 24 |
+
y_min: 1.0
|
| 25 |
+
y_max: 5.0
|
| 26 |
+
step: 0.5
|
| 27 |
+
alignment:
|
| 28 |
+
use_mlp: true
|
| 29 |
+
ordinal: false
|
| 30 |
+
dropout: 0.1
|
| 31 |
+
num_categories: 9
|
| 32 |
+
y_min: 1.0
|
| 33 |
+
y_max: 5.0
|
| 34 |
+
step: 0.5
|
| 35 |
+
preference:
|
| 36 |
+
use_mlp: true
|
| 37 |
+
dropout: 0.1
|
| 38 |
+
train:
|
| 39 |
+
num_epochs: 10
|
| 40 |
+
num_train_steps: 2000
|
| 41 |
+
batch_size: 48
|
| 42 |
+
learning_rate: 0.001
|
| 43 |
+
weight_decay: 0.01
|
| 44 |
+
max_grad_norm: 1.0
|
| 45 |
+
warmup_steps: 100
|
| 46 |
+
schedule_type: cosine
|
| 47 |
+
min_lr_ratio: 0.01
|
| 48 |
+
dataset_mode: sequential
|
| 49 |
+
steps_per_task: 5000
|
| 50 |
+
log_interval: 200
|
| 51 |
+
val_interval: 1000
|
| 52 |
+
save_interval: 1000
|
| 53 |
+
num_workers: 8
|
| 54 |
+
resume: null
|
| 55 |
+
device: cuda:3
|
downstream/20260121_2243/train.log
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-21 22:43:58 | INFO | Starting downstream training: 20260121_2243
|
| 2 |
+
2026-01-21 22:43:58 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2243
|
| 3 |
+
2026-01-21 22:43:58 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2243/config.yaml
|
| 4 |
+
2026-01-21 22:43:58 | INFO | Training tasks: ['musicality', 'alignment']
|
| 5 |
+
2026-01-21 22:43:58 | INFO | Dataset mode: sequential
|
| 6 |
+
2026-01-21 22:44:01 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 7 |
+
2026-01-21 22:44:01 | INFO | Using checkpoint config for model
|
| 8 |
+
2026-01-21 22:44:08 | WARNING | Missing keys: 283
|
| 9 |
+
2026-01-21 22:44:09 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 10 |
+
2026-01-21 22:44:09 | INFO | Added mlp head for task 'musicality'
|
| 11 |
+
2026-01-21 22:44:09 | INFO | Added mlp head for task 'alignment'
|
| 12 |
+
2026-01-21 22:44:09 | INFO | Initializing heads from backbone 'score_projector'
|
| 13 |
+
2026-01-21 22:44:09 | INFO | Initializing 2 heads from 'score_projector'
|
| 14 |
+
2026-01-21 22:44:09 | INFO | Task 'musicality': type=mlp, ordinal=False
|
| 15 |
+
2026-01-21 22:44:09 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
|
| 16 |
+
2026-01-21 22:44:09 | INFO | Slicing 3.weight -> 4.weight: torch.Size([2, 768]) -> torch.Size([1, 768])
|
| 17 |
+
2026-01-21 22:44:09 | INFO | Loaded 6 parameters, 0 missing
|
| 18 |
+
2026-01-21 22:44:09 | INFO | ✓ Head initialized from 'score_projector'
|
| 19 |
+
2026-01-21 22:44:09 | INFO | Task 'alignment': type=mlp, ordinal=False
|
| 20 |
+
2026-01-21 22:44:09 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
|
| 21 |
+
2026-01-21 22:44:09 | INFO | Slicing 3.weight -> 4.weight: torch.Size([2, 768]) -> torch.Size([1, 768])
|
| 22 |
+
2026-01-21 22:44:09 | INFO | Loaded 6 parameters, 0 missing
|
| 23 |
+
2026-01-21 22:44:09 | INFO | ✓ Head initialized from 'score_projector'
|
| 24 |
+
2026-01-21 22:44:09 | INFO | ✓ All heads initialized
|
| 25 |
+
2026-01-21 22:44:09 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
|
| 26 |
+
2026-01-21 22:44:09 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
|
| 27 |
+
2026-01-21 22:44:09 | INFO | Task 'musicality': train=4322, test=913
|
| 28 |
+
2026-01-21 22:44:09 | INFO | Task 'alignment': train=1923, test=913
|
| 29 |
+
2026-01-21 22:44:09 | INFO | [SEQUENTIAL MODE] Training 2 tasks, 5000 steps each
|
| 30 |
+
2026-01-21 22:44:09 | INFO |
|
| 31 |
+
============================================================
|
| 32 |
+
2026-01-21 22:44:09 | INFO | Starting Task 1/2: musicality
|
| 33 |
+
2026-01-21 22:44:09 | INFO | ============================================================
|
| 34 |
+
2026-01-21 22:44:09 | INFO | Task 'musicality' trainable parameters: 592,897
|
| 35 |
+
2026-01-21 22:45:12 | INFO | [Task musicality][Step 200/5000] loss=1.1968 | mae=1.1968
|
| 36 |
+
2026-01-21 22:46:00 | INFO | [Task musicality][Step 400/5000] loss=0.5557 | mae=0.5557
|
| 37 |
+
2026-01-21 22:46:55 | INFO | [Task musicality][Step 600/5000] loss=0.5162 | mae=0.5162
|
| 38 |
+
2026-01-21 22:47:56 | INFO | [Task musicality][Step 800/5000] loss=0.4912 | mae=0.4912
|
| 39 |
+
2026-01-21 22:48:59 | INFO | [Task musicality][Step 1000/5000] loss=0.4753 | mae=0.4753
|
| 40 |
+
2026-01-21 22:49:05 | INFO | [Val] musicality: loss=0.6083 | mae=0.6083
|
| 41 |
+
2026-01-21 22:49:55 | INFO | [Task musicality][Step 1200/5000] loss=0.4478 | mae=0.4478
|
| 42 |
+
2026-01-21 22:50:55 | INFO | [Task musicality][Step 1400/5000] loss=0.4372 | mae=0.4372
|
| 43 |
+
2026-01-21 22:51:46 | INFO | [Task musicality][Step 1600/5000] loss=0.4211 | mae=0.4211
|
| 44 |
+
2026-01-21 22:52:40 | INFO | [Task musicality][Step 1800/5000] loss=0.4047 | mae=0.4047
|
| 45 |
+
2026-01-21 22:53:35 | INFO | [Task musicality][Step 2000/5000] loss=0.3909 | mae=0.3909
|
| 46 |
+
2026-01-21 22:53:39 | INFO | [Val] musicality: loss=0.6811 | mae=0.6811
|
| 47 |
+
2026-01-21 22:54:28 | INFO | [Task musicality][Step 2200/5000] loss=0.3705 | mae=0.3705
|
| 48 |
+
2026-01-21 22:55:18 | INFO | [Task musicality][Step 2400/5000] loss=0.3584 | mae=0.3584
|
| 49 |
+
2026-01-21 22:56:08 | INFO | [Task musicality][Step 2600/5000] loss=0.3527 | mae=0.3527
|
| 50 |
+
2026-01-21 22:57:02 | INFO | [Task musicality][Step 2800/5000] loss=0.3346 | mae=0.3346
|
| 51 |
+
2026-01-21 22:57:51 | INFO | [Task musicality][Step 3000/5000] loss=0.3218 | mae=0.3218
|
| 52 |
+
2026-01-21 22:57:56 | INFO | [Val] musicality: loss=0.7101 | mae=0.7101
|
| 53 |
+
2026-01-21 22:58:44 | INFO | [Task musicality][Step 3200/5000] loss=0.3070 | mae=0.3070
|
| 54 |
+
2026-01-21 22:59:34 | INFO | [Task musicality][Step 3400/5000] loss=0.2961 | mae=0.2961
|
| 55 |
+
2026-01-21 23:00:25 | INFO | [Task musicality][Step 3600/5000] loss=0.2865 | mae=0.2865
|
| 56 |
+
2026-01-21 23:01:18 | INFO | [Task musicality][Step 3800/5000] loss=0.2746 | mae=0.2746
|
| 57 |
+
2026-01-21 23:02:07 | INFO | [Task musicality][Step 4000/5000] loss=0.2674 | mae=0.2674
|
| 58 |
+
2026-01-21 23:02:10 | INFO | [Val] musicality: loss=0.7399 | mae=0.7399
|
| 59 |
+
2026-01-21 23:02:59 | INFO | [Task musicality][Step 4200/5000] loss=0.2614 | mae=0.2614
|
| 60 |
+
2026-01-21 23:03:51 | INFO | [Task musicality][Step 4400/5000] loss=0.2596 | mae=0.2596
|
| 61 |
+
2026-01-21 23:04:44 | INFO | [Task musicality][Step 4600/5000] loss=0.2512 | mae=0.2512
|
| 62 |
+
2026-01-21 23:05:30 | INFO | [Task musicality][Step 4800/5000] loss=0.2500 | mae=0.2500
|
| 63 |
+
2026-01-21 23:06:20 | INFO | [Task musicality][Step 5000/5000] loss=0.2514 | mae=0.2514
|
| 64 |
+
2026-01-21 23:06:24 | INFO | [Val] musicality: loss=0.7481 | mae=0.7481
|
| 65 |
+
2026-01-21 23:06:24 | INFO | Task 'musicality' complete. Running validation...
|
| 66 |
+
2026-01-21 23:06:28 | INFO | [Final Val for musicality] loss=0.7481 | mae=0.7481
|
| 67 |
+
2026-01-21 23:06:28 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2243/ckpt/task_musicality_final.pt
|
| 68 |
+
2026-01-21 23:06:28 | INFO |
|
| 69 |
+
============================================================
|
| 70 |
+
2026-01-21 23:06:28 | INFO | Starting Task 2/2: alignment
|
| 71 |
+
2026-01-21 23:06:28 | INFO | ============================================================
|
| 72 |
+
2026-01-21 23:06:28 | INFO | Task 'alignment' trainable parameters: 592,897
|
| 73 |
+
2026-01-21 23:07:03 | INFO | [Task alignment][Step 200/5000] loss=1.0436 | mae=1.0436
|
| 74 |
+
2026-01-21 23:07:36 | INFO | [Task alignment][Step 400/5000] loss=0.5449 | mae=0.5449
|
| 75 |
+
2026-01-21 23:08:11 | INFO | [Task alignment][Step 600/5000] loss=0.4820 | mae=0.4820
|
| 76 |
+
2026-01-21 23:08:39 | INFO | [Task alignment][Step 800/5000] loss=0.4418 | mae=0.4418
|
| 77 |
+
2026-01-21 23:09:09 | INFO | [Task alignment][Step 1000/5000] loss=0.4081 | mae=0.4081
|
| 78 |
+
2026-01-21 23:09:14 | INFO | [Val] alignment: loss=0.6495 | mae=0.6495
|
| 79 |
+
2026-01-21 23:09:46 | INFO | [Task alignment][Step 1200/5000] loss=0.3896 | mae=0.3896
|
| 80 |
+
2026-01-21 23:10:18 | INFO | [Task alignment][Step 1400/5000] loss=0.3590 | mae=0.3590
|
| 81 |
+
2026-01-21 23:10:50 | INFO | [Task alignment][Step 1600/5000] loss=0.3413 | mae=0.3413
|
| 82 |
+
2026-01-21 23:11:26 | INFO | [Task alignment][Step 1800/5000] loss=0.3241 | mae=0.3241
|
| 83 |
+
2026-01-21 23:11:58 | INFO | [Task alignment][Step 2000/5000] loss=0.3065 | mae=0.3065
|
| 84 |
+
2026-01-21 23:12:04 | INFO | [Val] alignment: loss=0.6834 | mae=0.6834
|
| 85 |
+
2026-01-21 23:12:37 | INFO | [Task alignment][Step 2200/5000] loss=0.2848 | mae=0.2848
|
| 86 |
+
2026-01-21 23:13:09 | INFO | [Task alignment][Step 2400/5000] loss=0.2722 | mae=0.2722
|
| 87 |
+
2026-01-21 23:13:44 | INFO | [Task alignment][Step 2600/5000] loss=0.2566 | mae=0.2566
|
| 88 |
+
2026-01-21 23:14:15 | INFO | [Task alignment][Step 2800/5000] loss=0.2472 | mae=0.2472
|
| 89 |
+
2026-01-21 23:14:47 | INFO | [Task alignment][Step 3000/5000] loss=0.2325 | mae=0.2325
|
| 90 |
+
2026-01-21 23:14:53 | INFO | [Val] alignment: loss=0.7010 | mae=0.7010
|
| 91 |
+
2026-01-21 23:15:24 | INFO | [Task alignment][Step 3200/5000] loss=0.2203 | mae=0.2203
|
| 92 |
+
2026-01-21 23:15:59 | INFO | [Task alignment][Step 3400/5000] loss=0.2091 | mae=0.2091
|
| 93 |
+
2026-01-21 23:16:33 | INFO | [Task alignment][Step 3600/5000] loss=0.2033 | mae=0.2033
|
| 94 |
+
2026-01-21 23:17:11 | INFO | [Task alignment][Step 3800/5000] loss=0.1936 | mae=0.1936
|
| 95 |
+
2026-01-21 23:17:45 | INFO | [Task alignment][Step 4000/5000] loss=0.1850 | mae=0.1850
|
| 96 |
+
2026-01-21 23:17:50 | INFO | [Val] alignment: loss=0.7168 | mae=0.7168
|
| 97 |
+
2026-01-21 23:18:25 | INFO | [Task alignment][Step 4200/5000] loss=0.1814 | mae=0.1814
|
| 98 |
+
2026-01-21 23:18:57 | INFO | [Task alignment][Step 4400/5000] loss=0.1766 | mae=0.1766
|
| 99 |
+
2026-01-21 23:19:31 | INFO | [Task alignment][Step 4600/5000] loss=0.1719 | mae=0.1719
|
| 100 |
+
2026-01-21 23:20:05 | INFO | [Task alignment][Step 4800/5000] loss=0.1727 | mae=0.1727
|
| 101 |
+
2026-01-21 23:20:39 | INFO | [Task alignment][Step 5000/5000] loss=0.1724 | mae=0.1724
|
| 102 |
+
2026-01-21 23:20:45 | INFO | [Val] alignment: loss=0.7154 | mae=0.7154
|
| 103 |
+
2026-01-21 23:20:45 | INFO | Task 'alignment' complete. Running validation...
|
| 104 |
+
2026-01-21 23:20:49 | INFO | [Final Val for alignment] loss=0.7154 | mae=0.7154
|
| 105 |
+
2026-01-21 23:20:49 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2243/ckpt/task_alignment_final.pt
|
| 106 |
+
2026-01-21 23:20:49 | INFO |
|
| 107 |
+
============================================================
|
| 108 |
+
2026-01-21 23:20:49 | INFO | All tasks complete. Running final validation for all tasks...
|
| 109 |
+
2026-01-21 23:20:53 | INFO | [Final Val] musicality: loss=0.7481 | mae=0.7481
|
| 110 |
+
2026-01-21 23:20:58 | INFO | [Final Val] alignment: loss=0.7154 | mae=0.7154
|
| 111 |
+
2026-01-21 23:20:58 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2243/ckpt/downstream_final.pt
|
| 112 |
+
2026-01-21 23:20:58 | INFO | Done! Checkpoint saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2243/ckpt/downstream_final.pt
|
downstream/20260121_2300/config.yaml
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 2 |
+
basics:
|
| 3 |
+
random_seed: 42
|
| 4 |
+
save_dir: ${project_root}/experiments/downstream
|
| 5 |
+
run_name: null
|
| 6 |
+
tasks:
|
| 7 |
+
- musicality
|
| 8 |
+
- alignment
|
| 9 |
+
backbone:
|
| 10 |
+
checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/contrastive/20260121_0043_tune
|
| 11 |
+
transformer/ckpt/reward_model.0.pt
|
| 12 |
+
freeze: true
|
| 13 |
+
from_ema: false
|
| 14 |
+
dataset:
|
| 15 |
+
train_file: ${project_root}/train_multitask.jsonl
|
| 16 |
+
test_file: ${project_root}/test_multitask.jsonl
|
| 17 |
+
heads:
|
| 18 |
+
hidden_dim: 768
|
| 19 |
+
init_from: score_projector
|
| 20 |
+
musicality:
|
| 21 |
+
use_mlp: true
|
| 22 |
+
ordinal: false
|
| 23 |
+
dropout: 0.1
|
| 24 |
+
num_categories: 9
|
| 25 |
+
y_min: 1.0
|
| 26 |
+
y_max: 5.0
|
| 27 |
+
step: 0.5
|
| 28 |
+
alignment:
|
| 29 |
+
use_mlp: true
|
| 30 |
+
ordinal: false
|
| 31 |
+
dropout: 0.1
|
| 32 |
+
num_categories: 9
|
| 33 |
+
y_min: 1.0
|
| 34 |
+
y_max: 5.0
|
| 35 |
+
step: 0.5
|
| 36 |
+
preference:
|
| 37 |
+
use_mlp: true
|
| 38 |
+
dropout: 0.1
|
| 39 |
+
train:
|
| 40 |
+
num_epochs: 10
|
| 41 |
+
num_train_steps: 2000
|
| 42 |
+
batch_size: 48
|
| 43 |
+
learning_rate: 0.001
|
| 44 |
+
weight_decay: 0.01
|
| 45 |
+
max_grad_norm: 1.0
|
| 46 |
+
warmup_steps: 100
|
| 47 |
+
schedule_type: cosine
|
| 48 |
+
min_lr_ratio: 0.01
|
| 49 |
+
dataset_mode: sequential
|
| 50 |
+
steps_per_task: 5000
|
| 51 |
+
log_interval: 200
|
| 52 |
+
val_interval: 1000
|
| 53 |
+
save_interval: 1000
|
| 54 |
+
num_workers: 8
|
| 55 |
+
resume: null
|
| 56 |
+
device: cuda:0
|
downstream/20260121_2300/train.log
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-21 23:00:41 | INFO | Starting downstream training: 20260121_2300
|
| 2 |
+
2026-01-21 23:00:41 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2300
|
| 3 |
+
2026-01-21 23:00:41 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2300/config.yaml
|
| 4 |
+
2026-01-21 23:00:41 | INFO | Training tasks: ['musicality', 'alignment']
|
| 5 |
+
2026-01-21 23:00:41 | INFO | Dataset mode: sequential
|
| 6 |
+
2026-01-21 23:00:44 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/contrastive/20260121_0043_tune transformer/ckpt/reward_model.0.pt
|
| 7 |
+
2026-01-21 23:00:44 | INFO | Using checkpoint config for model
|
downstream/20260121_2319/config.yaml
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 2 |
+
basics:
|
| 3 |
+
random_seed: 42
|
| 4 |
+
save_dir: ${project_root}/experiments/downstream
|
| 5 |
+
run_name: null
|
| 6 |
+
tasks:
|
| 7 |
+
- musicality
|
| 8 |
+
- alignment
|
| 9 |
+
backbone:
|
| 10 |
+
checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 11 |
+
freeze: true
|
| 12 |
+
from_ema: false
|
| 13 |
+
dataset:
|
| 14 |
+
train_file: ${project_root}/train_multitask.jsonl
|
| 15 |
+
test_file: ${project_root}/test_multitask.jsonl
|
| 16 |
+
heads:
|
| 17 |
+
hidden_dim: 768
|
| 18 |
+
init_from: score_projector
|
| 19 |
+
musicality:
|
| 20 |
+
use_mlp: true
|
| 21 |
+
ordinal: false
|
| 22 |
+
dropout: 0.1
|
| 23 |
+
num_categories: 9
|
| 24 |
+
y_min: 1.0
|
| 25 |
+
y_max: 5.0
|
| 26 |
+
step: 0.5
|
| 27 |
+
alignment:
|
| 28 |
+
use_mlp: true
|
| 29 |
+
ordinal: false
|
| 30 |
+
dropout: 0.1
|
| 31 |
+
num_categories: 9
|
| 32 |
+
y_min: 1.0
|
| 33 |
+
y_max: 5.0
|
| 34 |
+
step: 0.5
|
| 35 |
+
preference:
|
| 36 |
+
use_mlp: true
|
| 37 |
+
dropout: 0.1
|
| 38 |
+
train:
|
| 39 |
+
num_epochs: 10
|
| 40 |
+
num_train_steps: 2000
|
| 41 |
+
batch_size: 48
|
| 42 |
+
learning_rate: 0.001
|
| 43 |
+
weight_decay: 0.01
|
| 44 |
+
max_grad_norm: 1.0
|
| 45 |
+
warmup_steps: 100
|
| 46 |
+
schedule_type: cosine
|
| 47 |
+
min_lr_ratio: 0.01
|
| 48 |
+
dataset_mode: sequential
|
| 49 |
+
steps_per_task: 5000
|
| 50 |
+
log_interval: 200
|
| 51 |
+
val_interval: 1000
|
| 52 |
+
save_interval: 1000
|
| 53 |
+
num_workers: 8
|
| 54 |
+
resume: null
|
| 55 |
+
device: cuda:1
|
downstream/20260121_2319/train.log
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-21 23:19:02 | INFO | Starting downstream training: 20260121_2319
|
| 2 |
+
2026-01-21 23:19:02 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2319
|
| 3 |
+
2026-01-21 23:19:02 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2319/config.yaml
|
| 4 |
+
2026-01-21 23:19:02 | INFO | Training tasks: ['musicality', 'alignment']
|
| 5 |
+
2026-01-21 23:19:02 | INFO | Dataset mode: sequential
|
| 6 |
+
2026-01-21 23:19:22 | INFO | Starting downstream training: 20260121_2319
|
| 7 |
+
2026-01-21 23:19:22 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2319
|
| 8 |
+
2026-01-21 23:19:22 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2319/config.yaml
|
| 9 |
+
2026-01-21 23:19:22 | INFO | Training tasks: ['musicality', 'alignment']
|
| 10 |
+
2026-01-21 23:19:22 | INFO | Dataset mode: sequential
|
| 11 |
+
2026-01-21 23:19:39 | INFO | Starting downstream training: 20260121_2319
|
| 12 |
+
2026-01-21 23:19:39 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2319
|
| 13 |
+
2026-01-21 23:19:39 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2319/config.yaml
|
| 14 |
+
2026-01-21 23:19:39 | INFO | Training tasks: ['musicality', 'alignment']
|
| 15 |
+
2026-01-21 23:19:39 | INFO | Dataset mode: sequential
|
| 16 |
+
2026-01-21 23:19:42 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 17 |
+
2026-01-21 23:19:42 | INFO | Using checkpoint config for model
|
| 18 |
+
2026-01-21 23:19:48 | INFO | Missing keys (794): ['alignment_head.0.weight', 'alignment_head.0.bias', 'alignment_head.1.weight', 'alignment_head.1.bias', 'alignment_head.3.weight']...
|
| 19 |
+
2026-01-21 23:19:48 | WARNING | Missing keys: 283
|
| 20 |
+
2026-01-21 23:19:49 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 21 |
+
2026-01-21 23:19:49 | INFO | Added mlp head for task 'musicality'
|
| 22 |
+
2026-01-21 23:19:49 | INFO | Added mlp head for task 'alignment'
|
| 23 |
+
2026-01-21 23:19:49 | INFO | Initializing heads from backbone 'score_projector'
|
| 24 |
+
2026-01-21 23:19:49 | INFO | Initializing 2 heads from 'score_projector'
|
| 25 |
+
2026-01-21 23:19:49 | INFO | Task 'musicality': type=mlp, ordinal=False
|
| 26 |
+
2026-01-21 23:19:49 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
|
| 27 |
+
2026-01-21 23:19:49 | INFO | Slicing 3.weight -> 4.weight: torch.Size([2, 768]) -> torch.Size([1, 768])
|
| 28 |
+
2026-01-21 23:19:49 | INFO | Loaded 6 parameters, 0 missing
|
| 29 |
+
2026-01-21 23:19:49 | INFO | ✓ Head initialized from 'score_projector'
|
| 30 |
+
2026-01-21 23:19:49 | INFO | Task 'alignment': type=mlp, ordinal=False
|
| 31 |
+
2026-01-21 23:19:49 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
|
| 32 |
+
2026-01-21 23:19:49 | INFO | Slicing 3.weight -> 4.weight: torch.Size([2, 768]) -> torch.Size([1, 768])
|
| 33 |
+
2026-01-21 23:19:49 | INFO | Loaded 6 parameters, 0 missing
|
| 34 |
+
2026-01-21 23:19:49 | INFO | ✓ Head initialized from 'score_projector'
|
| 35 |
+
2026-01-21 23:19:49 | INFO | ✓ All heads initialized
|
| 36 |
+
2026-01-21 23:19:49 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
|
| 37 |
+
2026-01-21 23:19:49 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
|
| 38 |
+
2026-01-21 23:19:49 | INFO | Task 'musicality': train=4322, test=913
|
| 39 |
+
2026-01-21 23:19:49 | INFO | Task 'alignment': train=1923, test=913
|
| 40 |
+
2026-01-21 23:19:49 | INFO | [SEQUENTIAL MODE] Training 2 tasks, 5000 steps each
|
| 41 |
+
2026-01-21 23:19:49 | INFO |
|
| 42 |
+
============================================================
|
| 43 |
+
2026-01-21 23:19:49 | INFO | Starting Task 1/2: musicality
|
| 44 |
+
2026-01-21 23:19:49 | INFO | ============================================================
|
| 45 |
+
2026-01-21 23:19:49 | INFO | Task 'musicality' trainable parameters: 592,897
|
downstream/20260121_2327/config.yaml
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 2 |
+
basics:
|
| 3 |
+
random_seed: 42
|
| 4 |
+
save_dir: ${project_root}/experiments/downstream
|
| 5 |
+
run_name: null
|
| 6 |
+
tasks:
|
| 7 |
+
- musicality
|
| 8 |
+
- alignment
|
| 9 |
+
backbone:
|
| 10 |
+
checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/contrastive/20260121_0043_tune
|
| 11 |
+
transformer/ckpt/reward_model.0.pt
|
| 12 |
+
freeze: true
|
| 13 |
+
from_ema: false
|
| 14 |
+
dataset:
|
| 15 |
+
train_file: ${project_root}/train_multitask.jsonl
|
| 16 |
+
test_file: ${project_root}/test_multitask.jsonl
|
| 17 |
+
heads:
|
| 18 |
+
hidden_dim: 768
|
| 19 |
+
init_from: score_projector
|
| 20 |
+
musicality:
|
| 21 |
+
use_mlp: true
|
| 22 |
+
ordinal: false
|
| 23 |
+
dropout: 0.1
|
| 24 |
+
num_categories: 9
|
| 25 |
+
y_min: 1.0
|
| 26 |
+
y_max: 5.0
|
| 27 |
+
step: 0.5
|
| 28 |
+
alignment:
|
| 29 |
+
use_mlp: true
|
| 30 |
+
ordinal: false
|
| 31 |
+
dropout: 0.1
|
| 32 |
+
num_categories: 9
|
| 33 |
+
y_min: 1.0
|
| 34 |
+
y_max: 5.0
|
| 35 |
+
step: 0.5
|
| 36 |
+
preference:
|
| 37 |
+
use_mlp: true
|
| 38 |
+
dropout: 0.1
|
| 39 |
+
train:
|
| 40 |
+
num_epochs: 10
|
| 41 |
+
num_train_steps: 2000
|
| 42 |
+
batch_size: 48
|
| 43 |
+
learning_rate: 0.001
|
| 44 |
+
weight_decay: 0.01
|
| 45 |
+
max_grad_norm: 1.0
|
| 46 |
+
warmup_steps: 100
|
| 47 |
+
schedule_type: cosine
|
| 48 |
+
min_lr_ratio: 0.01
|
| 49 |
+
dataset_mode: sequential
|
| 50 |
+
steps_per_task: 5000
|
| 51 |
+
log_interval: 200
|
| 52 |
+
val_interval: 1000
|
| 53 |
+
save_interval: 1000
|
| 54 |
+
num_workers: 8
|
| 55 |
+
resume: null
|
| 56 |
+
device: cuda:1
|
downstream/20260121_2327/train.log
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-21 23:27:01 | INFO | Starting downstream training: 20260121_2327
|
| 2 |
+
2026-01-21 23:27:01 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2327
|
| 3 |
+
2026-01-21 23:27:01 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2327/config.yaml
|
| 4 |
+
2026-01-21 23:27:01 | INFO | Training tasks: ['musicality', 'alignment']
|
| 5 |
+
2026-01-21 23:27:01 | INFO | Dataset mode: sequential
|
| 6 |
+
2026-01-21 23:27:03 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/contrastive/20260121_0043_tune transformer/ckpt/reward_model.0.pt
|
| 7 |
+
2026-01-21 23:27:03 | INFO | Using checkpoint config for model
|
| 8 |
+
2026-01-21 23:27:09 | INFO | Skipping score_projector.3.weight: shape mismatch (ckpt torch.Size([1, 768]) vs model torch.Size([2, 768])), will use randomly initialized weights
|
| 9 |
+
2026-01-21 23:27:09 | INFO | Skipping score_projector.3.bias: shape mismatch (ckpt torch.Size([1]) vs model torch.Size([2])), will use randomly initialized weights
|
| 10 |
+
2026-01-21 23:27:09 | INFO | Missing keys (570): ['score_projector.3.weight', 'score_projector.3.bias', 'text_module.model.shared.weight', 'text_module.model.encoder.embed_tokens.weight', 'text_module.model.encoder.block.0.layer.0.SelfAttention.q.weight']...
|
| 11 |
+
2026-01-21 23:27:09 | WARNING | Missing keys: 59
|
| 12 |
+
2026-01-21 23:27:10 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/contrastive/20260121_0043_tune transformer/ckpt/reward_model.0.pt
|
| 13 |
+
2026-01-21 23:27:10 | INFO | Added mlp head for task 'musicality'
|
| 14 |
+
2026-01-21 23:27:10 | INFO | Added mlp head for task 'alignment'
|
| 15 |
+
2026-01-21 23:27:10 | INFO | Initializing heads from backbone 'score_projector'
|
| 16 |
+
2026-01-21 23:27:10 | INFO | Initializing 2 heads from 'score_projector'
|
| 17 |
+
2026-01-21 23:27:10 | INFO | Task 'musicality': type=mlp, ordinal=False
|
| 18 |
+
2026-01-21 23:27:10 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
|
| 19 |
+
2026-01-21 23:27:10 | INFO | Mapped 3.weight -> 4.weight
|
| 20 |
+
2026-01-21 23:27:10 | INFO | Mapped 3.bias -> 4.bias
|
| 21 |
+
2026-01-21 23:27:10 | INFO | Loaded 6 parameters, 0 missing
|
| 22 |
+
2026-01-21 23:27:10 | INFO | ✓ Head initialized from 'score_projector'
|
| 23 |
+
2026-01-21 23:27:10 | INFO | Task 'alignment': type=mlp, ordinal=False
|
| 24 |
+
2026-01-21 23:27:10 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
|
| 25 |
+
2026-01-21 23:27:10 | INFO | Mapped 3.weight -> 4.weight
|
| 26 |
+
2026-01-21 23:27:10 | INFO | Mapped 3.bias -> 4.bias
|
| 27 |
+
2026-01-21 23:27:10 | INFO | Loaded 6 parameters, 0 missing
|
| 28 |
+
2026-01-21 23:27:10 | INFO | ✓ Head initialized from 'score_projector'
|
| 29 |
+
2026-01-21 23:27:10 | INFO | ✓ All heads initialized
|
| 30 |
+
2026-01-21 23:27:10 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
|
| 31 |
+
2026-01-21 23:27:10 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
|
| 32 |
+
2026-01-21 23:27:10 | INFO | Task 'musicality': train=4322, test=913
|
| 33 |
+
2026-01-21 23:27:10 | INFO | Task 'alignment': train=1923, test=913
|
| 34 |
+
2026-01-21 23:27:10 | INFO | [SEQUENTIAL MODE] Training 2 tasks, 5000 steps each
|
| 35 |
+
2026-01-21 23:27:10 | INFO |
|
| 36 |
+
============================================================
|
| 37 |
+
2026-01-21 23:27:10 | INFO | Starting Task 1/2: musicality
|
| 38 |
+
2026-01-21 23:27:10 | INFO | ============================================================
|
| 39 |
+
2026-01-21 23:27:10 | INFO | Task 'musicality' trainable parameters: 592,897
|
| 40 |
+
2026-01-21 23:27:43 | INFO | Starting downstream training: 20260121_2327
|
| 41 |
+
2026-01-21 23:27:43 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2327
|
| 42 |
+
2026-01-21 23:27:43 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2327/config.yaml
|
| 43 |
+
2026-01-21 23:27:43 | INFO | Training tasks: ['musicality', 'alignment']
|
| 44 |
+
2026-01-21 23:27:43 | INFO | Dataset mode: sequential
|
| 45 |
+
2026-01-21 23:27:45 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/contrastive/20260121_0043_tune transformer/ckpt/reward_model.0.pt
|
| 46 |
+
2026-01-21 23:27:45 | INFO | Using checkpoint config for model
|
| 47 |
+
2026-01-21 23:27:51 | INFO | Skipping score_projector.3.weight: shape mismatch (ckpt torch.Size([1, 768]) vs model torch.Size([2, 768])), will use randomly initialized weights
|
| 48 |
+
2026-01-21 23:27:51 | INFO | Skipping score_projector.3.bias: shape mismatch (ckpt torch.Size([1]) vs model torch.Size([2])), will use randomly initialized weights
|
| 49 |
+
2026-01-21 23:27:51 | INFO | Missing keys (570): ['score_projector.3.weight', 'score_projector.3.bias', 'text_module.model.shared.weight', 'text_module.model.encoder.embed_tokens.weight', 'text_module.model.encoder.block.0.layer.0.SelfAttention.q.weight']...
|
| 50 |
+
2026-01-21 23:27:51 | WARNING | Missing keys: 59
|
| 51 |
+
2026-01-21 23:27:52 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/contrastive/20260121_0043_tune transformer/ckpt/reward_model.0.pt
|
| 52 |
+
2026-01-21 23:27:52 | INFO | Added mlp head for task 'musicality'
|
| 53 |
+
2026-01-21 23:27:52 | INFO | Added mlp head for task 'alignment'
|
| 54 |
+
2026-01-21 23:27:52 | INFO | Initializing heads from backbone 'score_projector'
|
| 55 |
+
2026-01-21 23:27:52 | INFO | Initializing 2 heads from 'score_projector'
|
| 56 |
+
2026-01-21 23:27:52 | INFO | Task 'musicality': type=mlp, ordinal=False
|
| 57 |
+
2026-01-21 23:27:52 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
|
| 58 |
+
2026-01-21 23:27:52 | INFO | Mapped 3.weight -> 4.weight
|
| 59 |
+
2026-01-21 23:27:52 | INFO | Mapped 3.bias -> 4.bias
|
| 60 |
+
2026-01-21 23:27:52 | INFO | Loaded 6 parameters, 0 missing
|
| 61 |
+
2026-01-21 23:27:52 | INFO | ✓ Head initialized from 'score_projector'
|
| 62 |
+
2026-01-21 23:27:52 | INFO | Task 'alignment': type=mlp, ordinal=False
|
| 63 |
+
2026-01-21 23:27:52 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
|
| 64 |
+
2026-01-21 23:27:52 | INFO | Mapped 3.weight -> 4.weight
|
| 65 |
+
2026-01-21 23:27:52 | INFO | Mapped 3.bias -> 4.bias
|
| 66 |
+
2026-01-21 23:27:52 | INFO | Loaded 6 parameters, 0 missing
|
| 67 |
+
2026-01-21 23:27:52 | INFO | ✓ Head initialized from 'score_projector'
|
| 68 |
+
2026-01-21 23:27:52 | INFO | ✓ All heads initialized
|
| 69 |
+
2026-01-21 23:27:52 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
|
| 70 |
+
2026-01-21 23:27:52 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
|
| 71 |
+
2026-01-21 23:27:52 | INFO | Task 'musicality': train=4322, test=913
|
| 72 |
+
2026-01-21 23:27:52 | INFO | Task 'alignment': train=1923, test=913
|
| 73 |
+
2026-01-21 23:27:52 | INFO | [SEQUENTIAL MODE] Training 2 tasks, 5000 steps each
|
| 74 |
+
2026-01-21 23:27:52 | INFO |
|
| 75 |
+
============================================================
|
| 76 |
+
2026-01-21 23:27:52 | INFO | Starting Task 1/2: musicality
|
| 77 |
+
2026-01-21 23:27:52 | INFO | ============================================================
|
| 78 |
+
2026-01-21 23:27:52 | INFO | Task 'musicality' trainable parameters: 592,897
|
| 79 |
+
2026-01-21 23:28:46 | INFO | [Task musicality][Step 200/5000] loss=0.7480 | mse=1.1321
|
| 80 |
+
2026-01-21 23:29:34 | INFO | [Task musicality][Step 400/5000] loss=0.5146 | mse=0.4447
|
| 81 |
+
2026-01-21 23:30:28 | INFO | [Task musicality][Step 600/5000] loss=0.4973 | mse=0.4153
|
| 82 |
+
2026-01-21 23:31:20 | INFO | [Task musicality][Step 800/5000] loss=0.4934 | mse=0.4153
|
| 83 |
+
2026-01-21 23:32:17 | INFO | [Task musicality][Step 1000/5000] loss=0.4766 | mse=0.3859
|
| 84 |
+
2026-01-21 23:32:22 | INFO | [Val] musicality: loss=1.5986 | mse=3.2512
|
| 85 |
+
2026-01-21 23:33:12 | INFO | [Task musicality][Step 1200/5000] loss=0.4538 | mse=0.3574
|
| 86 |
+
2026-01-21 23:34:00 | INFO | [Task musicality][Step 1400/5000] loss=0.4543 | mse=0.3581
|
| 87 |
+
2026-01-21 23:34:54 | INFO | [Task musicality][Step 1600/5000] loss=0.4426 | mse=0.3487
|
| 88 |
+
2026-01-21 23:35:45 | INFO | [Task musicality][Step 1800/5000] loss=0.4348 | mse=0.3336
|
| 89 |
+
2026-01-21 23:36:40 | INFO | [Task musicality][Step 2000/5000] loss=0.4232 | mse=0.3218
|
| 90 |
+
2026-01-21 23:36:43 | INFO | [Val] musicality: loss=1.0229 | mse=1.7325
|
| 91 |
+
2026-01-21 23:37:32 | INFO | [Task musicality][Step 2200/5000] loss=0.4281 | mse=0.3258
|
| 92 |
+
2026-01-21 23:38:22 | INFO | [Task musicality][Step 2400/5000] loss=0.4129 | mse=0.3094
|
| 93 |
+
2026-01-21 23:39:13 | INFO | [Task musicality][Step 2600/5000] loss=0.4015 | mse=0.2967
|
| 94 |
+
2026-01-21 23:40:04 | INFO | [Task musicality][Step 2800/5000] loss=0.3916 | mse=0.2818
|
| 95 |
+
2026-01-21 23:40:52 | INFO | [Task musicality][Step 3000/5000] loss=0.3814 | mse=0.2732
|
| 96 |
+
2026-01-21 23:40:55 | INFO | [Val] musicality: loss=0.7424 | mse=0.9581
|
| 97 |
+
2026-01-21 23:41:42 | INFO | [Task musicality][Step 3200/5000] loss=0.3737 | mse=0.2659
|
| 98 |
+
2026-01-21 23:42:30 | INFO | [Task musicality][Step 3400/5000] loss=0.3633 | mse=0.2544
|
| 99 |
+
2026-01-21 23:43:27 | INFO | [Task musicality][Step 3600/5000] loss=0.3555 | mse=0.2459
|
| 100 |
+
2026-01-21 23:44:19 | INFO | [Task musicality][Step 3800/5000] loss=0.3470 | mse=0.2390
|
| 101 |
+
2026-01-21 23:45:12 | INFO | [Task musicality][Step 4000/5000] loss=0.3362 | mse=0.2276
|
| 102 |
+
2026-01-21 23:45:14 | INFO | [Val] musicality: loss=0.7599 | mse=0.9227
|
| 103 |
+
2026-01-21 23:46:09 | INFO | [Task musicality][Step 4200/5000] loss=0.3332 | mse=0.2259
|
| 104 |
+
2026-01-21 23:47:02 | INFO | [Task musicality][Step 4400/5000] loss=0.3287 | mse=0.2197
|
| 105 |
+
2026-01-21 23:48:00 | INFO | [Task musicality][Step 4600/5000] loss=0.3225 | mse=0.2162
|
| 106 |
+
2026-01-21 23:48:52 | INFO | [Task musicality][Step 4800/5000] loss=0.3192 | mse=0.2123
|
| 107 |
+
2026-01-21 23:50:06 | INFO | [Task musicality][Step 5000/5000] loss=0.3236 | mse=0.2188
|
| 108 |
+
2026-01-21 23:50:09 | INFO | [Val] musicality: loss=0.7602 | mse=0.9111
|
| 109 |
+
2026-01-21 23:50:09 | INFO | Task 'musicality' complete. Running validation...
|
| 110 |
+
2026-01-21 23:50:12 | INFO | [Final Val for musicality] loss=0.7602 | mse=0.9111
|
| 111 |
+
2026-01-21 23:50:12 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2327/ckpt/task_musicality_final.pt
|
| 112 |
+
2026-01-21 23:50:12 | INFO |
|
| 113 |
+
============================================================
|
| 114 |
+
2026-01-21 23:50:12 | INFO | Starting Task 2/2: alignment
|
| 115 |
+
2026-01-21 23:50:12 | INFO | ============================================================
|
| 116 |
+
2026-01-21 23:50:12 | INFO | Task 'alignment' trainable parameters: 592,897
|
| 117 |
+
2026-01-21 23:50:31 | INFO | [Task alignment][Step 200/5000] loss=0.7793 | mse=1.1387
|
| 118 |
+
2026-01-21 23:50:47 | INFO | [Task alignment][Step 400/5000] loss=0.5957 | mse=0.5738
|
| 119 |
+
2026-01-21 23:51:07 | INFO | [Task alignment][Step 600/5000] loss=0.5749 | mse=0.5411
|
| 120 |
+
2026-01-21 23:51:25 | INFO | [Task alignment][Step 800/5000] loss=0.5583 | mse=0.5129
|
| 121 |
+
2026-01-21 23:51:42 | INFO | [Task alignment][Step 1000/5000] loss=0.5405 | mse=0.4850
|
| 122 |
+
2026-01-21 23:51:45 | INFO | [Val] alignment: loss=2.3135 | mse=6.4499
|
| 123 |
+
2026-01-21 23:52:05 | INFO | [Task alignment][Step 1200/5000] loss=0.5375 | mse=0.4818
|
| 124 |
+
2026-01-21 23:52:23 | INFO | [Task alignment][Step 1400/5000] loss=0.5087 | mse=0.4395
|
| 125 |
+
2026-01-21 23:52:40 | INFO | [Task alignment][Step 1600/5000] loss=0.4874 | mse=0.4093
|
| 126 |
+
2026-01-21 23:53:04 | INFO | [Task alignment][Step 1800/5000] loss=0.4706 | mse=0.3846
|
| 127 |
+
2026-01-21 23:53:26 | INFO | [Task alignment][Step 2000/5000] loss=0.4602 | mse=0.3714
|
| 128 |
+
2026-01-21 23:53:29 | INFO | [Val] alignment: loss=1.9561 | mse=5.3080
|
| 129 |
+
2026-01-21 23:53:51 | INFO | [Task alignment][Step 2200/5000] loss=0.4380 | mse=0.3463
|
| 130 |
+
2026-01-21 23:54:11 | INFO | [Task alignment][Step 2400/5000] loss=0.4152 | mse=0.3179
|
| 131 |
+
2026-01-21 23:54:27 | INFO | [Task alignment][Step 2600/5000] loss=0.3968 | mse=0.2991
|
| 132 |
+
2026-01-21 23:54:47 | INFO | [Task alignment][Step 2800/5000] loss=0.3833 | mse=0.2826
|
| 133 |
+
2026-01-21 23:55:06 | INFO | [Task alignment][Step 3000/5000] loss=0.3697 | mse=0.2693
|
| 134 |
+
2026-01-21 23:55:08 | INFO | [Val] alignment: loss=1.5442 | mse=3.9591
|
| 135 |
+
2026-01-21 23:55:24 | INFO | [Task alignment][Step 3200/5000] loss=0.3551 | mse=0.2510
|
| 136 |
+
2026-01-21 23:55:46 | INFO | [Task alignment][Step 3400/5000] loss=0.3389 | mse=0.2355
|
| 137 |
+
2026-01-21 23:56:02 | INFO | [Task alignment][Step 3600/5000] loss=0.3294 | mse=0.2257
|
| 138 |
+
2026-01-21 23:56:22 | INFO | [Task alignment][Step 3800/5000] loss=0.3186 | mse=0.2177
|
| 139 |
+
2026-01-21 23:56:44 | INFO | [Task alignment][Step 4000/5000] loss=0.3100 | mse=0.2095
|
| 140 |
+
2026-01-21 23:56:47 | INFO | [Val] alignment: loss=1.2328 | mse=2.7735
|
| 141 |
+
2026-01-21 23:57:03 | INFO | [Task alignment][Step 4200/5000] loss=0.2984 | mse=0.1996
|
| 142 |
+
2026-01-21 23:57:19 | INFO | [Task alignment][Step 4400/5000] loss=0.2988 | mse=0.1998
|
| 143 |
+
2026-01-21 23:57:38 | INFO | [Task alignment][Step 4600/5000] loss=0.2932 | mse=0.1953
|
| 144 |
+
2026-01-21 23:57:55 | INFO | [Task alignment][Step 4800/5000] loss=0.2916 | mse=0.1949
|
| 145 |
+
2026-01-21 23:58:12 | INFO | [Task alignment][Step 5000/5000] loss=0.2898 | mse=0.1938
|
| 146 |
+
2026-01-21 23:58:15 | INFO | [Val] alignment: loss=1.2016 | mse=2.6704
|
| 147 |
+
2026-01-21 23:58:15 | INFO | Task 'alignment' complete. Running validation...
|
| 148 |
+
2026-01-21 23:58:18 | INFO | [Final Val for alignment] loss=1.2016 | mse=2.6704
|
| 149 |
+
2026-01-21 23:58:18 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2327/ckpt/task_alignment_final.pt
|
| 150 |
+
2026-01-21 23:58:18 | INFO |
|
| 151 |
+
============================================================
|
| 152 |
+
2026-01-21 23:58:18 | INFO | All tasks complete. Running final validation for all tasks...
|
| 153 |
+
2026-01-21 23:58:22 | INFO | [Final Val] musicality: loss=0.7602 | mse=0.9111
|
| 154 |
+
2026-01-21 23:58:25 | INFO | [Final Val] alignment: loss=1.2016 | mse=2.6704
|
| 155 |
+
2026-01-21 23:58:25 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2327/ckpt/downstream_final.pt
|
| 156 |
+
2026-01-21 23:58:25 | INFO | Done! Checkpoint saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2327/ckpt/downstream_final.pt
|
downstream/20260123_0028/downstream_config.yaml
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 2 |
+
basics:
|
| 3 |
+
random_seed: 42
|
| 4 |
+
save_dir: ${project_root}/experiments/downstream
|
| 5 |
+
run_name: null
|
| 6 |
+
tasks:
|
| 7 |
+
- musicality
|
| 8 |
+
backbone:
|
| 9 |
+
checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/contrastive/20260121_0056_tune_t5_transformer/ckpt/reward_model.best_49205.pt
|
| 10 |
+
freeze: true
|
| 11 |
+
from_ema: false
|
| 12 |
+
dataset:
|
| 13 |
+
train_file: ${project_root}/train_multitask.jsonl
|
| 14 |
+
test_file: ${project_root}/test_multitask.jsonl
|
| 15 |
+
heads:
|
| 16 |
+
hidden_dim: 768
|
| 17 |
+
init_from: score_projector
|
| 18 |
+
musicality:
|
| 19 |
+
use_mlp: true
|
| 20 |
+
ordinal: false
|
| 21 |
+
dropout: 0.1
|
| 22 |
+
num_categories: 9
|
| 23 |
+
y_min: 1.0
|
| 24 |
+
y_max: 5.0
|
| 25 |
+
step: 0.5
|
| 26 |
+
alignment:
|
| 27 |
+
use_mlp: true
|
| 28 |
+
ordinal: false
|
| 29 |
+
dropout: 0.1
|
| 30 |
+
num_categories: 9
|
| 31 |
+
y_min: 1.0
|
| 32 |
+
y_max: 5.0
|
| 33 |
+
step: 0.5
|
| 34 |
+
preference:
|
| 35 |
+
use_mlp: true
|
| 36 |
+
dropout: 0.1
|
| 37 |
+
train:
|
| 38 |
+
num_epochs: 10
|
| 39 |
+
num_train_steps: 2000
|
| 40 |
+
batch_size: 48
|
| 41 |
+
learning_rate: 0.001
|
| 42 |
+
weight_decay: 0.01
|
| 43 |
+
max_grad_norm: 1.0
|
| 44 |
+
warmup_steps: 100
|
| 45 |
+
schedule_type: cosine
|
| 46 |
+
min_lr_ratio: 0.01
|
| 47 |
+
dataset_mode: sequential
|
| 48 |
+
steps_per_task: 5000
|
| 49 |
+
log_interval: 200
|
| 50 |
+
val_interval: 1000
|
| 51 |
+
save_interval: 1000
|
| 52 |
+
num_workers: 8
|
| 53 |
+
resume: null
|
| 54 |
+
device: cuda:4
|
downstream/20260123_0028/train.log
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-23 00:28:23 | INFO | Starting downstream training: 20260123_0028
|
| 2 |
+
2026-01-23 00:28:23 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260123_0028
|
| 3 |
+
2026-01-23 00:28:23 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260123_0028/downstream_config.yaml
|
| 4 |
+
2026-01-23 00:28:23 | INFO | Training tasks: ['musicality']
|
| 5 |
+
2026-01-23 00:28:23 | INFO | Dataset mode: sequential
|
| 6 |
+
2026-01-23 00:28:26 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/contrastive/20260121_0056_tune_t5_transformer/ckpt/reward_model.best_49205.pt
|
| 7 |
+
2026-01-23 00:28:27 | INFO | Using checkpoint config for model
|
| 8 |
+
2026-01-23 00:28:33 | INFO | Skipping score_projector.3.weight: shape mismatch (ckpt torch.Size([1, 768]) vs model torch.Size([2, 768])), will use randomly initialized weights
|
| 9 |
+
2026-01-23 00:28:33 | INFO | Skipping score_projector.3.bias: shape mismatch (ckpt torch.Size([1]) vs model torch.Size([2])), will use randomly initialized weights
|
| 10 |
+
2026-01-23 00:28:33 | INFO | Missing keys (570): ['score_projector.3.weight', 'score_projector.3.bias', 'text_module.model.shared.weight', 'text_module.model.encoder.embed_tokens.weight', 'text_module.model.encoder.block.0.layer.0.SelfAttention.q.weight']...
|
| 11 |
+
2026-01-23 00:28:33 | WARNING | Missing keys: 59
|
| 12 |
+
2026-01-23 00:28:33 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/contrastive/20260121_0056_tune_t5_transformer/ckpt/reward_model.best_49205.pt
|
| 13 |
+
2026-01-23 00:28:33 | INFO | Created DownstreamTaskModel (backbone frozen)
|
| 14 |
+
2026-01-23 00:28:33 | INFO | Added mlp head for task 'musicality'
|
| 15 |
+
2026-01-23 00:28:33 | INFO | Initializing heads from backbone 'score_projector'
|
| 16 |
+
2026-01-23 00:28:33 | INFO | Initializing 1 heads from 'score_projector'
|
| 17 |
+
2026-01-23 00:28:33 | INFO | Task 'musicality': type=mlp, ordinal=False
|
| 18 |
+
2026-01-23 00:28:33 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
|
| 19 |
+
2026-01-23 00:28:33 | INFO | Mapped 3.weight -> 4.weight
|
| 20 |
+
2026-01-23 00:28:33 | INFO | Mapped 3.bias -> 4.bias
|
| 21 |
+
2026-01-23 00:28:33 | INFO | Loaded 6 parameters, 0 missing
|
| 22 |
+
2026-01-23 00:28:33 | INFO | ✓ Head initialized from 'score_projector'
|
| 23 |
+
2026-01-23 00:28:33 | INFO | ✓ All heads initialized
|
| 24 |
+
2026-01-23 00:28:33 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
|
| 25 |
+
2026-01-23 00:28:33 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
|
| 26 |
+
2026-01-23 00:28:33 | INFO | Task 'musicality': train=4322, test=913
|
| 27 |
+
2026-01-23 00:28:33 | INFO | [SEQUENTIAL MODE] Training 1 tasks, 5000 steps each
|
| 28 |
+
2026-01-23 00:28:33 | INFO |
|
| 29 |
+
============================================================
|
| 30 |
+
2026-01-23 00:28:33 | INFO | Starting Task 1/1: musicality
|
| 31 |
+
2026-01-23 00:28:33 | INFO | ============================================================
|
| 32 |
+
2026-01-23 00:28:33 | INFO | Task 'musicality' trainable parameters: 592,897
|
| 33 |
+
2026-01-23 00:29:35 | INFO | [Task musicality][Step 200/5000] loss=0.7903 | mse=1.1744
|
| 34 |
+
2026-01-23 00:30:26 | INFO | [Task musicality][Step 400/5000] loss=0.5796 | mse=0.5450
|
| 35 |
+
2026-01-23 00:31:22 | INFO | [Task musicality][Step 600/5000] loss=0.5725 | mse=0.5392
|
| 36 |
+
2026-01-23 00:32:25 | INFO | [Task musicality][Step 800/5000] loss=0.5767 | mse=0.5413
|
| 37 |
+
2026-01-23 00:33:21 | INFO | [Task musicality][Step 1000/5000] loss=0.5483 | mse=0.4924
|
| 38 |
+
2026-01-23 00:33:25 | INFO | [Val] musicality: loss=2.1414 | mse=5.9358
|
| 39 |
+
2026-01-23 00:34:22 | INFO | [Task musicality][Step 1200/5000] loss=0.5340 | mse=0.4705
|
| 40 |
+
2026-01-23 00:35:20 | INFO | [Task musicality][Step 1400/5000] loss=0.5328 | mse=0.4756
|
| 41 |
+
2026-01-23 00:36:11 | INFO | [Task musicality][Step 1600/5000] loss=0.5312 | mse=0.4669
|
| 42 |
+
2026-01-23 00:37:02 | INFO | [Task musicality][Step 1800/5000] loss=0.5304 | mse=0.4696
|
| 43 |
+
2026-01-23 00:38:00 | INFO | [Task musicality][Step 2000/5000] loss=0.5116 | mse=0.4377
|
| 44 |
+
2026-01-23 00:38:03 | INFO | [Val] musicality: loss=2.0244 | mse=5.7591
|
| 45 |
+
2026-01-23 00:38:55 | INFO | [Task musicality][Step 2200/5000] loss=0.5056 | mse=0.4309
|
| 46 |
+
2026-01-23 00:39:47 | INFO | [Task musicality][Step 2400/5000] loss=0.5109 | mse=0.4386
|
| 47 |
+
2026-01-23 00:40:44 | INFO | [Task musicality][Step 2600/5000] loss=0.4995 | mse=0.4218
|
| 48 |
+
2026-01-23 00:41:39 | INFO | [Task musicality][Step 2800/5000] loss=0.4991 | mse=0.4187
|
| 49 |
+
2026-01-23 00:42:29 | INFO | [Task musicality][Step 3000/5000] loss=0.4946 | mse=0.4163
|
| 50 |
+
2026-01-23 00:42:32 | INFO | [Val] musicality: loss=1.9526 | mse=5.2377
|
| 51 |
+
2026-01-23 00:43:24 | INFO | [Task musicality][Step 3200/5000] loss=0.4876 | mse=0.4065
|
| 52 |
+
2026-01-23 00:44:17 | INFO | [Task musicality][Step 3400/5000] loss=0.4854 | mse=0.4062
|
| 53 |
+
2026-01-23 00:45:09 | INFO | [Task musicality][Step 3600/5000] loss=0.4806 | mse=0.3975
|
| 54 |
+
2026-01-23 00:46:05 | INFO | [Task musicality][Step 3800/5000] loss=0.4834 | mse=0.3997
|
| 55 |
+
2026-01-23 00:47:00 | INFO | [Task musicality][Step 4000/5000] loss=0.4770 | mse=0.3938
|
| 56 |
+
2026-01-23 00:47:03 | INFO | [Val] musicality: loss=1.7301 | mse=4.4097
|
| 57 |
+
2026-01-23 00:47:54 | INFO | [Task musicality][Step 4200/5000] loss=0.4680 | mse=0.3779
|
| 58 |
+
2026-01-23 00:48:53 | INFO | [Task musicality][Step 4400/5000] loss=0.4696 | mse=0.3838
|
| 59 |
+
2026-01-23 00:49:55 | INFO | [Task musicality][Step 4600/5000] loss=0.4686 | mse=0.3824
|
| 60 |
+
2026-01-23 00:50:48 | INFO | [Task musicality][Step 4800/5000] loss=0.4648 | mse=0.3765
|
| 61 |
+
2026-01-23 00:51:41 | INFO | [Task musicality][Step 5000/5000] loss=0.4687 | mse=0.3847
|
| 62 |
+
2026-01-23 00:51:44 | INFO | [Val] musicality: loss=1.7043 | mse=4.3205
|
| 63 |
+
2026-01-23 00:51:44 | INFO | Task 'musicality' complete. Running validation...
|
| 64 |
+
2026-01-23 00:51:48 | INFO | [Final Val for musicality] loss=1.7043 | mse=4.3205
|
| 65 |
+
2026-01-23 00:51:48 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260123_0028/ckpt/task_musicality_final.pt
|
| 66 |
+
2026-01-23 00:51:48 | INFO |
|
| 67 |
+
============================================================
|
| 68 |
+
2026-01-23 00:51:48 | INFO | All tasks complete. Running final validation for all tasks...
|
| 69 |
+
2026-01-23 00:51:51 | INFO | [Final Val] musicality: loss=1.7043 | mse=4.3205
|
| 70 |
+
2026-01-23 00:51:51 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260123_0028/ckpt/downstream_final.pt
|
| 71 |
+
2026-01-23 00:51:51 | INFO | Done! Checkpoint saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260123_0028/ckpt/downstream_final.pt
|
downstream_mixed/20260122_1200/config.yaml
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 2 |
+
basics:
|
| 3 |
+
random_seed: 42
|
| 4 |
+
save_dir: ${project_root}/experiments/downstream_mixed
|
| 5 |
+
run_name: null
|
| 6 |
+
tasks:
|
| 7 |
+
- musicality
|
| 8 |
+
- alignment
|
| 9 |
+
- preference
|
| 10 |
+
backbone:
|
| 11 |
+
checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 12 |
+
freeze: false
|
| 13 |
+
freeze_encoder_only: true
|
| 14 |
+
from_ema: false
|
| 15 |
+
dataset:
|
| 16 |
+
train_file: ${project_root}/train_multitask.jsonl
|
| 17 |
+
test_file: ${project_root}/test_multitask.jsonl
|
| 18 |
+
heads:
|
| 19 |
+
hidden_dim: 768
|
| 20 |
+
init_from: score_projector
|
| 21 |
+
musicality:
|
| 22 |
+
use_mlp: true
|
| 23 |
+
ordinal: false
|
| 24 |
+
dropout: 0.1
|
| 25 |
+
num_categories: 9
|
| 26 |
+
y_min: 1.0
|
| 27 |
+
y_max: 5.0
|
| 28 |
+
step: 0.5
|
| 29 |
+
alignment:
|
| 30 |
+
use_mlp: true
|
| 31 |
+
ordinal: false
|
| 32 |
+
dropout: 0.1
|
| 33 |
+
num_categories: 9
|
| 34 |
+
y_min: 1.0
|
| 35 |
+
y_max: 5.0
|
| 36 |
+
step: 0.5
|
| 37 |
+
preference:
|
| 38 |
+
use_mlp: true
|
| 39 |
+
dropout: 0.1
|
| 40 |
+
train:
|
| 41 |
+
dataset_mode: mixed
|
| 42 |
+
num_train_steps: 5000
|
| 43 |
+
batch_size: 32
|
| 44 |
+
learning_rate: 0.0001
|
| 45 |
+
backbone_learning_rate: 1.0e-05
|
| 46 |
+
weight_decay: 0.01
|
| 47 |
+
max_grad_norm: 1.0
|
| 48 |
+
warmup_steps: 200
|
| 49 |
+
schedule_type: cosine
|
| 50 |
+
min_lr_ratio: 0.01
|
| 51 |
+
log_interval: 100
|
| 52 |
+
val_interval: 500
|
| 53 |
+
save_interval: 1000
|
| 54 |
+
num_workers: 8
|
| 55 |
+
resume: null
|
| 56 |
+
device: cuda:1
|
downstream_mixed/20260122_1200/train.log
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-22 12:00:33 | INFO | Starting downstream training: 20260122_1200
|
| 2 |
+
2026-01-22 12:00:33 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200
|
| 3 |
+
2026-01-22 12:00:33 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/config.yaml
|
| 4 |
+
2026-01-22 12:00:33 | INFO | Training tasks: ['musicality', 'alignment', 'preference']
|
| 5 |
+
2026-01-22 12:00:33 | INFO | Dataset mode: mixed
|
| 6 |
+
2026-01-22 12:00:36 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 7 |
+
2026-01-22 12:00:36 | INFO | Using checkpoint config for model
|
| 8 |
+
2026-01-22 12:00:43 | INFO | Missing keys (794): ['alignment_head.0.weight', 'alignment_head.0.bias', 'alignment_head.1.weight', 'alignment_head.1.bias', 'alignment_head.3.weight']...
|
| 9 |
+
2026-01-22 12:00:43 | WARNING | Missing keys: 283
|
| 10 |
+
2026-01-22 12:00:44 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 11 |
+
2026-01-22 12:00:44 | INFO | Created MixedDownstreamTaskModel (freeze_encoder_only=True)
|
| 12 |
+
2026-01-22 12:00:44 | INFO | Added mlp head for task 'musicality'
|
| 13 |
+
2026-01-22 12:00:44 | INFO | Added mlp head for task 'alignment'
|
| 14 |
+
2026-01-22 12:00:44 | INFO | Added mlp head for task 'preference'
|
| 15 |
+
2026-01-22 12:00:44 | INFO | Initializing heads from backbone 'score_projector'
|
| 16 |
+
2026-01-22 12:00:44 | INFO | Initializing 3 heads from 'score_projector'
|
| 17 |
+
2026-01-22 12:00:44 | INFO | Task 'musicality': type=mlp, ordinal=False
|
| 18 |
+
2026-01-22 12:00:44 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
|
| 19 |
+
2026-01-22 12:00:44 | INFO | Slicing 3.weight -> 4.weight: torch.Size([2, 768]) -> torch.Size([1, 768])
|
| 20 |
+
2026-01-22 12:00:44 | INFO | Loaded 6 parameters, 0 missing
|
| 21 |
+
2026-01-22 12:00:44 | INFO | ✓ Head initialized from 'score_projector'
|
| 22 |
+
2026-01-22 12:00:44 | INFO | Task 'alignment': type=mlp, ordinal=False
|
| 23 |
+
2026-01-22 12:00:44 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
|
| 24 |
+
2026-01-22 12:00:44 | INFO | Slicing 3.weight -> 4.weight: torch.Size([2, 768]) -> torch.Size([1, 768])
|
| 25 |
+
2026-01-22 12:00:44 | INFO | Loaded 6 parameters, 0 missing
|
| 26 |
+
2026-01-22 12:00:44 | INFO | ✓ Head initialized from 'score_projector'
|
| 27 |
+
2026-01-22 12:00:44 | INFO | Task 'preference': type=mlp, ordinal=False
|
| 28 |
+
2026-01-22 12:00:44 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
|
| 29 |
+
2026-01-22 12:00:44 | INFO | Slicing 3.weight -> 4.weight: torch.Size([2, 768]) -> torch.Size([1, 768])
|
| 30 |
+
2026-01-22 12:00:44 | INFO | Loaded 6 parameters, 0 missing
|
| 31 |
+
2026-01-22 12:00:44 | INFO | ✓ Head initialized from 'score_projector'
|
| 32 |
+
2026-01-22 12:00:44 | INFO | ✓ All heads initialized
|
| 33 |
+
2026-01-22 12:00:44 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
|
| 34 |
+
2026-01-22 12:00:44 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
|
| 35 |
+
2026-01-22 12:00:44 | INFO | Task 'musicality': train=4322, test=913
|
| 36 |
+
2026-01-22 12:00:44 | INFO | Task 'alignment': train=1923, test=913
|
| 37 |
+
2026-01-22 12:00:44 | INFO | Task 'preference': train=1065, test=275
|
| 38 |
+
2026-01-22 12:00:44 | INFO | Backbone trainable parameters: 21,279,237 (lr=1e-05)
|
| 39 |
+
2026-01-22 12:00:44 | INFO | Head parameters: 1,778,691 (lr=0.0001)
|
| 40 |
+
2026-01-22 12:00:44 | INFO | Total trainable parameters: 23,057,928
|
| 41 |
+
2026-01-22 12:00:44 | INFO | [MIXED MODE] Starting training for 5000 steps
|
| 42 |
+
2026-01-22 12:00:44 | INFO | Backbone LR: 1e-05, Head LR: 0.0001
|
| 43 |
+
2026-01-22 12:03:00 | INFO | [Step 100] musicality/loss=4.0637 | musicality/mse=26.2373 | alignment/loss=3.0336 | alignment/mse=14.6745 | preference/loss=1.5111 | preference/accuracy=0.5675 | lr_backbone=5.05e-06 | lr_heads=5.05e-05
|
| 44 |
+
2026-01-22 12:04:58 | INFO | [Step 200] musicality/loss=1.0183 | musicality/mse=1.7072 | alignment/loss=0.9960 | alignment/mse=1.6934 | preference/loss=0.6578 | preference/accuracy=0.6678 | lr_backbone=1.00e-05 | lr_heads=1.00e-04
|
| 45 |
+
2026-01-22 12:06:51 | INFO | [Step 300] musicality/loss=0.6885 | musicality/mse=0.7626 | alignment/loss=0.6725 | alignment/mse=0.7337 | preference/loss=0.5054 | preference/accuracy=0.7491 | lr_backbone=9.99e-06 | lr_heads=9.99e-05
|
| 46 |
+
2026-01-22 12:08:47 | INFO | [Step 400] musicality/loss=0.6275 | musicality/mse=0.6383 | alignment/loss=0.6024 | alignment/mse=0.5992 | preference/loss=0.4471 | preference/accuracy=0.7819 | lr_backbone=9.96e-06 | lr_heads=9.96e-05
|
| 47 |
+
2026-01-22 12:10:39 | INFO | [Step 500] musicality/loss=0.5644 | musicality/mse=0.5226 | alignment/loss=0.5718 | alignment/mse=0.5490 | preference/loss=0.4029 | preference/accuracy=0.8100 | lr_backbone=9.90e-06 | lr_heads=9.90e-05
|
| 48 |
+
2026-01-22 12:10:39 | INFO | [Step 500] Running validation...
|
| 49 |
+
2026-01-22 12:10:44 | INFO | [Val] musicality: loss=0.6608 | mse=0.6632
|
| 50 |
+
2026-01-22 12:10:49 | INFO | [Val] alignment: loss=0.6571 | mse=0.7059
|
| 51 |
+
2026-01-22 12:10:59 | INFO | [Val] preference: loss=0.5869 | accuracy=0.7231
|
| 52 |
+
2026-01-22 12:10:59 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_best.pt (81.2MB, 58 params)
|
| 53 |
+
2026-01-22 12:10:59 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_best_full.pt (161.6MB, 3 heads)
|
| 54 |
+
2026-01-22 12:10:59 | INFO | New best model saved (val_loss=0.6349)
|
| 55 |
+
2026-01-22 12:13:03 | INFO | [Step 600] musicality/loss=0.5445 | musicality/mse=0.4930 | alignment/loss=0.5371 | alignment/mse=0.4832 | preference/loss=0.3639 | preference/accuracy=0.8369 | lr_backbone=9.83e-06 | lr_heads=9.83e-05
|
| 56 |
+
2026-01-22 12:15:04 | INFO | [Step 700] musicality/loss=0.5212 | musicality/mse=0.4580 | alignment/loss=0.5114 | alignment/mse=0.4531 | preference/loss=0.3288 | preference/accuracy=0.8612 | lr_backbone=9.73e-06 | lr_heads=9.73e-05
|
| 57 |
+
2026-01-22 12:17:04 | INFO | [Step 800] musicality/loss=0.5041 | musicality/mse=0.4401 | alignment/loss=0.4980 | alignment/mse=0.4283 | preference/loss=0.3097 | preference/accuracy=0.8694 | lr_backbone=9.62e-06 | lr_heads=9.62e-05
|
| 58 |
+
2026-01-22 12:19:00 | INFO | [Step 900] musicality/loss=0.4869 | musicality/mse=0.4069 | alignment/loss=0.4819 | alignment/mse=0.4107 | preference/loss=0.2636 | preference/accuracy=0.8972 | lr_backbone=9.48e-06 | lr_heads=9.48e-05
|
| 59 |
+
2026-01-22 12:20:56 | INFO | [Step 1000] musicality/loss=0.4772 | musicality/mse=0.3909 | alignment/loss=0.4657 | alignment/mse=0.3832 | preference/loss=0.2406 | preference/accuracy=0.9075 | lr_backbone=9.33e-06 | lr_heads=9.33e-05
|
| 60 |
+
2026-01-22 12:20:56 | INFO | [Step 1000] Running validation...
|
| 61 |
+
2026-01-22 12:21:01 | INFO | [Val] musicality: loss=0.6333 | mse=0.6206
|
| 62 |
+
2026-01-22 12:21:09 | INFO | [Val] alignment: loss=0.6804 | mse=0.7634
|
| 63 |
+
2026-01-22 12:21:18 | INFO | [Val] preference: loss=0.6525 | accuracy=0.7290
|
| 64 |
+
2026-01-22 12:21:18 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_step_1000.pt (81.2MB, 58 params)
|
| 65 |
+
2026-01-22 12:21:18 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_step_1000_full.pt (161.6MB, 3 heads)
|
| 66 |
+
2026-01-22 12:23:17 | INFO | [Step 1100] musicality/loss=0.4655 | musicality/mse=0.3807 | alignment/loss=0.4512 | alignment/mse=0.3668 | preference/loss=0.2191 | preference/accuracy=0.9225 | lr_backbone=9.16e-06 | lr_heads=9.16e-05
|
| 67 |
+
2026-01-22 12:25:13 | INFO | [Step 1200] musicality/loss=0.4532 | musicality/mse=0.3656 | alignment/loss=0.4378 | alignment/mse=0.3502 | preference/loss=0.1958 | preference/accuracy=0.9363 | lr_backbone=8.97e-06 | lr_heads=8.97e-05
|
| 68 |
+
2026-01-22 12:27:18 | INFO | [Step 1300] musicality/loss=0.4398 | musicality/mse=0.3375 | alignment/loss=0.4246 | alignment/mse=0.3317 | preference/loss=0.1737 | preference/accuracy=0.9472 | lr_backbone=8.76e-06 | lr_heads=8.76e-05
|
| 69 |
+
2026-01-22 12:29:21 | INFO | [Step 1400] musicality/loss=0.4341 | musicality/mse=0.3397 | alignment/loss=0.4080 | alignment/mse=0.3086 | preference/loss=0.1520 | preference/accuracy=0.9534 | lr_backbone=8.54e-06 | lr_heads=8.54e-05
|
| 70 |
+
2026-01-22 12:31:24 | INFO | [Step 1500] musicality/loss=0.4334 | musicality/mse=0.3378 | alignment/loss=0.4012 | alignment/mse=0.2984 | preference/loss=0.1414 | preference/accuracy=0.9547 | lr_backbone=8.30e-06 | lr_heads=8.30e-05
|
| 71 |
+
2026-01-22 12:31:24 | INFO | [Step 1500] Running validation...
|
| 72 |
+
2026-01-22 12:31:28 | INFO | [Val] musicality: loss=0.6763 | mse=0.7138
|
| 73 |
+
2026-01-22 12:31:33 | INFO | [Val] alignment: loss=0.7246 | mse=0.8572
|
| 74 |
+
2026-01-22 12:31:40 | INFO | [Val] preference: loss=0.8507 | accuracy=0.7173
|
| 75 |
+
2026-01-22 12:33:37 | INFO | [Step 1600] musicality/loss=0.4255 | musicality/mse=0.3209 | alignment/loss=0.3842 | alignment/mse=0.2749 | preference/loss=0.1293 | preference/accuracy=0.9566 | lr_backbone=8.04e-06 | lr_heads=8.04e-05
|
| 76 |
+
2026-01-22 12:35:41 | INFO | [Step 1700] musicality/loss=0.4066 | musicality/mse=0.3057 | alignment/loss=0.3841 | alignment/mse=0.2792 | preference/loss=0.1069 | preference/accuracy=0.9703 | lr_backbone=7.78e-06 | lr_heads=7.78e-05
|
| 77 |
+
2026-01-22 12:37:40 | INFO | [Step 1800] musicality/loss=0.4080 | musicality/mse=0.3009 | alignment/loss=0.3715 | alignment/mse=0.2686 | preference/loss=0.1050 | preference/accuracy=0.9722 | lr_backbone=7.50e-06 | lr_heads=7.50e-05
|
| 78 |
+
2026-01-22 12:39:42 | INFO | [Step 1900] musicality/loss=0.3994 | musicality/mse=0.2916 | alignment/loss=0.3563 | alignment/mse=0.2474 | preference/loss=0.0925 | preference/accuracy=0.9759 | lr_backbone=7.21e-06 | lr_heads=7.21e-05
|
| 79 |
+
2026-01-22 12:41:44 | INFO | [Step 2000] musicality/loss=0.3932 | musicality/mse=0.2833 | alignment/loss=0.3522 | alignment/mse=0.2453 | preference/loss=0.0871 | preference/accuracy=0.9759 | lr_backbone=6.91e-06 | lr_heads=6.91e-05
|
| 80 |
+
2026-01-22 12:41:44 | INFO | [Step 2000] Running validation...
|
| 81 |
+
2026-01-22 12:41:48 | INFO | [Val] musicality: loss=0.6617 | mse=0.6857
|
| 82 |
+
2026-01-22 12:41:52 | INFO | [Val] alignment: loss=0.7773 | mse=0.9801
|
| 83 |
+
2026-01-22 12:41:59 | INFO | [Val] preference: loss=1.0762 | accuracy=0.6999
|
| 84 |
+
2026-01-22 12:42:00 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_step_2000.pt (81.2MB, 58 params)
|
| 85 |
+
2026-01-22 12:42:00 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_step_2000_full.pt (161.6MB, 3 heads)
|
| 86 |
+
2026-01-22 12:43:55 | INFO | [Step 2100] musicality/loss=0.3933 | musicality/mse=0.2810 | alignment/loss=0.3404 | alignment/mse=0.2300 | preference/loss=0.0796 | preference/accuracy=0.9797 | lr_backbone=6.61e-06 | lr_heads=6.61e-05
|
| 87 |
+
2026-01-22 12:45:52 | INFO | [Step 2200] musicality/loss=0.3666 | musicality/mse=0.2535 | alignment/loss=0.3335 | alignment/mse=0.2198 | preference/loss=0.0720 | preference/accuracy=0.9822 | lr_backbone=6.29e-06 | lr_heads=6.29e-05
|
| 88 |
+
2026-01-22 12:47:46 | INFO | [Step 2300] musicality/loss=0.3828 | musicality/mse=0.2731 | alignment/loss=0.3260 | alignment/mse=0.2147 | preference/loss=0.0662 | preference/accuracy=0.9856 | lr_backbone=5.98e-06 | lr_heads=5.98e-05
|
| 89 |
+
2026-01-22 12:49:37 | INFO | [Step 2400] musicality/loss=0.3704 | musicality/mse=0.2589 | alignment/loss=0.3215 | alignment/mse=0.2067 | preference/loss=0.0641 | preference/accuracy=0.9831 | lr_backbone=5.65e-06 | lr_heads=5.65e-05
|
| 90 |
+
2026-01-22 12:51:41 | INFO | [Step 2500] musicality/loss=0.3680 | musicality/mse=0.2558 | alignment/loss=0.3119 | alignment/mse=0.1950 | preference/loss=0.0548 | preference/accuracy=0.9866 | lr_backbone=5.33e-06 | lr_heads=5.33e-05
|
| 91 |
+
2026-01-22 12:51:41 | INFO | [Step 2500] Running validation...
|
| 92 |
+
2026-01-22 12:51:45 | INFO | [Val] musicality: loss=0.6730 | mse=0.7145
|
| 93 |
+
2026-01-22 12:51:49 | INFO | [Val] alignment: loss=0.7797 | mse=0.9899
|
| 94 |
+
2026-01-22 12:51:56 | INFO | [Val] preference: loss=1.1633 | accuracy=0.7127
|
| 95 |
+
2026-01-22 12:53:51 | INFO | [Step 2600] musicality/loss=0.3629 | musicality/mse=0.2462 | alignment/loss=0.3097 | alignment/mse=0.1931 | preference/loss=0.0521 | preference/accuracy=0.9884 | lr_backbone=5.00e-06 | lr_heads=5.00e-05
|
| 96 |
+
2026-01-22 12:55:42 | INFO | [Step 2700] musicality/loss=0.3622 | musicality/mse=0.2491 | alignment/loss=0.2991 | alignment/mse=0.1803 | preference/loss=0.0474 | preference/accuracy=0.9900 | lr_backbone=4.67e-06 | lr_heads=4.67e-05
|
| 97 |
+
2026-01-22 12:57:46 | INFO | [Step 2800] musicality/loss=0.3593 | musicality/mse=0.2445 | alignment/loss=0.2913 | alignment/mse=0.1758 | preference/loss=0.0468 | preference/accuracy=0.9919 | lr_backbone=4.35e-06 | lr_heads=4.35e-05
|
| 98 |
+
2026-01-22 12:59:53 | INFO | [Step 2900] musicality/loss=0.3444 | musicality/mse=0.2250 | alignment/loss=0.3002 | alignment/mse=0.1828 | preference/loss=0.0455 | preference/accuracy=0.9903 | lr_backbone=4.02e-06 | lr_heads=4.02e-05
|
| 99 |
+
2026-01-22 13:01:55 | INFO | [Step 3000] musicality/loss=0.3463 | musicality/mse=0.2247 | alignment/loss=0.2832 | alignment/mse=0.1671 | preference/loss=0.0444 | preference/accuracy=0.9903 | lr_backbone=3.71e-06 | lr_heads=3.71e-05
|
| 100 |
+
2026-01-22 13:01:55 | INFO | [Step 3000] Running validation...
|
| 101 |
+
2026-01-22 13:01:58 | INFO | [Val] musicality: loss=0.6822 | mse=0.7429
|
| 102 |
+
2026-01-22 13:02:02 | INFO | [Val] alignment: loss=0.8000 | mse=1.0463
|
| 103 |
+
2026-01-22 13:02:09 | INFO | [Val] preference: loss=1.2784 | accuracy=0.7058
|
| 104 |
+
2026-01-22 13:02:09 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_step_3000.pt (81.2MB, 58 params)
|
| 105 |
+
2026-01-22 13:02:09 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_step_3000_full.pt (161.6MB, 3 heads)
|
| 106 |
+
2026-01-22 13:04:19 | INFO | [Step 3100] musicality/loss=0.3449 | musicality/mse=0.2249 | alignment/loss=0.2821 | alignment/mse=0.1644 | preference/loss=0.0420 | preference/accuracy=0.9912 | lr_backbone=3.39e-06 | lr_heads=3.39e-05
|
| 107 |
+
2026-01-22 13:06:23 | INFO | [Step 3200] musicality/loss=0.3391 | musicality/mse=0.2169 | alignment/loss=0.2769 | alignment/mse=0.1577 | preference/loss=0.0362 | preference/accuracy=0.9916 | lr_backbone=3.09e-06 | lr_heads=3.09e-05
|
| 108 |
+
2026-01-22 13:08:21 | INFO | [Step 3300] musicality/loss=0.3372 | musicality/mse=0.2252 | alignment/loss=0.2789 | alignment/mse=0.1595 | preference/loss=0.0374 | preference/accuracy=0.9928 | lr_backbone=2.79e-06 | lr_heads=2.79e-05
|
| 109 |
+
2026-01-22 13:10:26 | INFO | [Step 3400] musicality/loss=0.3323 | musicality/mse=0.2121 | alignment/loss=0.2744 | alignment/mse=0.1610 | preference/loss=0.0376 | preference/accuracy=0.9897 | lr_backbone=2.50e-06 | lr_heads=2.50e-05
|
| 110 |
+
2026-01-22 13:12:22 | INFO | [Step 3500] musicality/loss=0.3326 | musicality/mse=0.2191 | alignment/loss=0.2689 | alignment/mse=0.1545 | preference/loss=0.0375 | preference/accuracy=0.9922 | lr_backbone=2.22e-06 | lr_heads=2.22e-05
|
| 111 |
+
2026-01-22 13:12:22 | INFO | [Step 3500] Running validation...
|
| 112 |
+
2026-01-22 13:12:26 | INFO | [Val] musicality: loss=0.7052 | mse=0.7941
|
| 113 |
+
2026-01-22 13:12:31 | INFO | [Val] alignment: loss=0.7979 | mse=1.0505
|
| 114 |
+
2026-01-22 13:12:38 | INFO | [Val] preference: loss=1.3824 | accuracy=0.6871
|
| 115 |
+
2026-01-22 13:14:37 | INFO | [Step 3600] musicality/loss=0.3297 | musicality/mse=0.2113 | alignment/loss=0.2602 | alignment/mse=0.1440 | preference/loss=0.0351 | preference/accuracy=0.9928 | lr_backbone=1.96e-06 | lr_heads=1.96e-05
|
| 116 |
+
2026-01-22 13:16:45 | INFO | [Step 3700] musicality/loss=0.3212 | musicality/mse=0.2035 | alignment/loss=0.2628 | alignment/mse=0.1474 | preference/loss=0.0336 | preference/accuracy=0.9928 | lr_backbone=1.70e-06 | lr_heads=1.70e-05
|
| 117 |
+
2026-01-22 13:18:51 | INFO | [Step 3800] musicality/loss=0.3207 | musicality/mse=0.1961 | alignment/loss=0.2625 | alignment/mse=0.1466 | preference/loss=0.0336 | preference/accuracy=0.9941 | lr_backbone=1.46e-06 | lr_heads=1.46e-05
|
| 118 |
+
2026-01-22 13:20:47 | INFO | [Step 3900] musicality/loss=0.3257 | musicality/mse=0.2132 | alignment/loss=0.2556 | alignment/mse=0.1387 | preference/loss=0.0300 | preference/accuracy=0.9953 | lr_backbone=1.24e-06 | lr_heads=1.24e-05
|
| 119 |
+
2026-01-22 13:22:53 | INFO | [Step 4000] musicality/loss=0.3214 | musicality/mse=0.2049 | alignment/loss=0.2551 | alignment/mse=0.1389 | preference/loss=0.0310 | preference/accuracy=0.9966 | lr_backbone=1.03e-06 | lr_heads=1.03e-05
|
| 120 |
+
2026-01-22 13:22:53 | INFO | [Step 4000] Running validation...
|
| 121 |
+
2026-01-22 13:22:58 | INFO | [Val] musicality: loss=0.6972 | mse=0.7796
|
| 122 |
+
2026-01-22 13:23:02 | INFO | [Val] alignment: loss=0.8132 | mse=1.0816
|
| 123 |
+
2026-01-22 13:23:10 | INFO | [Val] preference: loss=1.4036 | accuracy=0.6965
|
| 124 |
+
2026-01-22 13:23:10 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_step_4000.pt (81.2MB, 58 params)
|
| 125 |
+
2026-01-22 13:23:10 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_step_4000_full.pt (161.6MB, 3 heads)
|
| 126 |
+
2026-01-22 13:25:13 | INFO | [Step 4100] musicality/loss=0.3218 | musicality/mse=0.2021 | alignment/loss=0.2590 | alignment/mse=0.1476 | preference/loss=0.0311 | preference/accuracy=0.9934 | lr_backbone=8.43e-07 | lr_heads=8.43e-06
|
| 127 |
+
2026-01-22 13:27:13 | INFO | [Step 4200] musicality/loss=0.3236 | musicality/mse=0.2082 | alignment/loss=0.2549 | alignment/mse=0.1375 | preference/loss=0.0300 | preference/accuracy=0.9956 | lr_backbone=6.70e-07 | lr_heads=6.70e-06
|
| 128 |
+
2026-01-22 13:29:11 | INFO | [Step 4300] musicality/loss=0.3143 | musicality/mse=0.1926 | alignment/loss=0.2508 | alignment/mse=0.1364 | preference/loss=0.0328 | preference/accuracy=0.9938 | lr_backbone=5.16e-07 | lr_heads=5.16e-06
|
| 129 |
+
2026-01-22 13:31:10 | INFO | [Step 4400] musicality/loss=0.3274 | musicality/mse=0.2154 | alignment/loss=0.2571 | alignment/mse=0.1395 | preference/loss=0.0303 | preference/accuracy=0.9941 | lr_backbone=3.81e-07 | lr_heads=3.81e-06
|
| 130 |
+
2026-01-22 13:33:13 | INFO | [Step 4500] musicality/loss=0.3214 | musicality/mse=0.2035 | alignment/loss=0.2498 | alignment/mse=0.1368 | preference/loss=0.0318 | preference/accuracy=0.9947 | lr_backbone=2.65e-07 | lr_heads=2.65e-06
|
| 131 |
+
2026-01-22 13:33:13 | INFO | [Step 4500] Running validation...
|
| 132 |
+
2026-01-22 13:33:17 | INFO | [Val] musicality: loss=0.6957 | mse=0.7749
|
| 133 |
+
2026-01-22 13:33:21 | INFO | [Val] alignment: loss=0.8114 | mse=1.0750
|
| 134 |
+
2026-01-22 13:33:28 | INFO | [Val] preference: loss=1.4276 | accuracy=0.6965
|
| 135 |
+
2026-01-22 13:35:34 | INFO | [Step 4600] musicality/loss=0.3194 | musicality/mse=0.1995 | alignment/loss=0.2504 | alignment/mse=0.1391 | preference/loss=0.0274 | preference/accuracy=0.9966 | lr_backbone=1.70e-07 | lr_heads=1.70e-06
|
| 136 |
+
2026-01-22 13:37:34 | INFO | [Step 4700] musicality/loss=0.3210 | musicality/mse=0.2021 | alignment/loss=0.2475 | alignment/mse=0.1335 | preference/loss=0.0311 | preference/accuracy=0.9947 | lr_backbone=9.61e-08 | lr_heads=9.61e-07
|
| 137 |
+
2026-01-22 13:39:36 | INFO | [Step 4800] musicality/loss=0.3196 | musicality/mse=0.2007 | alignment/loss=0.2581 | alignment/mse=0.1434 | preference/loss=0.0277 | preference/accuracy=0.9962 | lr_backbone=4.28e-08 | lr_heads=4.28e-07
|
| 138 |
+
2026-01-22 13:41:37 | INFO | [Step 4900] musicality/loss=0.3171 | musicality/mse=0.2014 | alignment/loss=0.2531 | alignment/mse=0.1374 | preference/loss=0.0261 | preference/accuracy=0.9975 | lr_backbone=1.07e-08 | lr_heads=1.07e-07
|
| 139 |
+
2026-01-22 13:43:33 | INFO | [Step 5000] musicality/loss=0.3200 | musicality/mse=0.1975 | alignment/loss=0.2537 | alignment/mse=0.1386 | preference/loss=0.0290 | preference/accuracy=0.9944 | lr_backbone=0.00e+00 | lr_heads=0.00e+00
|
| 140 |
+
2026-01-22 13:43:33 | INFO | [Step 5000] Running validation...
|
| 141 |
+
2026-01-22 13:43:37 | INFO | [Val] musicality: loss=0.6978 | mse=0.7797
|
| 142 |
+
2026-01-22 13:43:42 | INFO | [Val] alignment: loss=0.8105 | mse=1.0741
|
| 143 |
+
2026-01-22 13:43:49 | INFO | [Val] preference: loss=1.4317 | accuracy=0.6930
|
| 144 |
+
2026-01-22 13:43:49 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_step_5000.pt (81.2MB, 58 params)
|
| 145 |
+
2026-01-22 13:43:49 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_step_5000_full.pt (161.6MB, 3 heads)
|
| 146 |
+
2026-01-22 13:43:49 | INFO | Training complete. Running final validation...
|
| 147 |
+
2026-01-22 13:43:54 | INFO | [Final Val] musicality: loss=0.6978 | mse=0.7797
|
| 148 |
+
2026-01-22 13:43:59 | INFO | [Final Val] alignment: loss=0.8105 | mse=1.0741
|
| 149 |
+
2026-01-22 13:44:08 | INFO | [Final Val] preference: loss=1.4317 | accuracy=0.6930
|
| 150 |
+
2026-01-22 13:44:08 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_final.pt (81.2MB, 58 params)
|
| 151 |
+
2026-01-22 13:44:08 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_final_full.pt (161.6MB, 3 heads)
|
| 152 |
+
2026-01-22 13:44:08 | INFO | Done! Checkpoint saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_final.pt
|
downstream_mixed/20260122_1955/config.yaml
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
attention_mode: CA
|
| 2 |
+
attn_dropout: 0.0
|
| 3 |
+
category_embeddings: null
|
| 4 |
+
dim: 768
|
| 5 |
+
dim_head: 64
|
| 6 |
+
downsample:
|
| 7 |
+
configs:
|
| 8 |
+
conv2_4x:
|
| 9 |
+
factor: 4
|
| 10 |
+
kernel_size: 5
|
| 11 |
+
kind: conv*2
|
| 12 |
+
use_layernorm: true
|
| 13 |
+
conv_4x:
|
| 14 |
+
factor: 4
|
| 15 |
+
kernel_size: 5
|
| 16 |
+
kind: conv
|
| 17 |
+
stage: 1
|
| 18 |
+
use_layernorm: true
|
| 19 |
+
glu_4x:
|
| 20 |
+
factor: 4
|
| 21 |
+
kernel_size: 5
|
| 22 |
+
kind: gluconv*2+pw
|
| 23 |
+
use_layernorm: true
|
| 24 |
+
mean:
|
| 25 |
+
factor: 2
|
| 26 |
+
kind: mean
|
| 27 |
+
mean_4x:
|
| 28 |
+
dropout: 0.0
|
| 29 |
+
factor: 30
|
| 30 |
+
kind: mean+mlp
|
| 31 |
+
mlp_ratio: 2.0
|
| 32 |
+
none:
|
| 33 |
+
factor: 1
|
| 34 |
+
kind: none
|
| 35 |
+
eval: mean_4x
|
| 36 |
+
ref: null
|
| 37 |
+
text: none
|
| 38 |
+
ff_dropout: 0.0
|
| 39 |
+
ff_mult: 4
|
| 40 |
+
freeze_audio: true
|
| 41 |
+
freeze_text: true
|
| 42 |
+
heads: 8
|
| 43 |
+
joint_tf_depth: 1
|
| 44 |
+
load_config:
|
| 45 |
+
checkpoint_path: null
|
| 46 |
+
frozen_from_pretrained: true
|
| 47 |
+
pretrained_name: OpenMuQ/MuQ-MuLan-large
|
| 48 |
+
strict: false
|
| 49 |
+
mlp_dim: 768
|
| 50 |
+
mode: text_only
|
| 51 |
+
model_name: OpenMuQ/MuQ-MuLan-large
|
| 52 |
+
name: reward
|
| 53 |
+
null_embedding:
|
| 54 |
+
audio:
|
| 55 |
+
dropout: 0.5
|
| 56 |
+
length: 10
|
| 57 |
+
lyrics:
|
| 58 |
+
dropout: 0.3
|
| 59 |
+
length: 10
|
| 60 |
+
text:
|
| 61 |
+
dropout: 0.2
|
| 62 |
+
length: 10
|
| 63 |
+
output_dim: 2
|
| 64 |
+
prompt_tf_depth: 1
|
| 65 |
+
sr: 24000
|
| 66 |
+
text_encoder: muq_mulan
|
| 67 |
+
text_lora_config: null
|
| 68 |
+
train_muq_depth: 0
|
| 69 |
+
use_layer_idx: -1
|
downstream_mixed/20260122_1955/downstream_config.yaml
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 2 |
+
basics:
|
| 3 |
+
random_seed: 42
|
| 4 |
+
save_dir: ${project_root}/experiments/downstream_mixed
|
| 5 |
+
run_name: null
|
| 6 |
+
tasks:
|
| 7 |
+
- musicality
|
| 8 |
+
- alignment
|
| 9 |
+
- preference
|
| 10 |
+
backbone:
|
| 11 |
+
checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 12 |
+
freeze: false
|
| 13 |
+
freeze_encoder_only: true
|
| 14 |
+
from_ema: false
|
| 15 |
+
dataset:
|
| 16 |
+
train_file: ${project_root}/train_multitask.jsonl
|
| 17 |
+
test_file: ${project_root}/test_multitask.jsonl
|
| 18 |
+
heads:
|
| 19 |
+
hidden_dim: 768
|
| 20 |
+
init_from: score_projector
|
| 21 |
+
musicality:
|
| 22 |
+
use_mlp: true
|
| 23 |
+
ordinal: false
|
| 24 |
+
dropout: 0.1
|
| 25 |
+
use_tanh: true
|
| 26 |
+
num_categories: 9
|
| 27 |
+
y_min: 1.0
|
| 28 |
+
y_max: 5.0
|
| 29 |
+
step: 0.5
|
| 30 |
+
alignment:
|
| 31 |
+
use_mlp: true
|
| 32 |
+
use_tanh: true
|
| 33 |
+
ordinal: false
|
| 34 |
+
dropout: 0.1
|
| 35 |
+
num_categories: 9
|
| 36 |
+
y_min: 1.0
|
| 37 |
+
y_max: 5.0
|
| 38 |
+
step: 0.5
|
| 39 |
+
preference:
|
| 40 |
+
use_mlp: true
|
| 41 |
+
dropout: 0.1
|
| 42 |
+
train:
|
| 43 |
+
dataset_mode: mixed
|
| 44 |
+
num_train_steps: 5000
|
| 45 |
+
batch_size: 32
|
| 46 |
+
learning_rate: 0.0001
|
| 47 |
+
backbone_learning_rate: 1.0e-05
|
| 48 |
+
weight_decay: 0.01
|
| 49 |
+
max_grad_norm: 1.0
|
| 50 |
+
warmup_steps: 200
|
| 51 |
+
schedule_type: cosine
|
| 52 |
+
min_lr_ratio: 0.01
|
| 53 |
+
log_interval: 100
|
| 54 |
+
val_interval: 500
|
| 55 |
+
save_interval: 1000
|
| 56 |
+
num_workers: 8
|
| 57 |
+
resume: null
|
| 58 |
+
device: cuda
|
downstream_mixed/20260122_1955/train.log
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-22 19:55:26 | INFO | Starting downstream training: 20260122_1955
|
| 2 |
+
2026-01-22 19:55:26 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955
|
| 3 |
+
2026-01-22 19:55:26 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/downstream_config.yaml
|
| 4 |
+
2026-01-22 19:55:26 | INFO | Training tasks: ['musicality', 'alignment', 'preference']
|
| 5 |
+
2026-01-22 19:55:26 | INFO | Dataset mode: mixed
|
| 6 |
+
2026-01-22 19:55:29 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 7 |
+
2026-01-22 19:55:29 | INFO | Using checkpoint config for model
|
| 8 |
+
2026-01-22 19:55:35 | INFO | Missing keys (794): ['alignment_head.0.weight', 'alignment_head.0.bias', 'alignment_head.1.weight', 'alignment_head.1.bias', 'alignment_head.3.weight']...
|
| 9 |
+
2026-01-22 19:55:35 | WARNING | Missing keys: 283
|
| 10 |
+
2026-01-22 19:55:36 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 11 |
+
2026-01-22 19:55:36 | INFO | Created MixedDownstreamTaskModel (freeze_encoder_only=True)
|
| 12 |
+
2026-01-22 19:55:36 | INFO | Added mlp head for task 'musicality'
|
| 13 |
+
2026-01-22 19:55:36 | INFO | Added mlp head for task 'alignment'
|
| 14 |
+
2026-01-22 19:55:36 | INFO | Added mlp head for task 'preference'
|
| 15 |
+
2026-01-22 19:55:36 | INFO | Initializing heads from backbone 'score_projector'
|
| 16 |
+
2026-01-22 19:55:36 | INFO | Initializing 3 heads from 'score_projector'
|
| 17 |
+
2026-01-22 19:55:36 | INFO | Task 'musicality': type=mlp, ordinal=False
|
| 18 |
+
2026-01-22 19:55:36 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
|
| 19 |
+
2026-01-22 19:55:36 | INFO | Slicing 3.weight -> 4.weight: torch.Size([2, 768]) -> torch.Size([1, 768])
|
| 20 |
+
2026-01-22 19:55:36 | INFO | Loaded 6 parameters, 0 missing
|
| 21 |
+
2026-01-22 19:55:36 | INFO | ✓ Head initialized from 'score_projector'
|
| 22 |
+
2026-01-22 19:55:36 | INFO | Task 'alignment': type=mlp, ordinal=False
|
| 23 |
+
2026-01-22 19:55:36 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
|
| 24 |
+
2026-01-22 19:55:36 | INFO | Slicing 3.weight -> 4.weight: torch.Size([2, 768]) -> torch.Size([1, 768])
|
| 25 |
+
2026-01-22 19:55:36 | INFO | Loaded 6 parameters, 0 missing
|
| 26 |
+
2026-01-22 19:55:36 | INFO | ✓ Head initialized from 'score_projector'
|
| 27 |
+
2026-01-22 19:55:36 | INFO | Task 'preference': type=mlp, ordinal=False
|
| 28 |
+
2026-01-22 19:55:36 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
|
| 29 |
+
2026-01-22 19:55:36 | INFO | Slicing 3.weight -> 4.weight: torch.Size([2, 768]) -> torch.Size([1, 768])
|
| 30 |
+
2026-01-22 19:55:36 | INFO | Loaded 6 parameters, 0 missing
|
| 31 |
+
2026-01-22 19:55:36 | INFO | ✓ Head initialized from 'score_projector'
|
| 32 |
+
2026-01-22 19:55:36 | INFO | ✓ All heads initialized
|
| 33 |
+
2026-01-22 19:55:36 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
|
| 34 |
+
2026-01-22 19:55:36 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
|
| 35 |
+
2026-01-22 19:55:36 | INFO | Task 'musicality': train=4322, test=913
|
| 36 |
+
2026-01-22 19:55:36 | INFO | Task 'alignment': train=1923, test=913
|
| 37 |
+
2026-01-22 19:55:36 | INFO | Task 'preference': train=1065, test=275
|
| 38 |
+
2026-01-22 19:55:36 | INFO | Backbone trainable parameters: 21,279,237 (lr=1e-05)
|
| 39 |
+
2026-01-22 19:55:36 | INFO | Head parameters: 1,778,691 (lr=0.0001)
|
| 40 |
+
2026-01-22 19:55:36 | INFO | Total trainable parameters: 23,057,928
|
| 41 |
+
2026-01-22 19:55:36 | INFO | [MIXED MODE] Starting training for 5000 steps
|
| 42 |
+
2026-01-22 19:55:36 | INFO | Backbone LR: 1e-05, Head LR: 0.0001
|
| 43 |
+
2026-01-22 19:58:02 | INFO | [Step 100] musicality/loss=4.0102 | musicality/mse=25.7604 | alignment/loss=3.0251 | alignment/mse=14.7271 | preference/loss=1.5069 | preference/accuracy=0.5759 | lr_backbone=5.05e-06 | lr_heads=5.05e-05
|
| 44 |
+
2026-01-22 20:00:09 | INFO | [Step 200] musicality/loss=1.0180 | musicality/mse=1.7198 | alignment/loss=1.0024 | alignment/mse=1.7404 | preference/loss=0.6600 | preference/accuracy=0.6647 | lr_backbone=1.00e-05 | lr_heads=1.00e-04
|
| 45 |
+
2026-01-22 20:02:13 | INFO | [Step 300] musicality/loss=0.6936 | musicality/mse=0.7847 | alignment/loss=0.6862 | alignment/mse=0.7737 | preference/loss=0.5112 | preference/accuracy=0.7488 | lr_backbone=9.99e-06 | lr_heads=9.99e-05
|
| 46 |
+
2026-01-22 20:04:17 | INFO | [Step 400] musicality/loss=0.6136 | musicality/mse=0.6091 | alignment/loss=0.5997 | alignment/mse=0.5944 | preference/loss=0.4582 | preference/accuracy=0.7828 | lr_backbone=9.96e-06 | lr_heads=9.96e-05
|
| 47 |
+
2026-01-22 20:06:15 | INFO | [Step 500] musicality/loss=0.5617 | musicality/mse=0.5180 | alignment/loss=0.5633 | alignment/mse=0.5330 | preference/loss=0.4022 | preference/accuracy=0.8131 | lr_backbone=9.90e-06 | lr_heads=9.90e-05
|
| 48 |
+
2026-01-22 20:06:15 | INFO | [Step 500] Running validation...
|
| 49 |
+
2026-01-22 20:06:21 | INFO | [Val] musicality: loss=0.6488 | mse=0.6439
|
| 50 |
+
2026-01-22 20:06:26 | INFO | [Val] alignment: loss=0.6735 | mse=0.7288
|
| 51 |
+
2026-01-22 20:06:42 | INFO | [Val] preference: loss=0.5791 | accuracy=0.7301
|
| 52 |
+
2026-01-22 20:06:42 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_best.pt (81.2MB, 58 params)
|
| 53 |
+
2026-01-22 20:06:42 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_best_full.pt (161.6MB, 3 heads)
|
| 54 |
+
2026-01-22 20:06:42 | INFO | Saved model config to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/config.yaml
|
| 55 |
+
2026-01-22 20:06:42 | INFO | New best model saved (val_loss=0.6338)
|
| 56 |
+
2026-01-22 20:08:48 | INFO | [Step 600] musicality/loss=0.5497 | musicality/mse=0.5046 | alignment/loss=0.5343 | alignment/mse=0.4822 | preference/loss=0.3690 | preference/accuracy=0.8356 | lr_backbone=9.83e-06 | lr_heads=9.83e-05
|
| 57 |
+
2026-01-22 20:10:58 | INFO | [Step 700] musicality/loss=0.5343 | musicality/mse=0.4759 | alignment/loss=0.5158 | alignment/mse=0.4526 | preference/loss=0.3361 | preference/accuracy=0.8562 | lr_backbone=9.73e-06 | lr_heads=9.73e-05
|
| 58 |
+
2026-01-22 20:13:04 | INFO | [Step 800] musicality/loss=0.5077 | musicality/mse=0.4405 | alignment/loss=0.4961 | alignment/mse=0.4343 | preference/loss=0.3054 | preference/accuracy=0.8659 | lr_backbone=9.62e-06 | lr_heads=9.62e-05
|
| 59 |
+
2026-01-22 20:15:10 | INFO | [Step 900] musicality/loss=0.4827 | musicality/mse=0.4026 | alignment/loss=0.4907 | alignment/mse=0.4216 | preference/loss=0.2724 | preference/accuracy=0.8909 | lr_backbone=9.48e-06 | lr_heads=9.48e-05
|
| 60 |
+
2026-01-22 20:17:16 | INFO | [Step 1000] musicality/loss=0.4706 | musicality/mse=0.3813 | alignment/loss=0.4595 | alignment/mse=0.3812 | preference/loss=0.2412 | preference/accuracy=0.9059 | lr_backbone=9.33e-06 | lr_heads=9.33e-05
|
| 61 |
+
2026-01-22 20:17:16 | INFO | [Step 1000] Running validation...
|
| 62 |
+
2026-01-22 20:17:20 | INFO | [Val] musicality: loss=0.6520 | mse=0.6573
|
| 63 |
+
2026-01-22 20:17:25 | INFO | [Val] alignment: loss=0.7110 | mse=0.8175
|
| 64 |
+
2026-01-22 20:17:33 | INFO | [Val] preference: loss=0.6850 | accuracy=0.7290
|
| 65 |
+
2026-01-22 20:17:33 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_step_1000.pt (81.2MB, 58 params)
|
| 66 |
+
2026-01-22 20:17:33 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_step_1000_full.pt (161.6MB, 3 heads)
|
| 67 |
+
2026-01-22 20:19:38 | INFO | [Step 1100] musicality/loss=0.4653 | musicality/mse=0.3839 | alignment/loss=0.4591 | alignment/mse=0.3804 | preference/loss=0.2110 | preference/accuracy=0.9219 | lr_backbone=9.16e-06 | lr_heads=9.16e-05
|
| 68 |
+
2026-01-22 20:21:40 | INFO | [Step 1200] musicality/loss=0.4585 | musicality/mse=0.3653 | alignment/loss=0.4425 | alignment/mse=0.3537 | preference/loss=0.1931 | preference/accuracy=0.9275 | lr_backbone=8.97e-06 | lr_heads=8.97e-05
|
| 69 |
+
2026-01-22 20:23:52 | INFO | [Step 1300] musicality/loss=0.4420 | musicality/mse=0.3432 | alignment/loss=0.4205 | alignment/mse=0.3258 | preference/loss=0.1749 | preference/accuracy=0.9450 | lr_backbone=8.76e-06 | lr_heads=8.76e-05
|
| 70 |
+
2026-01-22 20:25:58 | INFO | [Step 1400] musicality/loss=0.4351 | musicality/mse=0.3408 | alignment/loss=0.4205 | alignment/mse=0.3224 | preference/loss=0.1601 | preference/accuracy=0.9466 | lr_backbone=8.54e-06 | lr_heads=8.54e-05
|
| 71 |
+
2026-01-22 20:28:00 | INFO | [Step 1500] musicality/loss=0.4294 | musicality/mse=0.3276 | alignment/loss=0.3935 | alignment/mse=0.2947 | preference/loss=0.1411 | preference/accuracy=0.9563 | lr_backbone=8.30e-06 | lr_heads=8.30e-05
|
| 72 |
+
2026-01-22 20:28:00 | INFO | [Step 1500] Running validation...
|
| 73 |
+
2026-01-22 20:28:04 | INFO | [Val] musicality: loss=0.6670 | mse=0.6936
|
| 74 |
+
2026-01-22 20:28:08 | INFO | [Val] alignment: loss=0.7408 | mse=0.8923
|
| 75 |
+
2026-01-22 20:28:16 | INFO | [Val] preference: loss=0.8865 | accuracy=0.7151
|
| 76 |
+
2026-01-22 20:30:15 | INFO | [Step 1600] musicality/loss=0.4214 | musicality/mse=0.3162 | alignment/loss=0.3917 | alignment/mse=0.2898 | preference/loss=0.1362 | preference/accuracy=0.9572 | lr_backbone=8.04e-06 | lr_heads=8.04e-05
|
| 77 |
+
2026-01-22 20:32:17 | INFO | [Step 1700] musicality/loss=0.4154 | musicality/mse=0.3088 | alignment/loss=0.3825 | alignment/mse=0.2771 | preference/loss=0.1202 | preference/accuracy=0.9637 | lr_backbone=7.78e-06 | lr_heads=7.78e-05
|
| 78 |
+
2026-01-22 20:34:22 | INFO | [Step 1800] musicality/loss=0.4103 | musicality/mse=0.3085 | alignment/loss=0.3628 | alignment/mse=0.2559 | preference/loss=0.1093 | preference/accuracy=0.9694 | lr_backbone=7.50e-06 | lr_heads=7.50e-05
|
| 79 |
+
2026-01-22 20:36:26 | INFO | [Step 1900] musicality/loss=0.3988 | musicality/mse=0.2859 | alignment/loss=0.3553 | alignment/mse=0.2509 | preference/loss=0.0938 | preference/accuracy=0.9725 | lr_backbone=7.21e-06 | lr_heads=7.21e-05
|
| 80 |
+
2026-01-22 20:38:26 | INFO | [Step 2000] musicality/loss=0.3971 | musicality/mse=0.2937 | alignment/loss=0.3568 | alignment/mse=0.2439 | preference/loss=0.0850 | preference/accuracy=0.9800 | lr_backbone=6.91e-06 | lr_heads=6.91e-05
|
| 81 |
+
2026-01-22 20:38:26 | INFO | [Step 2000] Running validation...
|
| 82 |
+
2026-01-22 20:38:29 | INFO | [Val] musicality: loss=0.6845 | mse=0.7318
|
| 83 |
+
2026-01-22 20:38:33 | INFO | [Val] alignment: loss=0.7468 | mse=0.9195
|
| 84 |
+
2026-01-22 20:38:40 | INFO | [Val] preference: loss=1.0557 | accuracy=0.7127
|
| 85 |
+
2026-01-22 20:38:40 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_step_2000.pt (81.2MB, 58 params)
|
| 86 |
+
2026-01-22 20:38:40 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_step_2000_full.pt (161.6MB, 3 heads)
|
| 87 |
+
2026-01-22 20:40:41 | INFO | [Step 2100] musicality/loss=0.3805 | musicality/mse=0.2657 | alignment/loss=0.3390 | alignment/mse=0.2305 | preference/loss=0.0812 | preference/accuracy=0.9769 | lr_backbone=6.61e-06 | lr_heads=6.61e-05
|
| 88 |
+
2026-01-22 20:42:41 | INFO | [Step 2200] musicality/loss=0.3902 | musicality/mse=0.2838 | alignment/loss=0.3319 | alignment/mse=0.2229 | preference/loss=0.0766 | preference/accuracy=0.9784 | lr_backbone=6.29e-06 | lr_heads=6.29e-05
|
| 89 |
+
2026-01-22 20:44:45 | INFO | [Step 2300] musicality/loss=0.3838 | musicality/mse=0.2732 | alignment/loss=0.3318 | alignment/mse=0.2150 | preference/loss=0.0723 | preference/accuracy=0.9784 | lr_backbone=5.98e-06 | lr_heads=5.98e-05
|
| 90 |
+
2026-01-22 20:46:49 | INFO | [Step 2400] musicality/loss=0.3717 | musicality/mse=0.2606 | alignment/loss=0.3228 | alignment/mse=0.2107 | preference/loss=0.0689 | preference/accuracy=0.9834 | lr_backbone=5.65e-06 | lr_heads=5.65e-05
|
| 91 |
+
2026-01-22 20:48:54 | INFO | [Step 2500] musicality/loss=0.3597 | musicality/mse=0.2386 | alignment/loss=0.3152 | alignment/mse=0.2051 | preference/loss=0.0572 | preference/accuracy=0.9853 | lr_backbone=5.33e-06 | lr_heads=5.33e-05
|
| 92 |
+
2026-01-22 20:48:54 | INFO | [Step 2500] Running validation...
|
| 93 |
+
2026-01-22 20:48:58 | INFO | [Val] musicality: loss=0.6836 | mse=0.7373
|
| 94 |
+
2026-01-22 20:49:03 | INFO | [Val] alignment: loss=0.7766 | mse=0.9850
|
| 95 |
+
2026-01-22 20:49:10 | INFO | [Val] preference: loss=1.2142 | accuracy=0.6906
|
| 96 |
+
2026-01-22 20:51:14 | INFO | [Step 2600] musicality/loss=0.3659 | musicality/mse=0.2496 | alignment/loss=0.3106 | alignment/mse=0.1954 | preference/loss=0.0531 | preference/accuracy=0.9891 | lr_backbone=5.00e-06 | lr_heads=5.00e-05
|
| 97 |
+
2026-01-22 20:53:13 | INFO | [Step 2700] musicality/loss=0.3661 | musicality/mse=0.2551 | alignment/loss=0.3030 | alignment/mse=0.1852 | preference/loss=0.0515 | preference/accuracy=0.9875 | lr_backbone=4.67e-06 | lr_heads=4.67e-05
|
| 98 |
+
2026-01-22 20:55:14 | INFO | [Step 2800] musicality/loss=0.3553 | musicality/mse=0.2406 | alignment/loss=0.3005 | alignment/mse=0.1872 | preference/loss=0.0515 | preference/accuracy=0.9888 | lr_backbone=4.35e-06 | lr_heads=4.35e-05
|
| 99 |
+
2026-01-22 20:57:16 | INFO | [Step 2900] musicality/loss=0.3592 | musicality/mse=0.2419 | alignment/loss=0.2965 | alignment/mse=0.1796 | preference/loss=0.0445 | preference/accuracy=0.9888 | lr_backbone=4.02e-06 | lr_heads=4.02e-05
|
| 100 |
+
2026-01-22 20:59:16 | INFO | [Step 3000] musicality/loss=0.3505 | musicality/mse=0.2338 | alignment/loss=0.2840 | alignment/mse=0.1693 | preference/loss=0.0439 | preference/accuracy=0.9916 | lr_backbone=3.71e-06 | lr_heads=3.71e-05
|
| 101 |
+
2026-01-22 20:59:16 | INFO | [Step 3000] Running validation...
|
| 102 |
+
2026-01-22 20:59:20 | INFO | [Val] musicality: loss=0.7002 | mse=0.7711
|
| 103 |
+
2026-01-22 20:59:25 | INFO | [Val] alignment: loss=0.7825 | mse=1.0091
|
| 104 |
+
2026-01-22 20:59:32 | INFO | [Val] preference: loss=1.3055 | accuracy=0.6965
|
| 105 |
+
2026-01-22 20:59:32 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_step_3000.pt (81.2MB, 58 params)
|
| 106 |
+
2026-01-22 20:59:32 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_step_3000_full.pt (161.6MB, 3 heads)
|
| 107 |
+
2026-01-22 21:01:30 | INFO | [Step 3100] musicality/loss=0.3430 | musicality/mse=0.2244 | alignment/loss=0.2829 | alignment/mse=0.1674 | preference/loss=0.0455 | preference/accuracy=0.9903 | lr_backbone=3.39e-06 | lr_heads=3.39e-05
|
| 108 |
+
2026-01-22 21:03:28 | INFO | [Step 3200] musicality/loss=0.3406 | musicality/mse=0.2224 | alignment/loss=0.2833 | alignment/mse=0.1693 | preference/loss=0.0406 | preference/accuracy=0.9903 | lr_backbone=3.09e-06 | lr_heads=3.09e-05
|
| 109 |
+
2026-01-22 21:05:26 | INFO | [Step 3300] musicality/loss=0.3375 | musicality/mse=0.2178 | alignment/loss=0.2742 | alignment/mse=0.1619 | preference/loss=0.0361 | preference/accuracy=0.9925 | lr_backbone=2.79e-06 | lr_heads=2.79e-05
|
| 110 |
+
2026-01-22 21:07:33 | INFO | [Step 3400] musicality/loss=0.3322 | musicality/mse=0.2134 | alignment/loss=0.2738 | alignment/mse=0.1624 | preference/loss=0.0381 | preference/accuracy=0.9931 | lr_backbone=2.50e-06 | lr_heads=2.50e-05
|
| 111 |
+
2026-01-22 21:09:29 | INFO | [Step 3500] musicality/loss=0.3395 | musicality/mse=0.2221 | alignment/loss=0.2694 | alignment/mse=0.1529 | preference/loss=0.0383 | preference/accuracy=0.9919 | lr_backbone=2.22e-06 | lr_heads=2.22e-05
|
| 112 |
+
2026-01-22 21:09:29 | INFO | [Step 3500] Running validation...
|
| 113 |
+
2026-01-22 21:09:34 | INFO | [Val] musicality: loss=0.7099 | mse=0.7968
|
| 114 |
+
2026-01-22 21:09:39 | INFO | [Val] alignment: loss=0.7999 | mse=1.0596
|
| 115 |
+
2026-01-22 21:09:47 | INFO | [Val] preference: loss=1.4323 | accuracy=0.6860
|
| 116 |
+
2026-01-22 21:11:45 | INFO | [Step 3600] musicality/loss=0.3416 | musicality/mse=0.2289 | alignment/loss=0.2664 | alignment/mse=0.1553 | preference/loss=0.0351 | preference/accuracy=0.9944 | lr_backbone=1.96e-06 | lr_heads=1.96e-05
|
| 117 |
+
2026-01-22 21:13:47 | INFO | [Step 3700] musicality/loss=0.3342 | musicality/mse=0.2132 | alignment/loss=0.2616 | alignment/mse=0.1459 | preference/loss=0.0350 | preference/accuracy=0.9931 | lr_backbone=1.70e-06 | lr_heads=1.70e-05
|
| 118 |
+
2026-01-22 21:15:53 | INFO | [Step 3800] musicality/loss=0.3355 | musicality/mse=0.2140 | alignment/loss=0.2655 | alignment/mse=0.1539 | preference/loss=0.0359 | preference/accuracy=0.9931 | lr_backbone=1.46e-06 | lr_heads=1.46e-05
|
| 119 |
+
2026-01-22 21:17:52 | INFO | [Step 3900] musicality/loss=0.3208 | musicality/mse=0.2009 | alignment/loss=0.2614 | alignment/mse=0.1469 | preference/loss=0.0318 | preference/accuracy=0.9950 | lr_backbone=1.24e-06 | lr_heads=1.24e-05
|
| 120 |
+
2026-01-22 21:19:50 | INFO | [Step 4000] musicality/loss=0.3265 | musicality/mse=0.2104 | alignment/loss=0.2603 | alignment/mse=0.1458 | preference/loss=0.0311 | preference/accuracy=0.9950 | lr_backbone=1.03e-06 | lr_heads=1.03e-05
|
| 121 |
+
2026-01-22 21:19:50 | INFO | [Step 4000] Running validation...
|
| 122 |
+
2026-01-22 21:19:55 | INFO | [Val] musicality: loss=0.7095 | mse=0.7949
|
| 123 |
+
2026-01-22 21:19:58 | INFO | [Val] alignment: loss=0.8059 | mse=1.0730
|
| 124 |
+
2026-01-22 21:20:06 | INFO | [Val] preference: loss=1.4329 | accuracy=0.6802
|
| 125 |
+
2026-01-22 21:20:06 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_step_4000.pt (81.2MB, 58 params)
|
| 126 |
+
2026-01-22 21:20:06 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_step_4000_full.pt (161.6MB, 3 heads)
|
| 127 |
+
2026-01-22 21:22:06 | INFO | [Step 4100] musicality/loss=0.3249 | musicality/mse=0.2018 | alignment/loss=0.2543 | alignment/mse=0.1402 | preference/loss=0.0294 | preference/accuracy=0.9956 | lr_backbone=8.43e-07 | lr_heads=8.43e-06
|
| 128 |
+
2026-01-22 21:24:01 | INFO | [Step 4200] musicality/loss=0.3225 | musicality/mse=0.2066 | alignment/loss=0.2586 | alignment/mse=0.1471 | preference/loss=0.0328 | preference/accuracy=0.9931 | lr_backbone=6.70e-07 | lr_heads=6.70e-06
|
| 129 |
+
2026-01-22 21:25:56 | INFO | [Step 4300] musicality/loss=0.3226 | musicality/mse=0.2079 | alignment/loss=0.2555 | alignment/mse=0.1438 | preference/loss=0.0289 | preference/accuracy=0.9947 | lr_backbone=5.16e-07 | lr_heads=5.16e-06
|
| 130 |
+
2026-01-22 21:27:56 | INFO | [Step 4400] musicality/loss=0.3169 | musicality/mse=0.1972 | alignment/loss=0.2543 | alignment/mse=0.1377 | preference/loss=0.0299 | preference/accuracy=0.9941 | lr_backbone=3.81e-07 | lr_heads=3.81e-06
|
| 131 |
+
2026-01-22 21:29:52 | INFO | [Step 4500] musicality/loss=0.3281 | musicality/mse=0.2140 | alignment/loss=0.2503 | alignment/mse=0.1388 | preference/loss=0.0312 | preference/accuracy=0.9953 | lr_backbone=2.65e-07 | lr_heads=2.65e-06
|
| 132 |
+
2026-01-22 21:29:52 | INFO | [Step 4500] Running validation...
|
| 133 |
+
2026-01-22 21:29:57 | INFO | [Val] musicality: loss=0.7054 | mse=0.7867
|
| 134 |
+
2026-01-22 21:30:02 | INFO | [Val] alignment: loss=0.8063 | mse=1.0763
|
| 135 |
+
2026-01-22 21:30:11 | INFO | [Val] preference: loss=1.4512 | accuracy=0.6767
|
| 136 |
+
2026-01-22 21:32:14 | INFO | [Step 4600] musicality/loss=0.3216 | musicality/mse=0.2008 | alignment/loss=0.2560 | alignment/mse=0.1431 | preference/loss=0.0332 | preference/accuracy=0.9944 | lr_backbone=1.70e-07 | lr_heads=1.70e-06
|
| 137 |
+
2026-01-22 21:34:14 | INFO | [Step 4700] musicality/loss=0.3259 | musicality/mse=0.2067 | alignment/loss=0.2512 | alignment/mse=0.1408 | preference/loss=0.0284 | preference/accuracy=0.9956 | lr_backbone=9.61e-08 | lr_heads=9.61e-07
|
| 138 |
+
2026-01-22 21:36:15 | INFO | [Step 4800] musicality/loss=0.3268 | musicality/mse=0.2086 | alignment/loss=0.2501 | alignment/mse=0.1375 | preference/loss=0.0310 | preference/accuracy=0.9928 | lr_backbone=4.28e-08 | lr_heads=4.28e-07
|
| 139 |
+
2026-01-22 21:38:19 | INFO | [Step 4900] musicality/loss=0.3168 | musicality/mse=0.1950 | alignment/loss=0.2517 | alignment/mse=0.1389 | preference/loss=0.0309 | preference/accuracy=0.9938 | lr_backbone=1.07e-08 | lr_heads=1.07e-07
|
| 140 |
+
2026-01-22 21:40:24 | INFO | [Step 5000] musicality/loss=0.3217 | musicality/mse=0.2037 | alignment/loss=0.2489 | alignment/mse=0.1369 | preference/loss=0.0322 | preference/accuracy=0.9938 | lr_backbone=0.00e+00 | lr_heads=0.00e+00
|
| 141 |
+
2026-01-22 21:40:24 | INFO | [Step 5000] Running validation...
|
| 142 |
+
2026-01-22 21:40:28 | INFO | [Val] musicality: loss=0.7057 | mse=0.7873
|
| 143 |
+
2026-01-22 21:40:32 | INFO | [Val] alignment: loss=0.8051 | mse=1.0733
|
| 144 |
+
2026-01-22 21:40:39 | INFO | [Val] preference: loss=1.4580 | accuracy=0.6767
|
| 145 |
+
2026-01-22 21:40:39 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_step_5000.pt (81.2MB, 58 params)
|
| 146 |
+
2026-01-22 21:40:39 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_step_5000_full.pt (161.6MB, 3 heads)
|
| 147 |
+
2026-01-22 21:40:39 | INFO | Training complete. Running final validation...
|
| 148 |
+
2026-01-22 21:40:44 | INFO | [Final Val] musicality: loss=0.7057 | mse=0.7873
|
| 149 |
+
2026-01-22 21:40:48 | INFO | [Final Val] alignment: loss=0.8051 | mse=1.0733
|
| 150 |
+
2026-01-22 21:40:56 | INFO | [Final Val] preference: loss=1.4580 | accuracy=0.6767
|
| 151 |
+
2026-01-22 21:40:56 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_final.pt (81.2MB, 58 params)
|
| 152 |
+
2026-01-22 21:40:57 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_final_full.pt (161.6MB, 3 heads)
|
| 153 |
+
2026-01-22 21:40:57 | INFO | Done! Checkpoint saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_final.pt
|
downstream_mixed_linear/20260122_1143/config.yaml
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DEVICES: '7'
|
| 2 |
+
accelerate:
|
| 3 |
+
mixed_precision: bf16
|
| 4 |
+
basics:
|
| 5 |
+
random_seed: 42
|
| 6 |
+
save_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model
|
| 7 |
+
dataset:
|
| 8 |
+
audio_dropout:
|
| 9 |
+
apply_to_eval: true
|
| 10 |
+
apply_to_ref: true
|
| 11 |
+
enabled: true
|
| 12 |
+
eval_only_on_training: true
|
| 13 |
+
max_duration: 1000
|
| 14 |
+
min_duration: 250
|
| 15 |
+
cache_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/tmp
|
| 16 |
+
db_path: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/backend/database.db
|
| 17 |
+
duration: 600.0
|
| 18 |
+
embedding_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/supervised_embeddings
|
| 19 |
+
max_samples: null
|
| 20 |
+
max_val_samples: null
|
| 21 |
+
preference_file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/unbiased_qwen/train.json
|
| 22 |
+
sample_rate: 24000
|
| 23 |
+
use_preextracted: true
|
| 24 |
+
val_preference_file: null
|
| 25 |
+
loss:
|
| 26 |
+
IF_ratio: 0.5
|
| 27 |
+
filter_ties: true
|
| 28 |
+
label_smoothing: 0.0
|
| 29 |
+
reduction: mean
|
| 30 |
+
model:
|
| 31 |
+
attention_mode: CA
|
| 32 |
+
attn_dropout: 0.0
|
| 33 |
+
category_embeddings: null
|
| 34 |
+
dim: 768
|
| 35 |
+
dim_head: 64
|
| 36 |
+
downsample:
|
| 37 |
+
configs:
|
| 38 |
+
conv2_4x:
|
| 39 |
+
factor: 4
|
| 40 |
+
kernel_size: 5
|
| 41 |
+
kind: conv*2
|
| 42 |
+
use_layernorm: true
|
| 43 |
+
conv_4x:
|
| 44 |
+
factor: 4
|
| 45 |
+
kernel_size: 5
|
| 46 |
+
kind: conv
|
| 47 |
+
stage: 1
|
| 48 |
+
use_layernorm: true
|
| 49 |
+
glu_4x:
|
| 50 |
+
factor: 4
|
| 51 |
+
kernel_size: 5
|
| 52 |
+
kind: gluconv*2+pw
|
| 53 |
+
use_layernorm: true
|
| 54 |
+
mean:
|
| 55 |
+
factor: 2
|
| 56 |
+
kind: mean
|
| 57 |
+
mean_4x:
|
| 58 |
+
dropout: 0.0
|
| 59 |
+
factor: 30
|
| 60 |
+
kind: mean+mlp
|
| 61 |
+
mlp_ratio: 2.0
|
| 62 |
+
none:
|
| 63 |
+
factor: 1
|
| 64 |
+
kind: none
|
| 65 |
+
eval: mean_4x
|
| 66 |
+
ref: null
|
| 67 |
+
text: none
|
| 68 |
+
ff_dropout: 0.0
|
| 69 |
+
ff_mult: 4
|
| 70 |
+
freeze_audio: true
|
| 71 |
+
freeze_text: true
|
| 72 |
+
gradient_checkpointing: false
|
| 73 |
+
heads: 8
|
| 74 |
+
joint_tf_depth: 1
|
| 75 |
+
load_config:
|
| 76 |
+
checkpoint_path: null
|
| 77 |
+
frozen_from_pretrained: true
|
| 78 |
+
pretrained_name: OpenMuQ/MuQ-MuLan-large
|
| 79 |
+
strict: false
|
| 80 |
+
mlp_dim: 768
|
| 81 |
+
mode: text_only
|
| 82 |
+
model_name: OpenMuQ/MuQ-MuLan-large
|
| 83 |
+
name: reward
|
| 84 |
+
null_embedding:
|
| 85 |
+
audio:
|
| 86 |
+
dropout: 0.5
|
| 87 |
+
length: 10
|
| 88 |
+
lyrics:
|
| 89 |
+
dropout: 0.3
|
| 90 |
+
length: 10
|
| 91 |
+
text:
|
| 92 |
+
dropout: 0.2
|
| 93 |
+
length: 10
|
| 94 |
+
output_dim: 2
|
| 95 |
+
prompt_tf_depth: 1
|
| 96 |
+
sr: 24000
|
| 97 |
+
text_encoder:
|
| 98 |
+
name: muq_mulan
|
| 99 |
+
tune: null
|
| 100 |
+
text_lora_config: null
|
| 101 |
+
train_muq_depth: 0
|
| 102 |
+
train_muqmulan: false
|
| 103 |
+
use_layer_idx: -1
|
| 104 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 105 |
+
run_name: null
|
| 106 |
+
train:
|
| 107 |
+
batch_size: 24
|
| 108 |
+
betas:
|
| 109 |
+
- 0.9
|
| 110 |
+
- 0.99
|
| 111 |
+
ema_decay: 0.9999
|
| 112 |
+
ema_update_every: 1
|
| 113 |
+
enable_gradient_checkpointing: true
|
| 114 |
+
force_clear_prev_results: false
|
| 115 |
+
grad_accum_every: 2
|
| 116 |
+
log_tensorboard: true
|
| 117 |
+
lr_schedule:
|
| 118 |
+
min_lr_ratio: 0.001
|
| 119 |
+
name: linear_cosine
|
| 120 |
+
total_steps: 30000
|
| 121 |
+
warmup_steps: 300
|
| 122 |
+
max_grad_norm: 100
|
| 123 |
+
mlp_lr: 0.0002
|
| 124 |
+
num_train_steps: 30000
|
| 125 |
+
num_valid_batches: 10
|
| 126 |
+
num_workers: 8
|
| 127 |
+
other_lr: null
|
| 128 |
+
resume: null
|
| 129 |
+
resume_optimizer: false
|
| 130 |
+
save_model_every: 2000
|
| 131 |
+
use_checkpoint_config: false
|
| 132 |
+
use_ema: true
|
| 133 |
+
use_lion: false
|
| 134 |
+
valid_batch_size: 20
|
| 135 |
+
valid_every: 2000
|
| 136 |
+
valid_frac: 0.1
|
| 137 |
+
verify_weights_on_load: true
|
| 138 |
+
validate:
|
| 139 |
+
checkpoint: null
|
| 140 |
+
dir: null
|
| 141 |
+
max_failure_cases: 30
|
| 142 |
+
num_batches: null
|
| 143 |
+
only: false
|
| 144 |
+
trust_checkpoint: true
|
downstream_mixed_linear/20260122_1143/downstream_config.yaml
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 2 |
+
basics:
|
| 3 |
+
random_seed: 42
|
| 4 |
+
save_dir: ${project_root}/experiments/downstream_mixed_linear
|
| 5 |
+
run_name: null
|
| 6 |
+
tasks:
|
| 7 |
+
- musicality
|
| 8 |
+
- alignment
|
| 9 |
+
- preference
|
| 10 |
+
backbone:
|
| 11 |
+
checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 12 |
+
freeze: false
|
| 13 |
+
freeze_encoder_only: true
|
| 14 |
+
from_ema: false
|
| 15 |
+
dataset:
|
| 16 |
+
train_file: ${project_root}/train_multitask.jsonl
|
| 17 |
+
test_file: ${project_root}/test_multitask.jsonl
|
| 18 |
+
heads:
|
| 19 |
+
hidden_dim: 768
|
| 20 |
+
init_from: null
|
| 21 |
+
musicality:
|
| 22 |
+
use_mlp: false
|
| 23 |
+
ordinal: false
|
| 24 |
+
dropout: 0.0
|
| 25 |
+
num_categories: 9
|
| 26 |
+
y_min: 1.0
|
| 27 |
+
y_max: 5.0
|
| 28 |
+
step: 0.5
|
| 29 |
+
alignment:
|
| 30 |
+
use_mlp: false
|
| 31 |
+
ordinal: false
|
| 32 |
+
dropout: 0.0
|
| 33 |
+
num_categories: 9
|
| 34 |
+
y_min: 1.0
|
| 35 |
+
y_max: 5.0
|
| 36 |
+
step: 0.5
|
| 37 |
+
preference:
|
| 38 |
+
use_mlp: false
|
| 39 |
+
dropout: 0.0
|
| 40 |
+
train:
|
| 41 |
+
dataset_mode: mixed
|
| 42 |
+
num_train_steps: 5000
|
| 43 |
+
batch_size: 32
|
| 44 |
+
learning_rate: 0.0005
|
| 45 |
+
backbone_learning_rate: 1.0e-05
|
| 46 |
+
weight_decay: 0.01
|
| 47 |
+
max_grad_norm: 1.0
|
| 48 |
+
warmup_steps: 200
|
| 49 |
+
schedule_type: cosine
|
| 50 |
+
min_lr_ratio: 0.01
|
| 51 |
+
log_interval: 100
|
| 52 |
+
val_interval: 500
|
| 53 |
+
save_interval: 1000
|
| 54 |
+
num_workers: 8
|
| 55 |
+
resume: null
|
| 56 |
+
device: cuda
|
downstream_mixed_linear/20260122_1143/predicted_0122_1533.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
downstream_mixed_linear/20260122_1143/train.log
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-22 11:43:55 | INFO | Starting downstream training: 20260122_1143
|
| 2 |
+
2026-01-22 11:43:55 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed_linear/20260122_1143
|
| 3 |
+
2026-01-22 11:43:55 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed_linear/20260122_1143/config.yaml
|
| 4 |
+
2026-01-22 11:43:55 | INFO | Training tasks: ['musicality', 'alignment', 'preference']
|
| 5 |
+
2026-01-22 11:43:55 | INFO | Dataset mode: mixed
|
| 6 |
+
2026-01-22 11:43:58 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 7 |
+
2026-01-22 11:43:58 | INFO | Using checkpoint config for model
|
| 8 |
+
2026-01-22 11:44:03 | INFO | Missing keys (794): ['alignment_head.0.weight', 'alignment_head.0.bias', 'alignment_head.1.weight', 'alignment_head.1.bias', 'alignment_head.3.weight']...
|
| 9 |
+
2026-01-22 11:44:03 | WARNING | Missing keys: 283
|
| 10 |
+
2026-01-22 11:44:04 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
|
| 11 |
+
2026-01-22 11:44:04 | INFO | Created MixedDownstreamTaskModel (freeze_encoder_only=True)
|
| 12 |
+
2026-01-22 11:44:04 | INFO | Added linear head for task 'musicality'
|
| 13 |
+
2026-01-22 11:44:04 | INFO | Added linear head for task 'alignment'
|
| 14 |
+
2026-01-22 11:44:04 | INFO | Added linear head for task 'preference'
|
| 15 |
+
2026-01-22 11:44:04 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
|
| 16 |
+
2026-01-22 11:44:04 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
|
| 17 |
+
2026-01-22 11:44:04 | INFO | Task 'musicality': train=4322, test=913
|
| 18 |
+
2026-01-22 11:44:04 | INFO | Task 'alignment': train=1923, test=913
|
| 19 |
+
2026-01-22 11:44:04 | INFO | Task 'preference': train=1065, test=275
|
| 20 |
+
2026-01-22 11:44:04 | INFO | Backbone trainable parameters: 21,279,237 (lr=1e-05)
|
| 21 |
+
2026-01-22 11:44:04 | INFO | Head parameters: 2,307 (lr=0.0005)
|
| 22 |
+
2026-01-22 11:44:04 | INFO | Total trainable parameters: 21,281,544
|
| 23 |
+
2026-01-22 11:44:04 | INFO | [MIXED MODE] Starting training for 5000 steps
|
| 24 |
+
2026-01-22 11:44:04 | INFO | Backbone LR: 1e-05, Head LR: 0.0005
|
| 25 |
+
2026-01-22 11:46:24 | INFO | [Step 100] musicality/loss=2.2005 | musicality/mse=8.3402 | alignment/loss=2.2115 | alignment/mse=9.0544 | preference/loss=0.9264 | preference/accuracy=0.5772 | lr_backbone=5.05e-06 | lr_heads=2.53e-04
|
| 26 |
+
2026-01-22 11:48:37 | INFO | [Step 200] musicality/loss=0.7967 | musicality/mse=1.0614 | alignment/loss=0.7918 | alignment/mse=1.0462 | preference/loss=0.5578 | preference/accuracy=0.7228 | lr_backbone=1.00e-05 | lr_heads=5.00e-04
|
| 27 |
+
2026-01-22 11:50:55 | INFO | [Step 300] musicality/loss=0.6328 | musicality/mse=0.6628 | alignment/loss=0.6676 | alignment/mse=0.7203 | preference/loss=0.4710 | preference/accuracy=0.7700 | lr_backbone=9.99e-06 | lr_heads=4.99e-04
|
| 28 |
+
2026-01-22 11:53:17 | INFO | [Step 400] musicality/loss=0.5768 | musicality/mse=0.5607 | alignment/loss=0.6102 | alignment/mse=0.6049 | preference/loss=0.4418 | preference/accuracy=0.7941 | lr_backbone=9.96e-06 | lr_heads=4.98e-04
|
| 29 |
+
2026-01-22 11:55:31 | INFO | [Step 500] musicality/loss=0.5430 | musicality/mse=0.4994 | alignment/loss=0.5927 | alignment/mse=0.5801 | preference/loss=0.4014 | preference/accuracy=0.8159 | lr_backbone=9.90e-06 | lr_heads=4.95e-04
|
| 30 |
+
2026-01-22 11:55:31 | INFO | [Step 500] Running validation...
|
| 31 |
+
2026-01-22 11:55:40 | INFO | [Val] musicality: loss=1.1473 | mse=1.8126
|
| 32 |
+
2026-01-22 11:55:47 | INFO | [Val] alignment: loss=1.0390 | mse=1.5827
|
| 33 |
+
2026-01-22 11:56:00 | INFO | [Val] preference: loss=0.5431 | accuracy=0.7405
|
| 34 |
+
2026-01-22 11:56:00 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed_linear/20260122_1143/ckpt/mixed_best.pt (81.2MB, 58 params)
|
| 35 |
+
2026-01-22 11:56:00 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed_linear/20260122_1143/ckpt/mixed_best_full.pt (141.3MB, 3 heads)
|
| 36 |
+
2026-01-22 11:56:00 | INFO | New best model saved (val_loss=0.9098)
|
| 37 |
+
2026-01-22 11:58:16 | INFO | [Step 600] musicality/loss=0.5325 | musicality/mse=0.4837 | alignment/loss=0.5695 | alignment/mse=0.5422 | preference/loss=0.3828 | preference/accuracy=0.8187 | lr_backbone=9.83e-06 | lr_heads=4.91e-04
|
| 38 |
+
2026-01-22 12:00:34 | INFO | [Step 700] musicality/loss=0.5178 | musicality/mse=0.4543 | alignment/loss=0.5538 | alignment/mse=0.5039 | preference/loss=0.3556 | preference/accuracy=0.8400 | lr_backbone=9.73e-06 | lr_heads=4.87e-04
|
| 39 |
+
2026-01-22 12:02:52 | INFO | [Step 800] musicality/loss=0.4982 | musicality/mse=0.4278 | alignment/loss=0.5263 | alignment/mse=0.4646 | preference/loss=0.3258 | preference/accuracy=0.8547 | lr_backbone=9.62e-06 | lr_heads=4.81e-04
|
| 40 |
+
2026-01-22 12:05:06 | INFO | [Step 900] musicality/loss=0.4885 | musicality/mse=0.4085 | alignment/loss=0.5110 | alignment/mse=0.4444 | preference/loss=0.2978 | preference/accuracy=0.8709 | lr_backbone=9.48e-06 | lr_heads=4.74e-04
|
| 41 |
+
2026-01-22 12:07:14 | INFO | [Step 1000] musicality/loss=0.4678 | musicality/mse=0.3857 | alignment/loss=0.4986 | alignment/mse=0.4226 | preference/loss=0.2730 | preference/accuracy=0.8916 | lr_backbone=9.33e-06 | lr_heads=4.67e-04
|
| 42 |
+
2026-01-22 12:07:14 | INFO | [Step 1000] Running validation...
|
| 43 |
+
2026-01-22 12:07:19 | INFO | [Val] musicality: loss=1.3048 | mse=2.2855
|
| 44 |
+
2026-01-22 12:07:24 | INFO | [Val] alignment: loss=1.2686 | mse=2.1902
|
| 45 |
+
2026-01-22 12:07:34 | INFO | [Val] preference: loss=0.6575 | accuracy=0.7058
|
| 46 |
+
2026-01-22 12:07:34 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed_linear/20260122_1143/ckpt/mixed_step_1000.pt (81.2MB, 58 params)
|
| 47 |
+
2026-01-22 12:07:34 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed_linear/20260122_1143/ckpt/mixed_step_1000_full.pt (141.3MB, 3 heads)
|
| 48 |
+
2026-01-22 12:09:42 | INFO | [Step 1100] musicality/loss=0.4760 | musicality/mse=0.3916 | alignment/loss=0.4768 | alignment/mse=0.3976 | preference/loss=0.2531 | preference/accuracy=0.8972 | lr_backbone=9.16e-06 | lr_heads=4.58e-04
|
| 49 |
+
2026-01-22 12:11:45 | INFO | [Step 1200] musicality/loss=0.4702 | musicality/mse=0.3893 | alignment/loss=0.4787 | alignment/mse=0.3995 | preference/loss=0.2229 | preference/accuracy=0.9147 | lr_backbone=8.97e-06 | lr_heads=4.48e-04
|
| 50 |
+
2026-01-22 12:13:48 | INFO | [Step 1300] musicality/loss=0.4608 | musicality/mse=0.3740 | alignment/loss=0.4580 | alignment/mse=0.3734 | preference/loss=0.2015 | preference/accuracy=0.9309 | lr_backbone=8.76e-06 | lr_heads=4.38e-04
|
| 51 |
+
2026-01-22 12:15:48 | INFO | [Step 1400] musicality/loss=0.4470 | musicality/mse=0.3551 | alignment/loss=0.4462 | alignment/mse=0.3573 | preference/loss=0.1836 | preference/accuracy=0.9369 | lr_backbone=8.54e-06 | lr_heads=4.27e-04
|
| 52 |
+
2026-01-22 12:17:55 | INFO | [Step 1500] musicality/loss=0.4396 | musicality/mse=0.3439 | alignment/loss=0.4267 | alignment/mse=0.3357 | preference/loss=0.1711 | preference/accuracy=0.9428 | lr_backbone=8.30e-06 | lr_heads=4.15e-04
|
| 53 |
+
2026-01-22 12:17:55 | INFO | [Step 1500] Running validation...
|
| 54 |
+
2026-01-22 12:18:00 | INFO | [Val] musicality: loss=1.3053 | mse=2.2794
|
| 55 |
+
2026-01-22 12:18:05 | INFO | [Val] alignment: loss=1.1733 | mse=1.9250
|
| 56 |
+
2026-01-22 12:18:12 | INFO | [Val] preference: loss=0.9029 | accuracy=0.6954
|
| 57 |
+
2026-01-22 12:20:16 | INFO | [Step 1600] musicality/loss=0.4350 | musicality/mse=0.3406 | alignment/loss=0.4201 | alignment/mse=0.3266 | preference/loss=0.1518 | preference/accuracy=0.9556 | lr_backbone=8.04e-06 | lr_heads=4.02e-04
|
| 58 |
+
2026-01-22 12:22:29 | INFO | [Step 1700] musicality/loss=0.4266 | musicality/mse=0.3288 | alignment/loss=0.4157 | alignment/mse=0.3292 | preference/loss=0.1400 | preference/accuracy=0.9616 | lr_backbone=7.78e-06 | lr_heads=3.89e-04
|
| 59 |
+
2026-01-22 12:24:36 | INFO | [Step 1800] musicality/loss=0.4175 | musicality/mse=0.3159 | alignment/loss=0.4053 | alignment/mse=0.3053 | preference/loss=0.1269 | preference/accuracy=0.9672 | lr_backbone=7.50e-06 | lr_heads=3.75e-04
|
| 60 |
+
2026-01-22 12:26:44 | INFO | [Step 1900] musicality/loss=0.4130 | musicality/mse=0.3172 | alignment/loss=0.3933 | alignment/mse=0.2983 | preference/loss=0.1208 | preference/accuracy=0.9647 | lr_backbone=7.21e-06 | lr_heads=3.61e-04
|
| 61 |
+
2026-01-22 12:28:50 | INFO | [Step 2000] musicality/loss=0.3964 | musicality/mse=0.2923 | alignment/loss=0.3785 | alignment/mse=0.2798 | preference/loss=0.1063 | preference/accuracy=0.9744 | lr_backbone=6.91e-06 | lr_heads=3.46e-04
|
| 62 |
+
2026-01-22 12:28:50 | INFO | [Step 2000] Running validation...
|
| 63 |
+
2026-01-22 12:28:54 | INFO | [Val] musicality: loss=1.2472 | mse=2.1547
|
| 64 |
+
2026-01-22 12:28:59 | INFO | [Val] alignment: loss=1.3002 | mse=2.3432
|
| 65 |
+
2026-01-22 12:29:06 | INFO | [Val] preference: loss=1.0439 | accuracy=0.6999
|
| 66 |
+
2026-01-22 12:29:06 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed_linear/20260122_1143/ckpt/mixed_step_2000.pt (81.2MB, 58 params)
|
| 67 |
+
2026-01-22 12:29:07 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed_linear/20260122_1143/ckpt/mixed_step_2000_full.pt (141.3MB, 3 heads)
|
| 68 |
+
2026-01-22 12:31:17 | INFO | [Step 2100] musicality/loss=0.3994 | musicality/mse=0.2960 | alignment/loss=0.3650 | alignment/mse=0.2675 | preference/loss=0.1017 | preference/accuracy=0.9750 | lr_backbone=6.61e-06 | lr_heads=3.30e-04
|
| 69 |
+
2026-01-22 12:33:21 | INFO | [Step 2200] musicality/loss=0.3913 | musicality/mse=0.2835 | alignment/loss=0.3655 | alignment/mse=0.2679 | preference/loss=0.0847 | preference/accuracy=0.9816 | lr_backbone=6.29e-06 | lr_heads=3.15e-04
|
| 70 |
+
2026-01-22 12:35:26 | INFO | [Step 2300] musicality/loss=0.3898 | musicality/mse=0.2861 | alignment/loss=0.3502 | alignment/mse=0.2519 | preference/loss=0.0748 | preference/accuracy=0.9838 | lr_backbone=5.98e-06 | lr_heads=2.99e-04
|
| 71 |
+
2026-01-22 12:37:23 | INFO | [Step 2400] musicality/loss=0.3874 | musicality/mse=0.2812 | alignment/loss=0.3407 | alignment/mse=0.2414 | preference/loss=0.0749 | preference/accuracy=0.9822 | lr_backbone=5.65e-06 | lr_heads=2.83e-04
|
| 72 |
+
2026-01-22 12:39:29 | INFO | [Step 2500] musicality/loss=0.3718 | musicality/mse=0.2615 | alignment/loss=0.3326 | alignment/mse=0.2398 | preference/loss=0.0760 | preference/accuracy=0.9831 | lr_backbone=5.33e-06 | lr_heads=2.66e-04
|
| 73 |
+
2026-01-22 12:39:29 | INFO | [Step 2500] Running validation...
|
| 74 |
+
2026-01-22 12:39:34 | INFO | [Val] musicality: loss=1.3347 | mse=2.3899
|
| 75 |
+
2026-01-22 12:39:39 | INFO | [Val] alignment: loss=1.2975 | mse=2.3241
|
| 76 |
+
2026-01-22 12:39:47 | INFO | [Val] preference: loss=1.1918 | accuracy=0.7023
|
| 77 |
+
2026-01-22 12:41:57 | INFO | [Step 2600] musicality/loss=0.3819 | musicality/mse=0.2791 | alignment/loss=0.3210 | alignment/mse=0.2263 | preference/loss=0.0686 | preference/accuracy=0.9831 | lr_backbone=5.00e-06 | lr_heads=2.50e-04
|
| 78 |
+
2026-01-22 12:44:04 | INFO | [Step 2700] musicality/loss=0.3627 | musicality/mse=0.2539 | alignment/loss=0.3177 | alignment/mse=0.2255 | preference/loss=0.0612 | preference/accuracy=0.9894 | lr_backbone=4.67e-06 | lr_heads=2.34e-04
|
| 79 |
+
2026-01-22 12:46:14 | INFO | [Step 2800] musicality/loss=0.3585 | musicality/mse=0.2494 | alignment/loss=0.3108 | alignment/mse=0.2167 | preference/loss=0.0606 | preference/accuracy=0.9888 | lr_backbone=4.35e-06 | lr_heads=2.17e-04
|
| 80 |
+
2026-01-22 12:48:15 | INFO | [Step 2900] musicality/loss=0.3659 | musicality/mse=0.2576 | alignment/loss=0.3024 | alignment/mse=0.2116 | preference/loss=0.0593 | preference/accuracy=0.9869 | lr_backbone=4.02e-06 | lr_heads=2.01e-04
|
| 81 |
+
2026-01-22 12:50:20 | INFO | [Step 3000] musicality/loss=0.3553 | musicality/mse=0.2498 | alignment/loss=0.2947 | alignment/mse=0.2077 | preference/loss=0.0585 | preference/accuracy=0.9878 | lr_backbone=3.71e-06 | lr_heads=1.85e-04
|
| 82 |
+
2026-01-22 12:50:20 | INFO | [Step 3000] Running validation...
|
| 83 |
+
2026-01-22 12:50:25 | INFO | [Val] musicality: loss=1.3277 | mse=2.3656
|
| 84 |
+
2026-01-22 12:50:30 | INFO | [Val] alignment: loss=1.2973 | mse=2.3727
|
| 85 |
+
2026-01-22 12:50:38 | INFO | [Val] preference: loss=1.3603 | accuracy=0.6919
|
| 86 |
+
2026-01-22 12:50:38 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed_linear/20260122_1143/ckpt/mixed_step_3000.pt (81.2MB, 58 params)
|
| 87 |
+
2026-01-22 12:50:38 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed_linear/20260122_1143/ckpt/mixed_step_3000_full.pt (141.3MB, 3 heads)
|
| 88 |
+
2026-01-22 12:52:38 | INFO | [Step 3100] musicality/loss=0.3486 | musicality/mse=0.2427 | alignment/loss=0.2857 | alignment/mse=0.1987 | preference/loss=0.0523 | preference/accuracy=0.9900 | lr_backbone=3.39e-06 | lr_heads=1.70e-04
|
| 89 |
+
2026-01-22 12:54:36 | INFO | [Step 3200] musicality/loss=0.3537 | musicality/mse=0.2494 | alignment/loss=0.2805 | alignment/mse=0.1955 | preference/loss=0.0583 | preference/accuracy=0.9869 | lr_backbone=3.09e-06 | lr_heads=1.54e-04
|
| 90 |
+
2026-01-22 12:56:42 | INFO | [Step 3300] musicality/loss=0.3390 | musicality/mse=0.2315 | alignment/loss=0.2796 | alignment/mse=0.1946 | preference/loss=0.0515 | preference/accuracy=0.9922 | lr_backbone=2.79e-06 | lr_heads=1.39e-04
|
| 91 |
+
2026-01-22 12:58:57 | INFO | [Step 3400] musicality/loss=0.3474 | musicality/mse=0.2402 | alignment/loss=0.2661 | alignment/mse=0.1839 | preference/loss=0.0536 | preference/accuracy=0.9859 | lr_backbone=2.50e-06 | lr_heads=1.25e-04
|
| 92 |
+
2026-01-22 13:01:01 | INFO | [Step 3500] musicality/loss=0.3431 | musicality/mse=0.2380 | alignment/loss=0.2742 | alignment/mse=0.1948 | preference/loss=0.0469 | preference/accuracy=0.9916 | lr_backbone=2.22e-06 | lr_heads=1.11e-04
|
| 93 |
+
2026-01-22 13:01:01 | INFO | [Step 3500] Running validation...
|
| 94 |
+
2026-01-22 13:01:05 | INFO | [Val] musicality: loss=1.3314 | mse=2.3666
|
| 95 |
+
2026-01-22 13:01:09 | INFO | [Val] alignment: loss=1.3121 | mse=2.4192
|
| 96 |
+
2026-01-22 13:01:18 | INFO | [Val] preference: loss=1.3744 | accuracy=0.6954
|
| 97 |
+
2026-01-22 13:03:23 | INFO | [Step 3600] musicality/loss=0.3315 | musicality/mse=0.2288 | alignment/loss=0.2630 | alignment/mse=0.1819 | preference/loss=0.0501 | preference/accuracy=0.9897 | lr_backbone=1.96e-06 | lr_heads=9.78e-05
|
| 98 |
+
2026-01-22 13:05:35 | INFO | [Step 3700] musicality/loss=0.3355 | musicality/mse=0.2339 | alignment/loss=0.2564 | alignment/mse=0.1801 | preference/loss=0.0435 | preference/accuracy=0.9931 | lr_backbone=1.70e-06 | lr_heads=8.52e-05
|
| 99 |
+
2026-01-22 13:07:35 | INFO | [Step 3800] musicality/loss=0.3233 | musicality/mse=0.2131 | alignment/loss=0.2572 | alignment/mse=0.1828 | preference/loss=0.0474 | preference/accuracy=0.9916 | lr_backbone=1.46e-06 | lr_heads=7.32e-05
|
| 100 |
+
2026-01-22 13:09:36 | INFO | [Step 3900] musicality/loss=0.3264 | musicality/mse=0.2250 | alignment/loss=0.2501 | alignment/mse=0.1753 | preference/loss=0.0467 | preference/accuracy=0.9891 | lr_backbone=1.24e-06 | lr_heads=6.20e-05
|
| 101 |
+
2026-01-22 13:11:37 | INFO | [Step 4000] musicality/loss=0.3311 | musicality/mse=0.2319 | alignment/loss=0.2497 | alignment/mse=0.1790 | preference/loss=0.0453 | preference/accuracy=0.9909 | lr_backbone=1.03e-06 | lr_heads=5.17e-05
|
| 102 |
+
2026-01-22 13:11:37 | INFO | [Step 4000] Running validation...
|
| 103 |
+
2026-01-22 13:11:41 | INFO | [Val] musicality: loss=1.3147 | mse=2.3406
|
| 104 |
+
2026-01-22 13:11:45 | INFO | [Val] alignment: loss=1.3227 | mse=2.4580
|
| 105 |
+
2026-01-22 13:11:53 | INFO | [Val] preference: loss=1.4434 | accuracy=0.6954
|
| 106 |
+
2026-01-22 13:11:53 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed_linear/20260122_1143/ckpt/mixed_step_4000.pt (81.2MB, 58 params)
|
| 107 |
+
2026-01-22 13:11:53 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed_linear/20260122_1143/ckpt/mixed_step_4000_full.pt (141.3MB, 3 heads)
|
| 108 |
+
2026-01-22 13:13:52 | INFO | [Step 4100] musicality/loss=0.3189 | musicality/mse=0.2125 | alignment/loss=0.2453 | alignment/mse=0.1733 | preference/loss=0.0447 | preference/accuracy=0.9922 | lr_backbone=8.43e-07 | lr_heads=4.21e-05
|
| 109 |
+
2026-01-22 13:15:55 | INFO | [Step 4200] musicality/loss=0.3213 | musicality/mse=0.2174 | alignment/loss=0.2428 | alignment/mse=0.1738 | preference/loss=0.0405 | preference/accuracy=0.9934 | lr_backbone=6.70e-07 | lr_heads=3.35e-05
|
| 110 |
+
2026-01-22 13:17:53 | INFO | [Step 4300] musicality/loss=0.3200 | musicality/mse=0.2224 | alignment/loss=0.2357 | alignment/mse=0.1659 | preference/loss=0.0388 | preference/accuracy=0.9941 | lr_backbone=5.16e-07 | lr_heads=2.58e-05
|
| 111 |
+
2026-01-22 13:19:56 | INFO | [Step 4400] musicality/loss=0.3121 | musicality/mse=0.2100 | alignment/loss=0.2416 | alignment/mse=0.1771 | preference/loss=0.0408 | preference/accuracy=0.9950 | lr_backbone=3.81e-07 | lr_heads=1.90e-05
|
finetune_human/20260124_2143/config.yaml
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DEVICES: '3'
|
| 2 |
+
accelerate:
|
| 3 |
+
mixed_precision: bf16
|
| 4 |
+
basics:
|
| 5 |
+
random_seed: 42
|
| 6 |
+
save_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model
|
| 7 |
+
dataset:
|
| 8 |
+
audio_dropout:
|
| 9 |
+
apply_to_eval: false
|
| 10 |
+
apply_to_ref: true
|
| 11 |
+
enabled: false
|
| 12 |
+
eval_only_on_training: true
|
| 13 |
+
max_duration: 1500
|
| 14 |
+
min_duration: 1500
|
| 15 |
+
train_mode: start
|
| 16 |
+
cache_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/tmp
|
| 17 |
+
db_path: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/backend/database.db
|
| 18 |
+
duration: 600.0
|
| 19 |
+
embedding_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/supervised_embeddings
|
| 20 |
+
max_samples: null
|
| 21 |
+
max_val_samples: null
|
| 22 |
+
metadata_jsonl: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/all_comparisons.jsonl
|
| 23 |
+
mode: raw_text_frozen_audio
|
| 24 |
+
preference_file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/human_annotations/train.json
|
| 25 |
+
sample_rate: 24000
|
| 26 |
+
val_preference_file: null
|
| 27 |
+
loss:
|
| 28 |
+
IF_ratio: 0.5
|
| 29 |
+
filter_ties: true
|
| 30 |
+
label_smoothing: 0.0
|
| 31 |
+
reduction: mean
|
| 32 |
+
model:
|
| 33 |
+
attention_mode: SA
|
| 34 |
+
attn_dropout: 0.0
|
| 35 |
+
category_embeddings: null
|
| 36 |
+
dim: 768
|
| 37 |
+
dim_head: 64
|
| 38 |
+
downsample:
|
| 39 |
+
configs:
|
| 40 |
+
conv2_4x:
|
| 41 |
+
factor: 4
|
| 42 |
+
kernel_size: 5
|
| 43 |
+
kind: conv*2
|
| 44 |
+
use_layernorm: true
|
| 45 |
+
conv_4x:
|
| 46 |
+
factor: 4
|
| 47 |
+
kernel_size: 5
|
| 48 |
+
kind: conv
|
| 49 |
+
stage: 1
|
| 50 |
+
use_layernorm: true
|
| 51 |
+
glu_4x:
|
| 52 |
+
factor: 4
|
| 53 |
+
kernel_size: 5
|
| 54 |
+
kind: gluconv*2+pw
|
| 55 |
+
use_layernorm: true
|
| 56 |
+
mean:
|
| 57 |
+
factor: 2
|
| 58 |
+
kind: mean
|
| 59 |
+
mean_4x:
|
| 60 |
+
dropout: 0.0
|
| 61 |
+
factor: 30
|
| 62 |
+
kind: mean+mlp
|
| 63 |
+
mlp_ratio: 2.0
|
| 64 |
+
none:
|
| 65 |
+
factor: 1
|
| 66 |
+
kind: none
|
| 67 |
+
eval: mean_4x
|
| 68 |
+
ref: null
|
| 69 |
+
text: none
|
| 70 |
+
ff_dropout: 0.0
|
| 71 |
+
ff_mult: 4
|
| 72 |
+
freeze_audio: true
|
| 73 |
+
freeze_text: true
|
| 74 |
+
gradient_checkpointing: false
|
| 75 |
+
heads: 8
|
| 76 |
+
joint_tf_depth: 1
|
| 77 |
+
load_config:
|
| 78 |
+
checkpoint_path: null
|
| 79 |
+
frozen_from_pretrained: true
|
| 80 |
+
pretrained_name: OpenMuQ/MuQ-MuLan-large
|
| 81 |
+
strict: false
|
| 82 |
+
mlp_dim: 768
|
| 83 |
+
mode: concat_text_late
|
| 84 |
+
model_name: OpenMuQ/MuQ-MuLan-large
|
| 85 |
+
name: reward
|
| 86 |
+
no_condition: false
|
| 87 |
+
null_embedding:
|
| 88 |
+
audio:
|
| 89 |
+
dropout: 0.5
|
| 90 |
+
length: 10
|
| 91 |
+
lyrics:
|
| 92 |
+
dropout: 0.3
|
| 93 |
+
length: 10
|
| 94 |
+
text:
|
| 95 |
+
dropout: 0
|
| 96 |
+
length: 10
|
| 97 |
+
output_dim: 2
|
| 98 |
+
prompt_tf_depth: 4
|
| 99 |
+
sr: 24000
|
| 100 |
+
text_encoder:
|
| 101 |
+
name: muq_mulan
|
| 102 |
+
tune: null
|
| 103 |
+
text_lora_config: null
|
| 104 |
+
train_muq_depth: 0
|
| 105 |
+
train_muqmulan: false
|
| 106 |
+
use_audio: true
|
| 107 |
+
use_layer_idx: -1
|
| 108 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 109 |
+
run_name: null
|
| 110 |
+
train:
|
| 111 |
+
batch_size: 48
|
| 112 |
+
betas:
|
| 113 |
+
- 0.9
|
| 114 |
+
- 0.99
|
| 115 |
+
ema_decay: 0.9999
|
| 116 |
+
ema_update_every: 1
|
| 117 |
+
enable_gradient_checkpointing: true
|
| 118 |
+
force_clear_prev_results: false
|
| 119 |
+
grad_accum_every: 1
|
| 120 |
+
log_tensorboard: true
|
| 121 |
+
lr_schedule:
|
| 122 |
+
min_lr_ratio: 0.001
|
| 123 |
+
name: linear_cosine
|
| 124 |
+
total_steps: 4000
|
| 125 |
+
warmup_steps: 300
|
| 126 |
+
max_grad_norm: 1
|
| 127 |
+
mlp_lr: 0.0001
|
| 128 |
+
num_train_steps: 4000
|
| 129 |
+
num_valid_batches: null
|
| 130 |
+
num_workers: 8
|
| 131 |
+
other_lr: 1.0e-05
|
| 132 |
+
resume: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.best_29999.pt
|
| 133 |
+
resume_optimizer: false
|
| 134 |
+
save_model_every: 2000
|
| 135 |
+
use_checkpoint_config: true
|
| 136 |
+
use_ema: true
|
| 137 |
+
use_lion: false
|
| 138 |
+
valid_batch_size: 20
|
| 139 |
+
valid_every: 100
|
| 140 |
+
valid_frac: 0.1
|
| 141 |
+
verify_weights_on_load: true
|
| 142 |
+
validate_only: false
|
finetune_human/20260124_2143/reward_model/1769262210.5061178/events.out.tfevents.1769262210.MACLAB-S004.2626926.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:82ee766b07252644d7045f50ffd3d29ed1cbc0b26a834bdb1d855c526f959108
|
| 3 |
+
size 503
|
finetune_human/20260124_2143/reward_model/1769262210.5078583/hparams.yml
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
batch_size: 48
|
| 2 |
+
grad_accum_every: 1
|
| 3 |
+
learning_rate: 0.0001
|
| 4 |
+
num_train_steps: 4000
|
finetune_human/20260124_2143/reward_model/events.out.tfevents.1769262210.MACLAB-S004.2626926.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:361130a96e5393eb1f50a4f818c47547a16295e3f01976ce0e9113e0a561cf68
|
| 3 |
+
size 2219689
|
finetune_human/20260124_2143/train.20260124_2143.log
ADDED
|
@@ -0,0 +1,803 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-24 21:43:19 | INFO | Log file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/train.20260124_2143.log
|
| 2 |
+
2026-01-24 21:43:19 | INFO | Random seed set to 42
|
| 3 |
+
2026-01-24 21:43:21 | INFO | Created RawTextFrozenAudioDataset with 3463 samples
|
| 4 |
+
2026-01-24 21:43:21 | INFO | Split dataset into train (3117) and validation (346) sets (ratio: 10.00%)
|
| 5 |
+
2026-01-24 21:43:21 | INFO | Will resume from checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.best_29999.pt
|
| 6 |
+
2026-01-24 21:43:21 | INFO | Using checkpoint config for model initialization (continue training mode)
|
| 7 |
+
2026-01-24 21:43:29 | INFO | Created RewardAttentionModel with attention_mode=SA
|
| 8 |
+
2026-01-24 21:43:29 | INFO | Created PreferenceLoss with filter_ties=True
|
| 9 |
+
2026-01-24 21:43:29 | INFO | ✓ Gradient checkpointing enabled
|
| 10 |
+
2026-01-24 21:43:29 | INFO | ✓ EMA enabled with decay=0.9999, update_every=1 (CPU offload)
|
| 11 |
+
2026-01-24 21:43:29 | INFO | MLP head parameters: 1,186,563 params, lr=0.0001
|
| 12 |
+
2026-01-24 21:43:29 | INFO | Other parameters: 37,397,634 params, lr=1e-05
|
| 13 |
+
2026-01-24 21:43:29 | INFO | Using lr_schedule=linear_cosine warmup_steps=300 total_steps=4000
|
| 14 |
+
2026-01-24 21:43:29 | INFO | Training with fixed validation set
|
| 15 |
+
2026-01-24 21:43:29 | INFO | Train batch_size: 48, Valid batch_size: 20
|
| 16 |
+
2026-01-24 21:43:29 | INFO | Missing keys (782): ['text_module.model.embeddings.word_embeddings.weight', 'text_module.model.embeddings.position_embeddings.weight', 'text_module.model.embeddings.token_type_embeddings.weight', 'text_module.model.embeddings.LayerNorm.weight', 'text_module.model.embeddings.LayerNorm.bias']...
|
| 17 |
+
2026-01-24 21:43:29 | INFO | ✓ EMA state loaded
|
| 18 |
+
2026-01-24 21:43:29 | INFO | ✓ Starting from step 0 (transfer learning mode, ignoring checkpoint steps=29999)
|
| 19 |
+
2026-01-24 21:43:29 | INFO | Resumed from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.best_29999.pt
|
| 20 |
+
2026-01-24 21:43:29 | INFO | Parameters: 701.162M total, 38.584M trainable
|
| 21 |
+
2026-01-24 21:43:29 | INFO | Text encoder (frozen): 328.389M
|
| 22 |
+
2026-01-24 21:43:29 | INFO | Audio encoder (frozen): 334.189M
|
| 23 |
+
2026-01-24 21:43:29 | INFO | Other trainable: 38.584M
|
| 24 |
+
2026-01-24 21:43:29 | INFO | ℹ No LoRA configuration detected
|
| 25 |
+
2026-01-24 21:43:30 | INFO | ============================================================
|
| 26 |
+
2026-01-24 21:43:30 | INFO | Ready to start training
|
| 27 |
+
2026-01-24 21:43:30 | INFO | ============================================================
|
| 28 |
+
2026-01-24 21:43:30 | INFO | Starting training from step 0
|
| 29 |
+
2026-01-24 21:43:30 | INFO | ===== Accelerator / CUDA Debug Info =====
|
| 30 |
+
2026-01-24 21:43:30 | INFO | accelerator.device = cuda
|
| 31 |
+
2026-01-24 21:43:30 | INFO | mixed_precision = bf16
|
| 32 |
+
2026-01-24 21:43:30 | INFO | distributed_type = NO
|
| 33 |
+
2026-01-24 21:43:30 | INFO | num_processes = 1
|
| 34 |
+
2026-01-24 21:43:30 | INFO | process_index = 0
|
| 35 |
+
2026-01-24 21:43:30 | INFO | is_main_process = True
|
| 36 |
+
2026-01-24 21:43:30 | INFO | torch.cuda.is_available() = True
|
| 37 |
+
2026-01-24 21:43:30 | INFO | torch.cuda.device_count() = 1
|
| 38 |
+
2026-01-24 21:43:30 | INFO | current_device = 0
|
| 39 |
+
2026-01-24 21:43:30 | INFO | device_name = NVIDIA GeForce RTX 4090
|
| 40 |
+
2026-01-24 21:43:30 | INFO | model parameter device = cuda:0
|
| 41 |
+
2026-01-24 21:43:30 | INFO | Training for 4000.0 steps (~63 epochs, 64 steps/epoch)
|
| 42 |
+
2026-01-24 21:43:38 | INFO | Step 0: loss=1.6133 | IF_loss=2.2461, MQ_loss=0.9805 | acc=0.740 (IF=0.708, MQ=0.771) | lr=0.000001
|
| 43 |
+
2026-01-24 21:43:38 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.0.pt (filtered to 38.584M trainable parameters)
|
| 44 |
+
2026-01-24 21:43:39 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.0.pt (575.2MB)
|
| 45 |
+
2026-01-24 21:43:39 | INFO | Step 0: Saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.0.pt
|
| 46 |
+
2026-01-24 21:45:32 | INFO |
|
| 47 |
+
============================================================
|
| 48 |
+
Validation Results (took 9.56s):
|
| 49 |
+
Samples: 346 instruction, 346 quality
|
| 50 |
+
Instruction Acc: 0.6821
|
| 51 |
+
Quality Acc: 0.6387
|
| 52 |
+
Average Acc: 0.6604
|
| 53 |
+
Total Loss: 1.8726
|
| 54 |
+
Instruction Loss: 1.6586
|
| 55 |
+
Quality Loss: 2.0866
|
| 56 |
+
============================================================
|
| 57 |
+
2026-01-24 21:45:32 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_99.pt (filtered to 38.584M trainable parameters)
|
| 58 |
+
2026-01-24 21:45:33 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_99.pt (575.2MB)
|
| 59 |
+
2026-01-24 21:45:33 | INFO | Best 1 checkpoints:
|
| 60 |
+
2026-01-24 21:45:33 | INFO | 1. Step 99: acc=0.6604 (reward_model.best_99.pt)
|
| 61 |
+
2026-01-24 21:45:34 | INFO | Step 100: loss=1.5309 | IF_loss=1.2373, MQ_loss=1.8246 | acc=0.646 (IF=0.688, MQ=0.604) | lr=0.000034
|
| 62 |
+
2026-01-24 21:47:29 | INFO |
|
| 63 |
+
============================================================
|
| 64 |
+
Validation Results (took 8.11s):
|
| 65 |
+
Samples: 346 instruction, 346 quality
|
| 66 |
+
Instruction Acc: 0.6850
|
| 67 |
+
Quality Acc: 0.6387
|
| 68 |
+
Average Acc: 0.6618
|
| 69 |
+
Total Loss: 1.8631
|
| 70 |
+
Instruction Loss: 1.6525
|
| 71 |
+
Quality Loss: 2.0736
|
| 72 |
+
============================================================
|
| 73 |
+
2026-01-24 21:47:29 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_199.pt (filtered to 38.584M trainable parameters)
|
| 74 |
+
2026-01-24 21:47:30 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_199.pt (575.2MB)
|
| 75 |
+
2026-01-24 21:47:30 | INFO | Best 2 checkpoints:
|
| 76 |
+
2026-01-24 21:47:30 | INFO | 1. Step 199: acc=0.6618 (reward_model.best_199.pt)
|
| 77 |
+
2026-01-24 21:47:30 | INFO | 2. Step 99: acc=0.6604 (reward_model.best_99.pt)
|
| 78 |
+
2026-01-24 21:47:31 | INFO | Step 200: loss=0.4360 | IF_loss=0.4299, MQ_loss=0.4421 | acc=0.833 (IF=0.812, MQ=0.854) | lr=0.000067
|
| 79 |
+
2026-01-24 21:49:25 | INFO |
|
| 80 |
+
============================================================
|
| 81 |
+
Validation Results (took 9.42s):
|
| 82 |
+
Samples: 346 instruction, 346 quality
|
| 83 |
+
Instruction Acc: 0.6850
|
| 84 |
+
Quality Acc: 0.6387
|
| 85 |
+
Average Acc: 0.6618
|
| 86 |
+
Total Loss: 1.8438
|
| 87 |
+
Instruction Loss: 1.6364
|
| 88 |
+
Quality Loss: 2.0512
|
| 89 |
+
============================================================
|
| 90 |
+
2026-01-24 21:49:25 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_299.pt (filtered to 38.584M trainable parameters)
|
| 91 |
+
2026-01-24 21:49:25 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_299.pt (575.2MB)
|
| 92 |
+
2026-01-24 21:49:25 | INFO | Best 3 checkpoints:
|
| 93 |
+
2026-01-24 21:49:25 | INFO | 1. Step 199: acc=0.6618 (reward_model.best_199.pt)
|
| 94 |
+
2026-01-24 21:49:25 | INFO | 2. Step 299: acc=0.6618 (reward_model.best_299.pt)
|
| 95 |
+
2026-01-24 21:49:25 | INFO | 3. Step 99: acc=0.6604 (reward_model.best_99.pt)
|
| 96 |
+
2026-01-24 21:49:26 | INFO | Step 300: loss=0.4121 | IF_loss=0.5007, MQ_loss=0.3235 | acc=0.844 (IF=0.792, MQ=0.896) | lr=0.000100
|
| 97 |
+
2026-01-24 21:51:23 | INFO |
|
| 98 |
+
============================================================
|
| 99 |
+
Validation Results (took 7.32s):
|
| 100 |
+
Samples: 346 instruction, 346 quality
|
| 101 |
+
Instruction Acc: 0.6850
|
| 102 |
+
Quality Acc: 0.6387
|
| 103 |
+
Average Acc: 0.6618
|
| 104 |
+
Total Loss: 1.8266
|
| 105 |
+
Instruction Loss: 1.6230
|
| 106 |
+
Quality Loss: 2.0303
|
| 107 |
+
============================================================
|
| 108 |
+
2026-01-24 21:51:23 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_399.pt (filtered to 38.584M trainable parameters)
|
| 109 |
+
2026-01-24 21:51:24 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_399.pt (575.2MB)
|
| 110 |
+
2026-01-24 21:51:24 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_99.pt
|
| 111 |
+
2026-01-24 21:51:24 | INFO | Best 3 checkpoints:
|
| 112 |
+
2026-01-24 21:51:24 | INFO | 1. Step 199: acc=0.6618 (reward_model.best_199.pt)
|
| 113 |
+
2026-01-24 21:51:24 | INFO | 2. Step 299: acc=0.6618 (reward_model.best_299.pt)
|
| 114 |
+
2026-01-24 21:51:24 | INFO | 3. Step 399: acc=0.6618 (reward_model.best_399.pt)
|
| 115 |
+
2026-01-24 21:51:25 | INFO | Step 400: loss=0.4819 | IF_loss=0.4988, MQ_loss=0.4650 | acc=0.760 (IF=0.708, MQ=0.812) | lr=0.000100
|
| 116 |
+
2026-01-24 21:53:18 | INFO |
|
| 117 |
+
============================================================
|
| 118 |
+
Validation Results (took 8.30s):
|
| 119 |
+
Samples: 346 instruction, 346 quality
|
| 120 |
+
Instruction Acc: 0.6821
|
| 121 |
+
Quality Acc: 0.6416
|
| 122 |
+
Average Acc: 0.6618
|
| 123 |
+
Total Loss: 1.8103
|
| 124 |
+
Instruction Loss: 1.6100
|
| 125 |
+
Quality Loss: 2.0107
|
| 126 |
+
============================================================
|
| 127 |
+
2026-01-24 21:53:18 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_499.pt (filtered to 38.584M trainable parameters)
|
| 128 |
+
2026-01-24 21:53:19 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_499.pt (575.2MB)
|
| 129 |
+
2026-01-24 21:53:19 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_499.pt
|
| 130 |
+
2026-01-24 21:53:19 | INFO | Best 3 checkpoints:
|
| 131 |
+
2026-01-24 21:53:19 | INFO | 1. Step 199: acc=0.6618 (reward_model.best_199.pt)
|
| 132 |
+
2026-01-24 21:53:19 | INFO | 2. Step 299: acc=0.6618 (reward_model.best_299.pt)
|
| 133 |
+
2026-01-24 21:53:19 | INFO | 3. Step 399: acc=0.6618 (reward_model.best_399.pt)
|
| 134 |
+
2026-01-24 21:53:20 | INFO | Step 500: loss=0.4074 | IF_loss=0.4939, MQ_loss=0.3209 | acc=0.854 (IF=0.792, MQ=0.917) | lr=0.000099
|
| 135 |
+
2026-01-24 21:55:17 | INFO |
|
| 136 |
+
============================================================
|
| 137 |
+
Validation Results (took 7.55s):
|
| 138 |
+
Samples: 346 instruction, 346 quality
|
| 139 |
+
Instruction Acc: 0.6821
|
| 140 |
+
Quality Acc: 0.6416
|
| 141 |
+
Average Acc: 0.6618
|
| 142 |
+
Total Loss: 1.7951
|
| 143 |
+
Instruction Loss: 1.5986
|
| 144 |
+
Quality Loss: 1.9916
|
| 145 |
+
============================================================
|
| 146 |
+
2026-01-24 21:55:17 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_599.pt (filtered to 38.584M trainable parameters)
|
| 147 |
+
2026-01-24 21:55:17 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_599.pt (575.2MB)
|
| 148 |
+
2026-01-24 21:55:17 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_599.pt
|
| 149 |
+
2026-01-24 21:55:17 | INFO | Best 3 checkpoints:
|
| 150 |
+
2026-01-24 21:55:17 | INFO | 1. Step 199: acc=0.6618 (reward_model.best_199.pt)
|
| 151 |
+
2026-01-24 21:55:17 | INFO | 2. Step 299: acc=0.6618 (reward_model.best_299.pt)
|
| 152 |
+
2026-01-24 21:55:17 | INFO | 3. Step 399: acc=0.6618 (reward_model.best_399.pt)
|
| 153 |
+
2026-01-24 21:55:18 | INFO | Step 600: loss=0.3505 | IF_loss=0.3784, MQ_loss=0.3226 | acc=0.844 (IF=0.812, MQ=0.875) | lr=0.000098
|
| 154 |
+
2026-01-24 21:57:14 | INFO |
|
| 155 |
+
============================================================
|
| 156 |
+
Validation Results (took 7.89s):
|
| 157 |
+
Samples: 346 instruction, 346 quality
|
| 158 |
+
Instruction Acc: 0.6821
|
| 159 |
+
Quality Acc: 0.6445
|
| 160 |
+
Average Acc: 0.6633
|
| 161 |
+
Total Loss: 1.7807
|
| 162 |
+
Instruction Loss: 1.5876
|
| 163 |
+
Quality Loss: 1.9739
|
| 164 |
+
============================================================
|
| 165 |
+
2026-01-24 21:57:14 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_699.pt (filtered to 38.584M trainable parameters)
|
| 166 |
+
2026-01-24 21:57:14 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_699.pt (575.2MB)
|
| 167 |
+
2026-01-24 21:57:14 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_399.pt
|
| 168 |
+
2026-01-24 21:57:14 | INFO | Best 3 checkpoints:
|
| 169 |
+
2026-01-24 21:57:14 | INFO | 1. Step 699: acc=0.6633 (reward_model.best_699.pt)
|
| 170 |
+
2026-01-24 21:57:14 | INFO | 2. Step 199: acc=0.6618 (reward_model.best_199.pt)
|
| 171 |
+
2026-01-24 21:57:14 | INFO | 3. Step 299: acc=0.6618 (reward_model.best_299.pt)
|
| 172 |
+
2026-01-24 21:57:15 | INFO | Step 700: loss=0.2439 | IF_loss=0.3054, MQ_loss=0.1823 | acc=0.875 (IF=0.854, MQ=0.896) | lr=0.000097
|
| 173 |
+
2026-01-24 21:59:13 | INFO |
|
| 174 |
+
============================================================
|
| 175 |
+
Validation Results (took 7.71s):
|
| 176 |
+
Samples: 346 instruction, 346 quality
|
| 177 |
+
Instruction Acc: 0.6821
|
| 178 |
+
Quality Acc: 0.6474
|
| 179 |
+
Average Acc: 0.6647
|
| 180 |
+
Total Loss: 1.7686
|
| 181 |
+
Instruction Loss: 1.5780
|
| 182 |
+
Quality Loss: 1.9591
|
| 183 |
+
============================================================
|
| 184 |
+
2026-01-24 21:59:13 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_799.pt (filtered to 38.584M trainable parameters)
|
| 185 |
+
2026-01-24 21:59:13 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_799.pt (575.2MB)
|
| 186 |
+
2026-01-24 21:59:13 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_299.pt
|
| 187 |
+
2026-01-24 21:59:13 | INFO | Best 3 checkpoints:
|
| 188 |
+
2026-01-24 21:59:13 | INFO | 1. Step 799: acc=0.6647 (reward_model.best_799.pt)
|
| 189 |
+
2026-01-24 21:59:13 | INFO | 2. Step 699: acc=0.6633 (reward_model.best_699.pt)
|
| 190 |
+
2026-01-24 21:59:13 | INFO | 3. Step 199: acc=0.6618 (reward_model.best_199.pt)
|
| 191 |
+
2026-01-24 21:59:14 | INFO | Step 800: loss=0.2827 | IF_loss=0.3525, MQ_loss=0.2128 | acc=0.885 (IF=0.875, MQ=0.896) | lr=0.000096
|
| 192 |
+
2026-01-24 22:01:11 | INFO |
|
| 193 |
+
============================================================
|
| 194 |
+
Validation Results (took 7.05s):
|
| 195 |
+
Samples: 346 instruction, 346 quality
|
| 196 |
+
Instruction Acc: 0.6850
|
| 197 |
+
Quality Acc: 0.6474
|
| 198 |
+
Average Acc: 0.6662
|
| 199 |
+
Total Loss: 1.7570
|
| 200 |
+
Instruction Loss: 1.5693
|
| 201 |
+
Quality Loss: 1.9446
|
| 202 |
+
============================================================
|
| 203 |
+
2026-01-24 22:01:11 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_899.pt (filtered to 38.584M trainable parameters)
|
| 204 |
+
2026-01-24 22:01:12 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_899.pt (575.2MB)
|
| 205 |
+
2026-01-24 22:01:12 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_199.pt
|
| 206 |
+
2026-01-24 22:01:12 | INFO | Best 3 checkpoints:
|
| 207 |
+
2026-01-24 22:01:12 | INFO | 1. Step 899: acc=0.6662 (reward_model.best_899.pt)
|
| 208 |
+
2026-01-24 22:01:12 | INFO | 2. Step 799: acc=0.6647 (reward_model.best_799.pt)
|
| 209 |
+
2026-01-24 22:01:12 | INFO | 3. Step 699: acc=0.6633 (reward_model.best_699.pt)
|
| 210 |
+
2026-01-24 22:01:13 | INFO | Step 900: loss=0.1525 | IF_loss=0.1838, MQ_loss=0.1212 | acc=0.958 (IF=0.958, MQ=0.958) | lr=0.000094
|
| 211 |
+
2026-01-24 22:03:07 | INFO |
|
| 212 |
+
============================================================
|
| 213 |
+
Validation Results (took 7.74s):
|
| 214 |
+
Samples: 346 instruction, 346 quality
|
| 215 |
+
Instruction Acc: 0.6821
|
| 216 |
+
Quality Acc: 0.6474
|
| 217 |
+
Average Acc: 0.6647
|
| 218 |
+
Total Loss: 1.7472
|
| 219 |
+
Instruction Loss: 1.5625
|
| 220 |
+
Quality Loss: 1.9319
|
| 221 |
+
============================================================
|
| 222 |
+
2026-01-24 22:03:07 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_999.pt (filtered to 38.584M trainable parameters)
|
| 223 |
+
2026-01-24 22:03:08 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_999.pt (575.2MB)
|
| 224 |
+
2026-01-24 22:03:08 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_699.pt
|
| 225 |
+
2026-01-24 22:03:08 | INFO | Best 3 checkpoints:
|
| 226 |
+
2026-01-24 22:03:08 | INFO | 1. Step 899: acc=0.6662 (reward_model.best_899.pt)
|
| 227 |
+
2026-01-24 22:03:08 | INFO | 2. Step 799: acc=0.6647 (reward_model.best_799.pt)
|
| 228 |
+
2026-01-24 22:03:08 | INFO | 3. Step 999: acc=0.6647 (reward_model.best_999.pt)
|
| 229 |
+
2026-01-24 22:03:09 | INFO | Step 1000: loss=0.1671 | IF_loss=0.1673, MQ_loss=0.1668 | acc=0.969 (IF=0.979, MQ=0.958) | lr=0.000091
|
| 230 |
+
2026-01-24 22:05:04 | INFO |
|
| 231 |
+
============================================================
|
| 232 |
+
Validation Results (took 6.94s):
|
| 233 |
+
Samples: 346 instruction, 346 quality
|
| 234 |
+
Instruction Acc: 0.6850
|
| 235 |
+
Quality Acc: 0.6474
|
| 236 |
+
Average Acc: 0.6662
|
| 237 |
+
Total Loss: 1.7380
|
| 238 |
+
Instruction Loss: 1.5555
|
| 239 |
+
Quality Loss: 1.9205
|
| 240 |
+
============================================================
|
| 241 |
+
2026-01-24 22:05:04 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1099.pt (filtered to 38.584M trainable parameters)
|
| 242 |
+
2026-01-24 22:05:04 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1099.pt (575.2MB)
|
| 243 |
+
2026-01-24 22:05:04 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_999.pt
|
| 244 |
+
2026-01-24 22:05:04 | INFO | Best 3 checkpoints:
|
| 245 |
+
2026-01-24 22:05:04 | INFO | 1. Step 899: acc=0.6662 (reward_model.best_899.pt)
|
| 246 |
+
2026-01-24 22:05:04 | INFO | 2. Step 1099: acc=0.6662 (reward_model.best_1099.pt)
|
| 247 |
+
2026-01-24 22:05:04 | INFO | 3. Step 799: acc=0.6647 (reward_model.best_799.pt)
|
| 248 |
+
2026-01-24 22:05:05 | INFO | Step 1100: loss=0.1267 | IF_loss=0.1381, MQ_loss=0.1154 | acc=0.948 (IF=0.958, MQ=0.938) | lr=0.000089
|
| 249 |
+
2026-01-24 22:07:02 | INFO |
|
| 250 |
+
============================================================
|
| 251 |
+
Validation Results (took 7.34s):
|
| 252 |
+
Samples: 346 instruction, 346 quality
|
| 253 |
+
Instruction Acc: 0.6850
|
| 254 |
+
Quality Acc: 0.6416
|
| 255 |
+
Average Acc: 0.6633
|
| 256 |
+
Total Loss: 1.7320
|
| 257 |
+
Instruction Loss: 1.5520
|
| 258 |
+
Quality Loss: 1.9119
|
| 259 |
+
============================================================
|
| 260 |
+
2026-01-24 22:07:02 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1199.pt (filtered to 38.584M trainable parameters)
|
| 261 |
+
2026-01-24 22:07:03 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1199.pt (575.2MB)
|
| 262 |
+
2026-01-24 22:07:03 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1199.pt
|
| 263 |
+
2026-01-24 22:07:03 | INFO | Best 3 checkpoints:
|
| 264 |
+
2026-01-24 22:07:03 | INFO | 1. Step 899: acc=0.6662 (reward_model.best_899.pt)
|
| 265 |
+
2026-01-24 22:07:03 | INFO | 2. Step 1099: acc=0.6662 (reward_model.best_1099.pt)
|
| 266 |
+
2026-01-24 22:07:03 | INFO | 3. Step 799: acc=0.6647 (reward_model.best_799.pt)
|
| 267 |
+
2026-01-24 22:07:04 | INFO | Step 1200: loss=0.1201 | IF_loss=0.1744, MQ_loss=0.0657 | acc=0.948 (IF=0.917, MQ=0.979) | lr=0.000086
|
| 268 |
+
2026-01-24 22:08:59 | INFO |
|
| 269 |
+
============================================================
|
| 270 |
+
Validation Results (took 7.61s):
|
| 271 |
+
Samples: 346 instruction, 346 quality
|
| 272 |
+
Instruction Acc: 0.6850
|
| 273 |
+
Quality Acc: 0.6416
|
| 274 |
+
Average Acc: 0.6633
|
| 275 |
+
Total Loss: 1.7259
|
| 276 |
+
Instruction Loss: 1.5481
|
| 277 |
+
Quality Loss: 1.9036
|
| 278 |
+
============================================================
|
| 279 |
+
2026-01-24 22:08:59 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1299.pt (filtered to 38.584M trainable parameters)
|
| 280 |
+
2026-01-24 22:09:00 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1299.pt (575.2MB)
|
| 281 |
+
2026-01-24 22:09:00 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1299.pt
|
| 282 |
+
2026-01-24 22:09:00 | INFO | Best 3 checkpoints:
|
| 283 |
+
2026-01-24 22:09:00 | INFO | 1. Step 899: acc=0.6662 (reward_model.best_899.pt)
|
| 284 |
+
2026-01-24 22:09:00 | INFO | 2. Step 1099: acc=0.6662 (reward_model.best_1099.pt)
|
| 285 |
+
2026-01-24 22:09:00 | INFO | 3. Step 799: acc=0.6647 (reward_model.best_799.pt)
|
| 286 |
+
2026-01-24 22:09:01 | INFO | Step 1300: loss=0.0937 | IF_loss=0.1357, MQ_loss=0.0516 | acc=0.958 (IF=0.938, MQ=0.979) | lr=0.000083
|
| 287 |
+
2026-01-24 22:10:53 | INFO |
|
| 288 |
+
============================================================
|
| 289 |
+
Validation Results (took 7.24s):
|
| 290 |
+
Samples: 346 instruction, 346 quality
|
| 291 |
+
Instruction Acc: 0.6850
|
| 292 |
+
Quality Acc: 0.6416
|
| 293 |
+
Average Acc: 0.6633
|
| 294 |
+
Total Loss: 1.7217
|
| 295 |
+
Instruction Loss: 1.5459
|
| 296 |
+
Quality Loss: 1.8975
|
| 297 |
+
============================================================
|
| 298 |
+
2026-01-24 22:10:53 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1399.pt (filtered to 38.584M trainable parameters)
|
| 299 |
+
2026-01-24 22:10:54 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1399.pt (575.2MB)
|
| 300 |
+
2026-01-24 22:10:54 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1399.pt
|
| 301 |
+
2026-01-24 22:10:54 | INFO | Best 3 checkpoints:
|
| 302 |
+
2026-01-24 22:10:54 | INFO | 1. Step 899: acc=0.6662 (reward_model.best_899.pt)
|
| 303 |
+
2026-01-24 22:10:54 | INFO | 2. Step 1099: acc=0.6662 (reward_model.best_1099.pt)
|
| 304 |
+
2026-01-24 22:10:54 | INFO | 3. Step 799: acc=0.6647 (reward_model.best_799.pt)
|
| 305 |
+
2026-01-24 22:10:55 | INFO | Step 1400: loss=0.0782 | IF_loss=0.1080, MQ_loss=0.0484 | acc=0.990 (IF=0.979, MQ=1.000) | lr=0.000080
|
| 306 |
+
2026-01-24 22:12:49 | INFO |
|
| 307 |
+
============================================================
|
| 308 |
+
Validation Results (took 7.28s):
|
| 309 |
+
Samples: 346 instruction, 346 quality
|
| 310 |
+
Instruction Acc: 0.6908
|
| 311 |
+
Quality Acc: 0.6416
|
| 312 |
+
Average Acc: 0.6662
|
| 313 |
+
Total Loss: 1.7182
|
| 314 |
+
Instruction Loss: 1.5441
|
| 315 |
+
Quality Loss: 1.8922
|
| 316 |
+
============================================================
|
| 317 |
+
2026-01-24 22:12:49 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1499.pt (filtered to 38.584M trainable parameters)
|
| 318 |
+
2026-01-24 22:12:49 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1499.pt (575.2MB)
|
| 319 |
+
2026-01-24 22:12:49 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_799.pt
|
| 320 |
+
2026-01-24 22:12:49 | INFO | Best 3 checkpoints:
|
| 321 |
+
2026-01-24 22:12:49 | INFO | 1. Step 899: acc=0.6662 (reward_model.best_899.pt)
|
| 322 |
+
2026-01-24 22:12:49 | INFO | 2. Step 1099: acc=0.6662 (reward_model.best_1099.pt)
|
| 323 |
+
2026-01-24 22:12:49 | INFO | 3. Step 1499: acc=0.6662 (reward_model.best_1499.pt)
|
| 324 |
+
2026-01-24 22:12:50 | INFO | Step 1500: loss=0.0699 | IF_loss=0.0703, MQ_loss=0.0695 | acc=0.979 (IF=0.979, MQ=0.979) | lr=0.000076
|
| 325 |
+
2026-01-24 22:14:41 | INFO |
|
| 326 |
+
============================================================
|
| 327 |
+
Validation Results (took 7.76s):
|
| 328 |
+
Samples: 346 instruction, 346 quality
|
| 329 |
+
Instruction Acc: 0.6908
|
| 330 |
+
Quality Acc: 0.6416
|
| 331 |
+
Average Acc: 0.6662
|
| 332 |
+
Total Loss: 1.7151
|
| 333 |
+
Instruction Loss: 1.5435
|
| 334 |
+
Quality Loss: 1.8867
|
| 335 |
+
============================================================
|
| 336 |
+
2026-01-24 22:14:41 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1599.pt (filtered to 38.584M trainable parameters)
|
| 337 |
+
2026-01-24 22:14:42 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1599.pt (575.2MB)
|
| 338 |
+
2026-01-24 22:14:42 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1599.pt
|
| 339 |
+
2026-01-24 22:14:42 | INFO | Best 3 checkpoints:
|
| 340 |
+
2026-01-24 22:14:42 | INFO | 1. Step 899: acc=0.6662 (reward_model.best_899.pt)
|
| 341 |
+
2026-01-24 22:14:42 | INFO | 2. Step 1099: acc=0.6662 (reward_model.best_1099.pt)
|
| 342 |
+
2026-01-24 22:14:42 | INFO | 3. Step 1499: acc=0.6662 (reward_model.best_1499.pt)
|
| 343 |
+
2026-01-24 22:14:46 | INFO | Step 1600: loss=0.0346 | IF_loss=0.0421, MQ_loss=0.0272 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000072
|
| 344 |
+
2026-01-24 22:16:34 | INFO |
|
| 345 |
+
============================================================
|
| 346 |
+
Validation Results (took 7.04s):
|
| 347 |
+
Samples: 346 instruction, 346 quality
|
| 348 |
+
Instruction Acc: 0.6908
|
| 349 |
+
Quality Acc: 0.6445
|
| 350 |
+
Average Acc: 0.6676
|
| 351 |
+
Total Loss: 1.7117
|
| 352 |
+
Instruction Loss: 1.5434
|
| 353 |
+
Quality Loss: 1.8800
|
| 354 |
+
============================================================
|
| 355 |
+
2026-01-24 22:16:34 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1699.pt (filtered to 38.584M trainable parameters)
|
| 356 |
+
2026-01-24 22:16:35 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1699.pt (575.2MB)
|
| 357 |
+
2026-01-24 22:16:35 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1499.pt
|
| 358 |
+
2026-01-24 22:16:35 | INFO | Best 3 checkpoints:
|
| 359 |
+
2026-01-24 22:16:35 | INFO | 1. Step 1699: acc=0.6676 (reward_model.best_1699.pt)
|
| 360 |
+
2026-01-24 22:16:35 | INFO | 2. Step 899: acc=0.6662 (reward_model.best_899.pt)
|
| 361 |
+
2026-01-24 22:16:35 | INFO | 3. Step 1099: acc=0.6662 (reward_model.best_1099.pt)
|
| 362 |
+
2026-01-24 22:16:36 | INFO | Step 1700: loss=0.0480 | IF_loss=0.0609, MQ_loss=0.0350 | acc=0.990 (IF=0.979, MQ=1.000) | lr=0.000069
|
| 363 |
+
2026-01-24 22:18:30 | INFO |
|
| 364 |
+
============================================================
|
| 365 |
+
Validation Results (took 7.06s):
|
| 366 |
+
Samples: 346 instruction, 346 quality
|
| 367 |
+
Instruction Acc: 0.6936
|
| 368 |
+
Quality Acc: 0.6445
|
| 369 |
+
Average Acc: 0.6691
|
| 370 |
+
Total Loss: 1.7110
|
| 371 |
+
Instruction Loss: 1.5436
|
| 372 |
+
Quality Loss: 1.8783
|
| 373 |
+
============================================================
|
| 374 |
+
2026-01-24 22:18:30 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1799.pt (filtered to 38.584M trainable parameters)
|
| 375 |
+
2026-01-24 22:18:30 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1799.pt (575.2MB)
|
| 376 |
+
2026-01-24 22:18:30 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1099.pt
|
| 377 |
+
2026-01-24 22:18:30 | INFO | Best 3 checkpoints:
|
| 378 |
+
2026-01-24 22:18:30 | INFO | 1. Step 1799: acc=0.6691 (reward_model.best_1799.pt)
|
| 379 |
+
2026-01-24 22:18:30 | INFO | 2. Step 1699: acc=0.6676 (reward_model.best_1699.pt)
|
| 380 |
+
2026-01-24 22:18:30 | INFO | 3. Step 899: acc=0.6662 (reward_model.best_899.pt)
|
| 381 |
+
2026-01-24 22:18:32 | INFO | Step 1800: loss=0.0316 | IF_loss=0.0473, MQ_loss=0.0159 | acc=0.990 (IF=0.979, MQ=1.000) | lr=0.000065
|
| 382 |
+
2026-01-24 22:20:24 | INFO |
|
| 383 |
+
============================================================
|
| 384 |
+
Validation Results (took 7.18s):
|
| 385 |
+
Samples: 346 instruction, 346 quality
|
| 386 |
+
Instruction Acc: 0.6908
|
| 387 |
+
Quality Acc: 0.6474
|
| 388 |
+
Average Acc: 0.6691
|
| 389 |
+
Total Loss: 1.7090
|
| 390 |
+
Instruction Loss: 1.5445
|
| 391 |
+
Quality Loss: 1.8734
|
| 392 |
+
============================================================
|
| 393 |
+
2026-01-24 22:20:24 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1899.pt (filtered to 38.584M trainable parameters)
|
| 394 |
+
2026-01-24 22:20:25 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1899.pt (575.2MB)
|
| 395 |
+
2026-01-24 22:20:25 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_899.pt
|
| 396 |
+
2026-01-24 22:20:25 | INFO | Best 3 checkpoints:
|
| 397 |
+
2026-01-24 22:20:25 | INFO | 1. Step 1799: acc=0.6691 (reward_model.best_1799.pt)
|
| 398 |
+
2026-01-24 22:20:25 | INFO | 2. Step 1899: acc=0.6691 (reward_model.best_1899.pt)
|
| 399 |
+
2026-01-24 22:20:25 | INFO | 3. Step 1699: acc=0.6676 (reward_model.best_1699.pt)
|
| 400 |
+
2026-01-24 22:20:26 | INFO | Step 1900: loss=0.0415 | IF_loss=0.0539, MQ_loss=0.0290 | acc=0.990 (IF=0.979, MQ=1.000) | lr=0.000061
|
| 401 |
+
2026-01-24 22:22:22 | INFO |
|
| 402 |
+
============================================================
|
| 403 |
+
Validation Results (took 7.27s):
|
| 404 |
+
Samples: 346 instruction, 346 quality
|
| 405 |
+
Instruction Acc: 0.6936
|
| 406 |
+
Quality Acc: 0.6474
|
| 407 |
+
Average Acc: 0.6705
|
| 408 |
+
Total Loss: 1.7083
|
| 409 |
+
Instruction Loss: 1.5455
|
| 410 |
+
Quality Loss: 1.8711
|
| 411 |
+
============================================================
|
| 412 |
+
2026-01-24 22:22:22 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1999.pt (filtered to 38.584M trainable parameters)
|
| 413 |
+
2026-01-24 22:22:22 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1999.pt (575.2MB)
|
| 414 |
+
2026-01-24 22:22:22 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1699.pt
|
| 415 |
+
2026-01-24 22:22:22 | INFO | Best 3 checkpoints:
|
| 416 |
+
2026-01-24 22:22:22 | INFO | 1. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 417 |
+
2026-01-24 22:22:22 | INFO | 2. Step 1799: acc=0.6691 (reward_model.best_1799.pt)
|
| 418 |
+
2026-01-24 22:22:22 | INFO | 3. Step 1899: acc=0.6691 (reward_model.best_1899.pt)
|
| 419 |
+
2026-01-24 22:22:23 | INFO | Step 2000: loss=0.0589 | IF_loss=0.0511, MQ_loss=0.0667 | acc=0.979 (IF=0.979, MQ=0.979) | lr=0.000056
|
| 420 |
+
2026-01-24 22:22:23 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.2000.pt (filtered to 38.584M trainable parameters)
|
| 421 |
+
2026-01-24 22:22:24 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.2000.pt (575.2MB)
|
| 422 |
+
2026-01-24 22:22:24 | INFO | Step 2000: Saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.2000.pt
|
| 423 |
+
2026-01-24 22:24:18 | INFO |
|
| 424 |
+
============================================================
|
| 425 |
+
Validation Results (took 7.25s):
|
| 426 |
+
Samples: 346 instruction, 346 quality
|
| 427 |
+
Instruction Acc: 0.6879
|
| 428 |
+
Quality Acc: 0.6474
|
| 429 |
+
Average Acc: 0.6676
|
| 430 |
+
Total Loss: 1.7086
|
| 431 |
+
Instruction Loss: 1.5472
|
| 432 |
+
Quality Loss: 1.8700
|
| 433 |
+
============================================================
|
| 434 |
+
2026-01-24 22:24:18 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2099.pt (filtered to 38.584M trainable parameters)
|
| 435 |
+
2026-01-24 22:24:19 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2099.pt (575.2MB)
|
| 436 |
+
2026-01-24 22:24:19 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2099.pt
|
| 437 |
+
2026-01-24 22:24:19 | INFO | Best 3 checkpoints:
|
| 438 |
+
2026-01-24 22:24:19 | INFO | 1. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 439 |
+
2026-01-24 22:24:19 | INFO | 2. Step 1799: acc=0.6691 (reward_model.best_1799.pt)
|
| 440 |
+
2026-01-24 22:24:19 | INFO | 3. Step 1899: acc=0.6691 (reward_model.best_1899.pt)
|
| 441 |
+
2026-01-24 22:24:20 | INFO | Step 2100: loss=0.0284 | IF_loss=0.0286, MQ_loss=0.0281 | acc=0.990 (IF=1.000, MQ=0.979) | lr=0.000052
|
| 442 |
+
2026-01-24 22:26:12 | INFO |
|
| 443 |
+
============================================================
|
| 444 |
+
Validation Results (took 7.00s):
|
| 445 |
+
Samples: 346 instruction, 346 quality
|
| 446 |
+
Instruction Acc: 0.6879
|
| 447 |
+
Quality Acc: 0.6503
|
| 448 |
+
Average Acc: 0.6691
|
| 449 |
+
Total Loss: 1.7083
|
| 450 |
+
Instruction Loss: 1.5495
|
| 451 |
+
Quality Loss: 1.8672
|
| 452 |
+
============================================================
|
| 453 |
+
2026-01-24 22:26:12 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2199.pt (filtered to 38.584M trainable parameters)
|
| 454 |
+
2026-01-24 22:26:13 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2199.pt (575.2MB)
|
| 455 |
+
2026-01-24 22:26:13 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1899.pt
|
| 456 |
+
2026-01-24 22:26:13 | INFO | Best 3 checkpoints:
|
| 457 |
+
2026-01-24 22:26:13 | INFO | 1. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 458 |
+
2026-01-24 22:26:13 | INFO | 2. Step 2199: acc=0.6691 (reward_model.best_2199.pt)
|
| 459 |
+
2026-01-24 22:26:13 | INFO | 3. Step 1799: acc=0.6691 (reward_model.best_1799.pt)
|
| 460 |
+
2026-01-24 22:26:14 | INFO | Step 2200: loss=0.0061 | IF_loss=0.0038, MQ_loss=0.0085 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000048
|
| 461 |
+
2026-01-24 22:28:05 | INFO |
|
| 462 |
+
============================================================
|
| 463 |
+
Validation Results (took 7.64s):
|
| 464 |
+
Samples: 346 instruction, 346 quality
|
| 465 |
+
Instruction Acc: 0.6879
|
| 466 |
+
Quality Acc: 0.6532
|
| 467 |
+
Average Acc: 0.6705
|
| 468 |
+
Total Loss: 1.7088
|
| 469 |
+
Instruction Loss: 1.5525
|
| 470 |
+
Quality Loss: 1.8651
|
| 471 |
+
============================================================
|
| 472 |
+
2026-01-24 22:28:05 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2299.pt (filtered to 38.584M trainable parameters)
|
| 473 |
+
2026-01-24 22:28:05 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2299.pt (575.2MB)
|
| 474 |
+
2026-01-24 22:28:05 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1799.pt
|
| 475 |
+
2026-01-24 22:28:05 | INFO | Best 3 checkpoints:
|
| 476 |
+
2026-01-24 22:28:05 | INFO | 1. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 477 |
+
2026-01-24 22:28:05 | INFO | 2. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
|
| 478 |
+
2026-01-24 22:28:05 | INFO | 3. Step 2199: acc=0.6691 (reward_model.best_2199.pt)
|
| 479 |
+
2026-01-24 22:28:06 | INFO | Step 2300: loss=0.0451 | IF_loss=0.0768, MQ_loss=0.0134 | acc=0.990 (IF=0.979, MQ=1.000) | lr=0.000044
|
| 480 |
+
2026-01-24 22:30:00 | INFO |
|
| 481 |
+
============================================================
|
| 482 |
+
Validation Results (took 7.30s):
|
| 483 |
+
Samples: 346 instruction, 346 quality
|
| 484 |
+
Instruction Acc: 0.6908
|
| 485 |
+
Quality Acc: 0.6532
|
| 486 |
+
Average Acc: 0.6720
|
| 487 |
+
Total Loss: 1.7079
|
| 488 |
+
Instruction Loss: 1.5530
|
| 489 |
+
Quality Loss: 1.8628
|
| 490 |
+
============================================================
|
| 491 |
+
2026-01-24 22:30:00 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2399.pt (filtered to 38.584M trainable parameters)
|
| 492 |
+
2026-01-24 22:30:01 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2399.pt (575.2MB)
|
| 493 |
+
2026-01-24 22:30:01 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2199.pt
|
| 494 |
+
2026-01-24 22:30:01 | INFO | Best 3 checkpoints:
|
| 495 |
+
2026-01-24 22:30:01 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 496 |
+
2026-01-24 22:30:01 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 497 |
+
2026-01-24 22:30:01 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
|
| 498 |
+
2026-01-24 22:30:02 | INFO | Step 2400: loss=0.0141 | IF_loss=0.0160, MQ_loss=0.0122 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000039
|
| 499 |
+
2026-01-24 22:31:52 | INFO |
|
| 500 |
+
============================================================
|
| 501 |
+
Validation Results (took 6.60s):
|
| 502 |
+
Samples: 346 instruction, 346 quality
|
| 503 |
+
Instruction Acc: 0.6879
|
| 504 |
+
Quality Acc: 0.6503
|
| 505 |
+
Average Acc: 0.6691
|
| 506 |
+
Total Loss: 1.7095
|
| 507 |
+
Instruction Loss: 1.5571
|
| 508 |
+
Quality Loss: 1.8619
|
| 509 |
+
============================================================
|
| 510 |
+
2026-01-24 22:31:53 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2499.pt (filtered to 38.584M trainable parameters)
|
| 511 |
+
2026-01-24 22:31:53 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2499.pt (575.2MB)
|
| 512 |
+
2026-01-24 22:31:53 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2499.pt
|
| 513 |
+
2026-01-24 22:31:53 | INFO | Best 3 checkpoints:
|
| 514 |
+
2026-01-24 22:31:53 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 515 |
+
2026-01-24 22:31:53 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 516 |
+
2026-01-24 22:31:53 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
|
| 517 |
+
2026-01-24 22:31:54 | INFO | Step 2500: loss=0.0073 | IF_loss=0.0109, MQ_loss=0.0036 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000035
|
| 518 |
+
2026-01-24 22:33:43 | INFO |
|
| 519 |
+
============================================================
|
| 520 |
+
Validation Results (took 6.93s):
|
| 521 |
+
Samples: 346 instruction, 346 quality
|
| 522 |
+
Instruction Acc: 0.6879
|
| 523 |
+
Quality Acc: 0.6503
|
| 524 |
+
Average Acc: 0.6691
|
| 525 |
+
Total Loss: 1.7093
|
| 526 |
+
Instruction Loss: 1.5586
|
| 527 |
+
Quality Loss: 1.8601
|
| 528 |
+
============================================================
|
| 529 |
+
2026-01-24 22:33:43 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2599.pt (filtered to 38.584M trainable parameters)
|
| 530 |
+
2026-01-24 22:33:43 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2599.pt (575.2MB)
|
| 531 |
+
2026-01-24 22:33:43 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2599.pt
|
| 532 |
+
2026-01-24 22:33:43 | INFO | Best 3 checkpoints:
|
| 533 |
+
2026-01-24 22:33:43 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 534 |
+
2026-01-24 22:33:43 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 535 |
+
2026-01-24 22:33:43 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
|
| 536 |
+
2026-01-24 22:33:44 | INFO | Step 2600: loss=0.0025 | IF_loss=0.0039, MQ_loss=0.0011 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000031
|
| 537 |
+
2026-01-24 22:35:39 | INFO |
|
| 538 |
+
============================================================
|
| 539 |
+
Validation Results (took 6.71s):
|
| 540 |
+
Samples: 346 instruction, 346 quality
|
| 541 |
+
Instruction Acc: 0.6879
|
| 542 |
+
Quality Acc: 0.6503
|
| 543 |
+
Average Acc: 0.6691
|
| 544 |
+
Total Loss: 1.7105
|
| 545 |
+
Instruction Loss: 1.5632
|
| 546 |
+
Quality Loss: 1.8577
|
| 547 |
+
============================================================
|
| 548 |
+
2026-01-24 22:35:39 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2699.pt (filtered to 38.584M trainable parameters)
|
| 549 |
+
2026-01-24 22:35:39 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2699.pt (575.2MB)
|
| 550 |
+
2026-01-24 22:35:39 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2699.pt
|
| 551 |
+
2026-01-24 22:35:39 | INFO | Best 3 checkpoints:
|
| 552 |
+
2026-01-24 22:35:39 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 553 |
+
2026-01-24 22:35:39 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 554 |
+
2026-01-24 22:35:39 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
|
| 555 |
+
2026-01-24 22:35:40 | INFO | Step 2700: loss=0.0285 | IF_loss=0.0436, MQ_loss=0.0134 | acc=0.990 (IF=0.979, MQ=1.000) | lr=0.000028
|
| 556 |
+
2026-01-24 22:37:31 | INFO |
|
| 557 |
+
============================================================
|
| 558 |
+
Validation Results (took 7.15s):
|
| 559 |
+
Samples: 346 instruction, 346 quality
|
| 560 |
+
Instruction Acc: 0.6850
|
| 561 |
+
Quality Acc: 0.6503
|
| 562 |
+
Average Acc: 0.6676
|
| 563 |
+
Total Loss: 1.7119
|
| 564 |
+
Instruction Loss: 1.5662
|
| 565 |
+
Quality Loss: 1.8576
|
| 566 |
+
============================================================
|
| 567 |
+
2026-01-24 22:37:31 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2799.pt (filtered to 38.584M trainable parameters)
|
| 568 |
+
2026-01-24 22:37:32 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2799.pt (575.2MB)
|
| 569 |
+
2026-01-24 22:37:32 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2799.pt
|
| 570 |
+
2026-01-24 22:37:32 | INFO | Best 3 checkpoints:
|
| 571 |
+
2026-01-24 22:37:32 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 572 |
+
2026-01-24 22:37:32 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 573 |
+
2026-01-24 22:37:32 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
|
| 574 |
+
2026-01-24 22:37:33 | INFO | Step 2800: loss=0.0054 | IF_loss=0.0086, MQ_loss=0.0023 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000024
|
| 575 |
+
2026-01-24 22:39:25 | INFO |
|
| 576 |
+
============================================================
|
| 577 |
+
Validation Results (took 6.52s):
|
| 578 |
+
Samples: 346 instruction, 346 quality
|
| 579 |
+
Instruction Acc: 0.6879
|
| 580 |
+
Quality Acc: 0.6503
|
| 581 |
+
Average Acc: 0.6691
|
| 582 |
+
Total Loss: 1.7105
|
| 583 |
+
Instruction Loss: 1.5670
|
| 584 |
+
Quality Loss: 1.8540
|
| 585 |
+
============================================================
|
| 586 |
+
2026-01-24 22:39:25 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2899.pt (filtered to 38.584M trainable parameters)
|
| 587 |
+
2026-01-24 22:39:26 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2899.pt (575.2MB)
|
| 588 |
+
2026-01-24 22:39:26 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2899.pt
|
| 589 |
+
2026-01-24 22:39:26 | INFO | Best 3 checkpoints:
|
| 590 |
+
2026-01-24 22:39:26 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 591 |
+
2026-01-24 22:39:26 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 592 |
+
2026-01-24 22:39:26 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
|
| 593 |
+
2026-01-24 22:39:27 | INFO | Step 2900: loss=0.0121 | IF_loss=0.0158, MQ_loss=0.0084 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000020
|
| 594 |
+
2026-01-24 22:41:17 | INFO |
|
| 595 |
+
============================================================
|
| 596 |
+
Validation Results (took 7.44s):
|
| 597 |
+
Samples: 346 instruction, 346 quality
|
| 598 |
+
Instruction Acc: 0.6879
|
| 599 |
+
Quality Acc: 0.6503
|
| 600 |
+
Average Acc: 0.6691
|
| 601 |
+
Total Loss: 1.7130
|
| 602 |
+
Instruction Loss: 1.5717
|
| 603 |
+
Quality Loss: 1.8543
|
| 604 |
+
============================================================
|
| 605 |
+
2026-01-24 22:41:17 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2999.pt (filtered to 38.584M trainable parameters)
|
| 606 |
+
2026-01-24 22:41:17 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2999.pt (575.2MB)
|
| 607 |
+
2026-01-24 22:41:18 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2999.pt
|
| 608 |
+
2026-01-24 22:41:18 | INFO | Best 3 checkpoints:
|
| 609 |
+
2026-01-24 22:41:18 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 610 |
+
2026-01-24 22:41:18 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 611 |
+
2026-01-24 22:41:18 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
|
| 612 |
+
2026-01-24 22:41:19 | INFO | Step 3000: loss=0.0040 | IF_loss=0.0024, MQ_loss=0.0055 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000017
|
| 613 |
+
2026-01-24 22:43:12 | INFO |
|
| 614 |
+
============================================================
|
| 615 |
+
Validation Results (took 6.84s):
|
| 616 |
+
Samples: 346 instruction, 346 quality
|
| 617 |
+
Instruction Acc: 0.6908
|
| 618 |
+
Quality Acc: 0.6503
|
| 619 |
+
Average Acc: 0.6705
|
| 620 |
+
Total Loss: 1.7137
|
| 621 |
+
Instruction Loss: 1.5743
|
| 622 |
+
Quality Loss: 1.8532
|
| 623 |
+
============================================================
|
| 624 |
+
2026-01-24 22:43:12 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3099.pt (filtered to 38.584M trainable parameters)
|
| 625 |
+
2026-01-24 22:43:12 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3099.pt (575.2MB)
|
| 626 |
+
2026-01-24 22:43:12 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3099.pt
|
| 627 |
+
2026-01-24 22:43:12 | INFO | Best 3 checkpoints:
|
| 628 |
+
2026-01-24 22:43:12 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 629 |
+
2026-01-24 22:43:12 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 630 |
+
2026-01-24 22:43:12 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
|
| 631 |
+
2026-01-24 22:43:13 | INFO | Step 3100: loss=0.0095 | IF_loss=0.0161, MQ_loss=0.0029 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000014
|
| 632 |
+
2026-01-24 22:45:04 | INFO |
|
| 633 |
+
============================================================
|
| 634 |
+
Validation Results (took 7.66s):
|
| 635 |
+
Samples: 346 instruction, 346 quality
|
| 636 |
+
Instruction Acc: 0.6879
|
| 637 |
+
Quality Acc: 0.6503
|
| 638 |
+
Average Acc: 0.6691
|
| 639 |
+
Total Loss: 1.7135
|
| 640 |
+
Instruction Loss: 1.5760
|
| 641 |
+
Quality Loss: 1.8510
|
| 642 |
+
============================================================
|
| 643 |
+
2026-01-24 22:45:04 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3199.pt (filtered to 38.584M trainable parameters)
|
| 644 |
+
2026-01-24 22:45:04 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3199.pt (575.2MB)
|
| 645 |
+
2026-01-24 22:45:04 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3199.pt
|
| 646 |
+
2026-01-24 22:45:04 | INFO | Best 3 checkpoints:
|
| 647 |
+
2026-01-24 22:45:04 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 648 |
+
2026-01-24 22:45:04 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 649 |
+
2026-01-24 22:45:04 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
|
| 650 |
+
2026-01-24 22:45:08 | INFO | Step 3200: loss=0.0050 | IF_loss=0.0072, MQ_loss=0.0027 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000011
|
| 651 |
+
2026-01-24 22:46:57 | INFO |
|
| 652 |
+
============================================================
|
| 653 |
+
Validation Results (took 6.84s):
|
| 654 |
+
Samples: 346 instruction, 346 quality
|
| 655 |
+
Instruction Acc: 0.6879
|
| 656 |
+
Quality Acc: 0.6503
|
| 657 |
+
Average Acc: 0.6691
|
| 658 |
+
Total Loss: 1.7154
|
| 659 |
+
Instruction Loss: 1.5809
|
| 660 |
+
Quality Loss: 1.8499
|
| 661 |
+
============================================================
|
| 662 |
+
2026-01-24 22:46:57 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3299.pt (filtered to 38.584M trainable parameters)
|
| 663 |
+
2026-01-24 22:46:57 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3299.pt (575.2MB)
|
| 664 |
+
2026-01-24 22:46:57 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3299.pt
|
| 665 |
+
2026-01-24 22:46:57 | INFO | Best 3 checkpoints:
|
| 666 |
+
2026-01-24 22:46:57 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 667 |
+
2026-01-24 22:46:57 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 668 |
+
2026-01-24 22:46:57 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
|
| 669 |
+
2026-01-24 22:46:58 | INFO | Step 3300: loss=0.0362 | IF_loss=0.0503, MQ_loss=0.0221 | acc=0.990 (IF=0.979, MQ=1.000) | lr=0.000009
|
| 670 |
+
2026-01-24 22:48:50 | INFO |
|
| 671 |
+
============================================================
|
| 672 |
+
Validation Results (took 6.83s):
|
| 673 |
+
Samples: 346 instruction, 346 quality
|
| 674 |
+
Instruction Acc: 0.6879
|
| 675 |
+
Quality Acc: 0.6532
|
| 676 |
+
Average Acc: 0.6705
|
| 677 |
+
Total Loss: 1.7154
|
| 678 |
+
Instruction Loss: 1.5832
|
| 679 |
+
Quality Loss: 1.8477
|
| 680 |
+
============================================================
|
| 681 |
+
2026-01-24 22:48:50 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3399.pt (filtered to 38.584M trainable parameters)
|
| 682 |
+
2026-01-24 22:48:51 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3399.pt (575.2MB)
|
| 683 |
+
2026-01-24 22:48:51 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3399.pt
|
| 684 |
+
2026-01-24 22:48:51 | INFO | Best 3 checkpoints:
|
| 685 |
+
2026-01-24 22:48:51 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 686 |
+
2026-01-24 22:48:51 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 687 |
+
2026-01-24 22:48:51 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
|
| 688 |
+
2026-01-24 22:48:52 | INFO | Step 3400: loss=0.0082 | IF_loss=0.0113, MQ_loss=0.0051 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000006
|
| 689 |
+
2026-01-24 22:50:41 | INFO |
|
| 690 |
+
============================================================
|
| 691 |
+
Validation Results (took 7.19s):
|
| 692 |
+
Samples: 346 instruction, 346 quality
|
| 693 |
+
Instruction Acc: 0.6908
|
| 694 |
+
Quality Acc: 0.6590
|
| 695 |
+
Average Acc: 0.6749
|
| 696 |
+
Total Loss: 1.7151
|
| 697 |
+
Instruction Loss: 1.5847
|
| 698 |
+
Quality Loss: 1.8456
|
| 699 |
+
============================================================
|
| 700 |
+
2026-01-24 22:50:41 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3499.pt (filtered to 38.584M trainable parameters)
|
| 701 |
+
2026-01-24 22:50:41 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3499.pt (575.2MB)
|
| 702 |
+
2026-01-24 22:50:41 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2299.pt
|
| 703 |
+
2026-01-24 22:50:41 | INFO | Best 3 checkpoints:
|
| 704 |
+
2026-01-24 22:50:41 | INFO | 1. Step 3499: acc=0.6749 (reward_model.best_3499.pt)
|
| 705 |
+
2026-01-24 22:50:41 | INFO | 2. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 706 |
+
2026-01-24 22:50:41 | INFO | 3. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 707 |
+
2026-01-24 22:50:42 | INFO | Step 3500: loss=0.0045 | IF_loss=0.0077, MQ_loss=0.0013 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000005
|
| 708 |
+
2026-01-24 22:52:33 | INFO |
|
| 709 |
+
============================================================
|
| 710 |
+
Validation Results (took 7.08s):
|
| 711 |
+
Samples: 346 instruction, 346 quality
|
| 712 |
+
Instruction Acc: 0.6879
|
| 713 |
+
Quality Acc: 0.6590
|
| 714 |
+
Average Acc: 0.6734
|
| 715 |
+
Total Loss: 1.7160
|
| 716 |
+
Instruction Loss: 1.5876
|
| 717 |
+
Quality Loss: 1.8445
|
| 718 |
+
============================================================
|
| 719 |
+
2026-01-24 22:52:33 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3599.pt (filtered to 38.584M trainable parameters)
|
| 720 |
+
2026-01-24 22:52:34 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3599.pt (575.2MB)
|
| 721 |
+
2026-01-24 22:52:34 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1999.pt
|
| 722 |
+
2026-01-24 22:52:34 | INFO | Best 3 checkpoints:
|
| 723 |
+
2026-01-24 22:52:34 | INFO | 1. Step 3499: acc=0.6749 (reward_model.best_3499.pt)
|
| 724 |
+
2026-01-24 22:52:34 | INFO | 2. Step 3599: acc=0.6734 (reward_model.best_3599.pt)
|
| 725 |
+
2026-01-24 22:52:34 | INFO | 3. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 726 |
+
2026-01-24 22:52:35 | INFO | Step 3600: loss=0.0126 | IF_loss=0.0220, MQ_loss=0.0031 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000003
|
| 727 |
+
2026-01-24 22:54:24 | INFO |
|
| 728 |
+
============================================================
|
| 729 |
+
Validation Results (took 7.20s):
|
| 730 |
+
Samples: 346 instruction, 346 quality
|
| 731 |
+
Instruction Acc: 0.6879
|
| 732 |
+
Quality Acc: 0.6590
|
| 733 |
+
Average Acc: 0.6734
|
| 734 |
+
Total Loss: 1.7161
|
| 735 |
+
Instruction Loss: 1.5894
|
| 736 |
+
Quality Loss: 1.8428
|
| 737 |
+
============================================================
|
| 738 |
+
2026-01-24 22:54:24 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3699.pt (filtered to 38.584M trainable parameters)
|
| 739 |
+
2026-01-24 22:54:24 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3699.pt (575.2MB)
|
| 740 |
+
2026-01-24 22:54:24 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2399.pt
|
| 741 |
+
2026-01-24 22:54:24 | INFO | Best 3 checkpoints:
|
| 742 |
+
2026-01-24 22:54:24 | INFO | 1. Step 3499: acc=0.6749 (reward_model.best_3499.pt)
|
| 743 |
+
2026-01-24 22:54:24 | INFO | 2. Step 3599: acc=0.6734 (reward_model.best_3599.pt)
|
| 744 |
+
2026-01-24 22:54:24 | INFO | 3. Step 3699: acc=0.6734 (reward_model.best_3699.pt)
|
| 745 |
+
2026-01-24 22:54:25 | INFO | Step 3700: loss=0.0085 | IF_loss=0.0041, MQ_loss=0.0130 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000002
|
| 746 |
+
2026-01-24 22:56:18 | INFO |
|
| 747 |
+
============================================================
|
| 748 |
+
Validation Results (took 6.85s):
|
| 749 |
+
Samples: 346 instruction, 346 quality
|
| 750 |
+
Instruction Acc: 0.6879
|
| 751 |
+
Quality Acc: 0.6618
|
| 752 |
+
Average Acc: 0.6749
|
| 753 |
+
Total Loss: 1.7157
|
| 754 |
+
Instruction Loss: 1.5912
|
| 755 |
+
Quality Loss: 1.8403
|
| 756 |
+
============================================================
|
| 757 |
+
2026-01-24 22:56:18 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3799.pt (filtered to 38.584M trainable parameters)
|
| 758 |
+
2026-01-24 22:56:19 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3799.pt (575.2MB)
|
| 759 |
+
2026-01-24 22:56:19 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3699.pt
|
| 760 |
+
2026-01-24 22:56:19 | INFO | Best 3 checkpoints:
|
| 761 |
+
2026-01-24 22:56:19 | INFO | 1. Step 3499: acc=0.6749 (reward_model.best_3499.pt)
|
| 762 |
+
2026-01-24 22:56:19 | INFO | 2. Step 3799: acc=0.6749 (reward_model.best_3799.pt)
|
| 763 |
+
2026-01-24 22:56:19 | INFO | 3. Step 3599: acc=0.6734 (reward_model.best_3599.pt)
|
| 764 |
+
2026-01-24 22:56:20 | INFO | Step 3800: loss=0.0120 | IF_loss=0.0037, MQ_loss=0.0202 | acc=0.990 (IF=1.000, MQ=0.979) | lr=0.000001
|
| 765 |
+
2026-01-24 22:58:09 | INFO |
|
| 766 |
+
============================================================
|
| 767 |
+
Validation Results (took 7.39s):
|
| 768 |
+
Samples: 346 instruction, 346 quality
|
| 769 |
+
Instruction Acc: 0.6908
|
| 770 |
+
Quality Acc: 0.6590
|
| 771 |
+
Average Acc: 0.6749
|
| 772 |
+
Total Loss: 1.7163
|
| 773 |
+
Instruction Loss: 1.5935
|
| 774 |
+
Quality Loss: 1.8391
|
| 775 |
+
============================================================
|
| 776 |
+
2026-01-24 22:58:09 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3899.pt (filtered to 38.584M trainable parameters)
|
| 777 |
+
2026-01-24 22:58:10 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3899.pt (575.2MB)
|
| 778 |
+
2026-01-24 22:58:10 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3599.pt
|
| 779 |
+
2026-01-24 22:58:10 | INFO | Best 3 checkpoints:
|
| 780 |
+
2026-01-24 22:58:10 | INFO | 1. Step 3499: acc=0.6749 (reward_model.best_3499.pt)
|
| 781 |
+
2026-01-24 22:58:10 | INFO | 2. Step 3799: acc=0.6749 (reward_model.best_3799.pt)
|
| 782 |
+
2026-01-24 22:58:10 | INFO | 3. Step 3899: acc=0.6749 (reward_model.best_3899.pt)
|
| 783 |
+
2026-01-24 22:58:11 | INFO | Step 3900: loss=0.0060 | IF_loss=0.0040, MQ_loss=0.0080 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000000
|
| 784 |
+
2026-01-24 23:00:02 | INFO |
|
| 785 |
+
============================================================
|
| 786 |
+
Validation Results (took 6.60s):
|
| 787 |
+
Samples: 346 instruction, 346 quality
|
| 788 |
+
Instruction Acc: 0.6908
|
| 789 |
+
Quality Acc: 0.6590
|
| 790 |
+
Average Acc: 0.6749
|
| 791 |
+
Total Loss: 1.7165
|
| 792 |
+
Instruction Loss: 1.5967
|
| 793 |
+
Quality Loss: 1.8363
|
| 794 |
+
============================================================
|
| 795 |
+
2026-01-24 23:00:02 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3999.pt (filtered to 38.584M trainable parameters)
|
| 796 |
+
2026-01-24 23:00:02 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3999.pt (575.2MB)
|
| 797 |
+
2026-01-24 23:00:02 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3999.pt
|
| 798 |
+
2026-01-24 23:00:02 | INFO | Best 3 checkpoints:
|
| 799 |
+
2026-01-24 23:00:02 | INFO | 1. Step 3499: acc=0.6749 (reward_model.best_3499.pt)
|
| 800 |
+
2026-01-24 23:00:02 | INFO | 2. Step 3799: acc=0.6749 (reward_model.best_3799.pt)
|
| 801 |
+
2026-01-24 23:00:02 | INFO | 3. Step 3899: acc=0.6749 (reward_model.best_3899.pt)
|
| 802 |
+
2026-01-24 23:00:02 | INFO | Training complete!
|
| 803 |
+
2026-01-24 23:00:02 | INFO | Training complete!
|
finetune_human/20260124_2354/config.yaml
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DEVICES: '3'
|
| 2 |
+
accelerate:
|
| 3 |
+
mixed_precision: bf16
|
| 4 |
+
basics:
|
| 5 |
+
random_seed: 42
|
| 6 |
+
save_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model
|
| 7 |
+
dataset:
|
| 8 |
+
audio_dropout:
|
| 9 |
+
apply_to_eval: false
|
| 10 |
+
apply_to_ref: true
|
| 11 |
+
enabled: true
|
| 12 |
+
eval_only_on_training: true
|
| 13 |
+
max_duration: 1500
|
| 14 |
+
min_duration: 200
|
| 15 |
+
train_mode: start
|
| 16 |
+
cache_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/tmp
|
| 17 |
+
db_path: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/backend/database.db
|
| 18 |
+
duration: 600.0
|
| 19 |
+
embedding_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/supervised_embeddings
|
| 20 |
+
max_samples: null
|
| 21 |
+
max_val_samples: null
|
| 22 |
+
metadata_jsonl: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/all_comparisons.jsonl
|
| 23 |
+
mode: raw_text_frozen_audio
|
| 24 |
+
preference_file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/human_annotations/train.json
|
| 25 |
+
sample_rate: 24000
|
| 26 |
+
val_preference_file: null
|
| 27 |
+
loss:
|
| 28 |
+
IF_ratio: 0.5
|
| 29 |
+
filter_ties: true
|
| 30 |
+
label_smoothing: 0.0
|
| 31 |
+
reduction: mean
|
| 32 |
+
model:
|
| 33 |
+
attention_mode: SA
|
| 34 |
+
attn_dropout: 0.0
|
| 35 |
+
category_embeddings: null
|
| 36 |
+
dim: 768
|
| 37 |
+
dim_head: 64
|
| 38 |
+
downsample:
|
| 39 |
+
configs:
|
| 40 |
+
conv2_4x:
|
| 41 |
+
factor: 4
|
| 42 |
+
kernel_size: 5
|
| 43 |
+
kind: conv*2
|
| 44 |
+
use_layernorm: true
|
| 45 |
+
conv_4x:
|
| 46 |
+
factor: 4
|
| 47 |
+
kernel_size: 5
|
| 48 |
+
kind: conv
|
| 49 |
+
stage: 1
|
| 50 |
+
use_layernorm: true
|
| 51 |
+
glu_4x:
|
| 52 |
+
factor: 4
|
| 53 |
+
kernel_size: 5
|
| 54 |
+
kind: gluconv*2+pw
|
| 55 |
+
use_layernorm: true
|
| 56 |
+
mean:
|
| 57 |
+
factor: 2
|
| 58 |
+
kind: mean
|
| 59 |
+
mean_4x:
|
| 60 |
+
dropout: 0.0
|
| 61 |
+
factor: 30
|
| 62 |
+
kind: mean+mlp
|
| 63 |
+
mlp_ratio: 2.0
|
| 64 |
+
none:
|
| 65 |
+
factor: 1
|
| 66 |
+
kind: none
|
| 67 |
+
eval: mean_4x
|
| 68 |
+
ref: null
|
| 69 |
+
text: none
|
| 70 |
+
ff_dropout: 0.0
|
| 71 |
+
ff_mult: 4
|
| 72 |
+
freeze_audio: true
|
| 73 |
+
freeze_text: true
|
| 74 |
+
gradient_checkpointing: false
|
| 75 |
+
heads: 8
|
| 76 |
+
joint_tf_depth: 1
|
| 77 |
+
load_config:
|
| 78 |
+
checkpoint_path: null
|
| 79 |
+
frozen_from_pretrained: true
|
| 80 |
+
pretrained_name: OpenMuQ/MuQ-MuLan-large
|
| 81 |
+
strict: false
|
| 82 |
+
mlp_dim: 768
|
| 83 |
+
mode: concat_text_late
|
| 84 |
+
model_name: OpenMuQ/MuQ-MuLan-large
|
| 85 |
+
name: reward
|
| 86 |
+
no_condition: false
|
| 87 |
+
null_embedding:
|
| 88 |
+
audio:
|
| 89 |
+
dropout: 0.5
|
| 90 |
+
length: 10
|
| 91 |
+
lyrics:
|
| 92 |
+
dropout: 0.3
|
| 93 |
+
length: 10
|
| 94 |
+
text:
|
| 95 |
+
dropout: 0
|
| 96 |
+
length: 10
|
| 97 |
+
output_dim: 2
|
| 98 |
+
prompt_tf_depth: 4
|
| 99 |
+
sr: 24000
|
| 100 |
+
text_encoder:
|
| 101 |
+
name: muq_mulan
|
| 102 |
+
tune: null
|
| 103 |
+
text_lora_config: null
|
| 104 |
+
train_muq_depth: 0
|
| 105 |
+
train_muqmulan: false
|
| 106 |
+
use_audio: true
|
| 107 |
+
use_layer_idx: -1
|
| 108 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 109 |
+
run_name: null
|
| 110 |
+
train:
|
| 111 |
+
batch_size: 48
|
| 112 |
+
betas:
|
| 113 |
+
- 0.9
|
| 114 |
+
- 0.99
|
| 115 |
+
ema_decay: 0.9999
|
| 116 |
+
ema_update_every: 1
|
| 117 |
+
enable_gradient_checkpointing: true
|
| 118 |
+
force_clear_prev_results: false
|
| 119 |
+
grad_accum_every: 1
|
| 120 |
+
log_tensorboard: true
|
| 121 |
+
lr_schedule:
|
| 122 |
+
min_lr_ratio: 0.001
|
| 123 |
+
name: linear_cosine
|
| 124 |
+
total_steps: 4000
|
| 125 |
+
warmup_steps: 10
|
| 126 |
+
max_grad_norm: 1
|
| 127 |
+
mlp_lr: 1.0e-05
|
| 128 |
+
num_train_steps: 4000
|
| 129 |
+
num_valid_batches: null
|
| 130 |
+
num_workers: 8
|
| 131 |
+
other_lr: 1.0e-05
|
| 132 |
+
resume: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.best_29999.pt
|
| 133 |
+
resume_optimizer: false
|
| 134 |
+
save_model_every: 2000
|
| 135 |
+
use_checkpoint_config: true
|
| 136 |
+
use_ema: false
|
| 137 |
+
use_lion: false
|
| 138 |
+
valid_batch_size: 20
|
| 139 |
+
valid_every: 100
|
| 140 |
+
valid_frac: 0.1
|
| 141 |
+
verify_weights_on_load: true
|
| 142 |
+
validate_only: false
|