HaiwenXia committed on
Commit
5eb0aae
·
verified ·
1 Parent(s): 87136b4

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. contrastive/20260123_1403_tune_mulan_transformer/config copy.yaml +163 -0
  2. contrastive/20260123_1403_tune_mulan_transformer/config.yaml +163 -0
  3. contrastive/20260123_1403_tune_mulan_transformer/contrastive_learning/1769148247.0608797/events.out.tfevents.1769148247.MACLAB-S004.302904.1 +3 -0
  4. contrastive/20260123_1403_tune_mulan_transformer/contrastive_learning/1769148247.062121/hparams.yml +5 -0
  5. contrastive/20260123_1403_tune_mulan_transformer/contrastive_learning/events.out.tfevents.1769148247.MACLAB-S004.302904.0 +3 -0
  6. contrastive/20260123_1403_tune_mulan_transformer/logs/train.log +15 -0
  7. contrastive/train.20260123_1403_tune_mulan_transformer.log +0 -0
  8. downstream/20260121_1942/config.yaml +56 -0
  9. downstream/20260121_1942/train.log +113 -0
  10. downstream/20260121_2108/config.yaml +56 -0
  11. downstream/20260121_2108/train.log +26 -0
  12. downstream/20260121_2112/config.yaml +56 -0
  13. downstream/20260121_2112/train.log +41 -0
  14. downstream/20260121_2116/config.yaml +56 -0
  15. downstream/20260121_2116/train.log +41 -0
  16. downstream/20260121_2117/config.yaml +56 -0
  17. downstream/20260121_2117/train.log +114 -0
  18. downstream/20260121_2145/config.yaml +56 -0
  19. downstream/20260121_2145/train.log +128 -0
  20. downstream/20260121_2200/config.yaml +59 -0
  21. downstream/20260121_2200/train.log +8 -0
  22. downstream/20260121_2202/config.yaml +59 -0
  23. downstream/20260121_2202/train.log +34 -0
  24. downstream/20260121_2203/config.yaml +55 -0
  25. downstream/20260121_2203/train.log +94 -0
  26. downstream/20260121_2243/config.yaml +55 -0
  27. downstream/20260121_2243/train.log +112 -0
  28. downstream/20260121_2300/config.yaml +56 -0
  29. downstream/20260121_2300/train.log +7 -0
  30. downstream/20260121_2319/config.yaml +55 -0
  31. downstream/20260121_2319/train.log +45 -0
  32. downstream/20260121_2327/config.yaml +56 -0
  33. downstream/20260121_2327/train.log +156 -0
  34. downstream/20260123_0028/downstream_config.yaml +54 -0
  35. downstream/20260123_0028/train.log +71 -0
  36. downstream_mixed/20260122_1200/config.yaml +56 -0
  37. downstream_mixed/20260122_1200/train.log +152 -0
  38. downstream_mixed/20260122_1955/config.yaml +69 -0
  39. downstream_mixed/20260122_1955/downstream_config.yaml +58 -0
  40. downstream_mixed/20260122_1955/train.log +153 -0
  41. downstream_mixed_linear/20260122_1143/config.yaml +144 -0
  42. downstream_mixed_linear/20260122_1143/downstream_config.yaml +56 -0
  43. downstream_mixed_linear/20260122_1143/predicted_0122_1533.jsonl +0 -0
  44. downstream_mixed_linear/20260122_1143/train.log +111 -0
  45. finetune_human/20260124_2143/config.yaml +142 -0
  46. finetune_human/20260124_2143/reward_model/1769262210.5061178/events.out.tfevents.1769262210.MACLAB-S004.2626926.1 +3 -0
  47. finetune_human/20260124_2143/reward_model/1769262210.5078583/hparams.yml +4 -0
  48. finetune_human/20260124_2143/reward_model/events.out.tfevents.1769262210.MACLAB-S004.2626926.0 +3 -0
  49. finetune_human/20260124_2143/train.20260124_2143.log +803 -0
  50. finetune_human/20260124_2354/config.yaml +142 -0
contrastive/20260123_1403_tune_mulan_transformer/config copy.yaml ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model: #* this is the actual model config used in training
2
+ name: reward
3
+ model_name: OpenMuQ/MuQ-MuLan-large
4
+ dim: 768
5
+ mode: concat_text_late
6
+ attention_mode: CA
7
+ mlp_dim: 768
8
+ output_dim: 2
9
+ sr: 24000
10
+ prompt_tf_depth: 1
11
+ joint_tf_depth: 1
12
+ dim_head: 64
13
+ heads: 8
14
+ attn_dropout: 0.0
15
+ ff_dropout: 0.0
16
+ ff_mult: 4
17
+ use_layer_idx: -1
18
+ freeze_audio: true
19
+ freeze_text: true
20
+ train_muq_depth: 0
21
+ text_encoder:
22
+ name: muq_mulan
23
+ tune: transformer
24
+ model_name: google/flan-t5-base
25
+ max_seq_len: 512
26
+ tune_last_n_layers: 6
27
+ use_lora: false
28
+ lora_r: 64
29
+ lora_alpha: 64
30
+ lora_dropout: 0.1
31
+ lora_target_modules: null
32
+ gradient_checkpointing: true
33
+ downsample:
34
+ eval: mean_4x
35
+ ref: null
36
+ text: mlp
37
+ configs:
38
+ none:
39
+ kind: none
40
+ factor: 1
41
+ mean:
42
+ kind: mean
43
+ factor: 10
44
+ mean_4x:
45
+ kind: mean+mlp
46
+ factor: 4
47
+ mlp_ratio: 2.0
48
+ dropout: 0.0
49
+ conv_4x:
50
+ kind: conv
51
+ factor: 4
52
+ stage: 1
53
+ kernel_size: 5
54
+ use_layernorm: true
55
+ conv2_4x:
56
+ kind: conv*2
57
+ factor: 4
58
+ kernel_size: 5
59
+ use_layernorm: true
60
+ glu_4x:
61
+ kind: gluconv*2+pw
62
+ factor: 4
63
+ kernel_size: 5
64
+ use_layernorm: true
65
+ mlp:
66
+ kind: mean
67
+ factor: 1
68
+ mean_10x:
69
+ kind: mean+mlp
70
+ factor: 10
71
+ mlp_ratio: 2.0
72
+ dropout: 0.0
73
+ mean_30x:
74
+ kind: mean+mlp
75
+ factor: 30
76
+ mlp_ratio: 2.0
77
+ dropout: 0.0
78
+ text_lora_config: null
79
+ null_embedding:
80
+ text:
81
+ dropout: 0.2
82
+ length: 10
83
+ lyrics:
84
+ dropout: 0.3
85
+ length: 10
86
+ audio:
87
+ dropout: 0.5
88
+ length: 10
89
+ category_embeddings: null
90
+ load_config:
91
+ checkpoint_path: null
92
+ frozen_from_pretrained: true
93
+ pretrained_name: OpenMuQ/MuQ-MuLan-large
94
+ strict: false
95
+ project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
96
+ run_name: tune_mulan_transformer
97
+ basics:
98
+ save_dir: ${project_root}/experiments/contrastive
99
+ random_seed: 42
100
+ data:
101
+ mode: raw_text_frozen_audio
102
+ dataset_dir: ${project_root}/CMI-Training/contrastive_training
103
+ generation_index: null
104
+ max_samples: null
105
+ require_embeddings: true
106
+ require_scores: false
107
+ muq_mulan_model: OpenMuQ/MuQ-MuLan-large
108
+ sample_rate: 24000
109
+ gen_max_duration: 600.0
110
+ prompt_max_duration: 600.0
111
+ audio_trick:
112
+ enabled: true
113
+ prob: 0.3
114
+ duration: 10.0
115
+ audio_dropout:
116
+ enabled: true
117
+ min_duration: 500
118
+ max_duration: 1200
119
+ apply_to_eval: true
120
+ apply_to_ref: true
121
+ eval_only_on_training: true
122
+ val_frac: 0.01
123
+ loss:
124
+ loss_schedule:
125
+ use_matching_loss: 0
126
+ use_classification_loss: 0
127
+ use_hard_negatives: 0
128
+ classification_temp: 1.0
129
+ pos_scale: 2.0
130
+ threshold_mode: momentum
131
+ threshold_momentum: 0.99
132
+ global_threshold_path: null
133
+ threshold_percentile: 0.5
134
+ hard_negative_ratio: 0.8
135
+ similarity_fn: matching
136
+ matching_temperature: 0.07
137
+ use_queue: false
138
+ queue_size: 4096
139
+ train:
140
+ num_epochs: 100
141
+ output_dir: experiments/contrastive_learning
142
+ optimizer:
143
+ lr: 0.0005
144
+ muqmulan_lr: 0.0001
145
+ weight_decay: 0.01
146
+ beta1: 0.9
147
+ beta2: 0.999
148
+ scheduler:
149
+ type: cosine
150
+ warmup_steps: 1000
151
+ min_lr: 1.0e-05
152
+ gradient_accumulation_steps: 3
153
+ max_grad_norm: 10.0
154
+ mixed_precision: bf16
155
+ log_with: tensorboard
156
+ log_interval: 10
157
+ val_interval: 1
158
+ save_interval: 5
159
+ resume_from_checkpoint: null
160
+ batch_size: 40
161
+ matching_only_batch_size: 48
162
+ num_workers: 4
163
+ DEVICES: 6,7
contrastive/20260123_1403_tune_mulan_transformer/config.yaml ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model: #* this might be used for downstream tasks
2
+ name: reward
3
+ model_name: OpenMuQ/MuQ-MuLan-large
4
+ dim: 768
5
+ mode: concat_text_late
6
+ attention_mode: CA
7
+ mlp_dim: 768
8
+ output_dim: 2
9
+ sr: 24000
10
+ prompt_tf_depth: 1
11
+ joint_tf_depth: 1
12
+ dim_head: 64
13
+ heads: 8
14
+ attn_dropout: 0.0
15
+ ff_dropout: 0.0
16
+ ff_mult: 4
17
+ use_layer_idx: -1
18
+ freeze_audio: true
19
+ freeze_text: true
20
+ train_muq_depth: 0
21
+ text_encoder:
22
+ name: muq_mulan
23
+ tune: null
24
+ model_name: google/flan-t5-base
25
+ max_seq_len: 512
26
+ tune_last_n_layers: 6
27
+ use_lora: false
28
+ lora_r: 64
29
+ lora_alpha: 64
30
+ lora_dropout: 0.1
31
+ lora_target_modules: null
32
+ gradient_checkpointing: true
33
+ downsample:
34
+ eval: mean_4x
35
+ ref: null
36
+ text: mlp
37
+ configs:
38
+ none:
39
+ kind: none
40
+ factor: 1
41
+ mean:
42
+ kind: mean
43
+ factor: 10
44
+ mean_4x:
45
+ kind: mean+mlp
46
+ factor: 4
47
+ mlp_ratio: 2.0
48
+ dropout: 0.0
49
+ conv_4x:
50
+ kind: conv
51
+ factor: 4
52
+ stage: 1
53
+ kernel_size: 5
54
+ use_layernorm: true
55
+ conv2_4x:
56
+ kind: conv*2
57
+ factor: 4
58
+ kernel_size: 5
59
+ use_layernorm: true
60
+ glu_4x:
61
+ kind: gluconv*2+pw
62
+ factor: 4
63
+ kernel_size: 5
64
+ use_layernorm: true
65
+ mlp:
66
+ kind: mean
67
+ factor: 1
68
+ mean_10x:
69
+ kind: mean+mlp
70
+ factor: 10
71
+ mlp_ratio: 2.0
72
+ dropout: 0.0
73
+ mean_30x:
74
+ kind: mean+mlp
75
+ factor: 30
76
+ mlp_ratio: 2.0
77
+ dropout: 0.0
78
+ text_lora_config: null
79
+ null_embedding:
80
+ text:
81
+ dropout: 0.2
82
+ length: 10
83
+ lyrics:
84
+ dropout: 0.3
85
+ length: 10
86
+ audio:
87
+ dropout: 0.5
88
+ length: 10
89
+ category_embeddings: null
90
+ load_config:
91
+ checkpoint_path: null
92
+ frozen_from_pretrained: true
93
+ pretrained_name: OpenMuQ/MuQ-MuLan-large
94
+ strict: false
95
+ project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
96
+ run_name: tune_mulan_transformer
97
+ basics:
98
+ save_dir: ${project_root}/experiments/contrastive
99
+ random_seed: 42
100
+ data:
101
+ mode: raw_text_frozen_audio
102
+ dataset_dir: ${project_root}/CMI-Training/contrastive_training
103
+ generation_index: null
104
+ max_samples: null
105
+ require_embeddings: true
106
+ require_scores: false
107
+ muq_mulan_model: OpenMuQ/MuQ-MuLan-large
108
+ sample_rate: 24000
109
+ gen_max_duration: 600.0
110
+ prompt_max_duration: 600.0
111
+ audio_trick:
112
+ enabled: true
113
+ prob: 0.3
114
+ duration: 10.0
115
+ audio_dropout:
116
+ enabled: true
117
+ min_duration: 500
118
+ max_duration: 1200
119
+ apply_to_eval: true
120
+ apply_to_ref: true
121
+ eval_only_on_training: true
122
+ val_frac: 0.01
123
+ loss:
124
+ loss_schedule:
125
+ use_matching_loss: 0
126
+ use_classification_loss: 0
127
+ use_hard_negatives: 0
128
+ classification_temp: 1.0
129
+ pos_scale: 2.0
130
+ threshold_mode: momentum
131
+ threshold_momentum: 0.99
132
+ global_threshold_path: null
133
+ threshold_percentile: 0.5
134
+ hard_negative_ratio: 0.8
135
+ similarity_fn: matching
136
+ matching_temperature: 0.07
137
+ use_queue: false
138
+ queue_size: 4096
139
+ train:
140
+ num_epochs: 100
141
+ output_dir: experiments/contrastive_learning
142
+ optimizer:
143
+ lr: 0.0005
144
+ muqmulan_lr: 0.0001
145
+ weight_decay: 0.01
146
+ beta1: 0.9
147
+ beta2: 0.999
148
+ scheduler:
149
+ type: cosine
150
+ warmup_steps: 1000
151
+ min_lr: 1.0e-05
152
+ gradient_accumulation_steps: 3
153
+ max_grad_norm: 10.0
154
+ mixed_precision: bf16
155
+ log_with: tensorboard
156
+ log_interval: 10
157
+ val_interval: 1
158
+ save_interval: 5
159
+ resume_from_checkpoint: null
160
+ batch_size: 40
161
+ matching_only_batch_size: 48
162
+ num_workers: 4
163
+ DEVICES: 6,7
contrastive/20260123_1403_tune_mulan_transformer/contrastive_learning/1769148247.0608797/events.out.tfevents.1769148247.MACLAB-S004.302904.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a00f486274e8f7144a0e8f471355f64b41223095199a68f18fe63bcdc3730550
3
+ size 539
contrastive/20260123_1403_tune_mulan_transformer/contrastive_learning/1769148247.062121/hparams.yml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ batch_size: 40
2
+ grad_accum_steps: 3
3
+ learning_rate: 0.0005
4
+ mode: raw_text_frozen_audio
5
+ num_epochs: 100
contrastive/20260123_1403_tune_mulan_transformer/contrastive_learning/events.out.tfevents.1769148247.MACLAB-S004.302904.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fa0b781f4514bb879e7966e7b64a7d314194abd176a69d93d0fdc13b4faf225
3
+ size 10147608
contrastive/20260123_1403_tune_mulan_transformer/logs/train.log ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-23 14:04:07 | INFO | EMA enabled: decay=0.9999, update_every=1
2
+ 2026-01-23 14:04:07 | INFO | Created model: concat_text_late mode, CA attention
3
+ 2026-01-23 14:04:07 | INFO | Gradient checkpointing enabled
4
+ 2026-01-23 14:04:07 | INFO | Created ContrastiveLoss: threshold_mode=momentum, percentile=0.5
5
+ 2026-01-23 14:04:07 | INFO | Optimizer: AdamW (lr=0.0005, muqmulan_lr=0.0001)
6
+ 2026-01-23 14:04:07 | INFO | Scheduler: Warmup(1000) + CosineAnnealing
7
+ 2026-01-23 14:04:07 | INFO | Train: 1514 batches, Val: 8 batches
8
+ 2026-01-23 14:04:07 | INFO | ✓ Audio cropping enabled: min=500, max=1200
9
+ 2026-01-23 14:04:07 | INFO | Apply to eval: True, ref: True
10
+ 2026-01-23 14:04:07 | INFO | Modes: train=random, val=center
11
+ 2026-01-23 14:04:07 | INFO | Batch sizes: normal=40, matching_only=48
12
+ 2026-01-23 14:04:07 | INFO | Parameters: 683.856M total, 71.623M trainable
13
+ 2026-01-23 14:04:07 | INFO | Trainable modules: null_text_embedding, null_lyrics_embedding, null_audio_embedding, prompt_transformer, joint_transformer, score_projector, single_score_projector, alignment_head, quality_head, text_module, eval_downsampler, ref_downsampler
14
+ 2026-01-23 14:04:07 | INFO | Trainer initialized on 2 GPU(s)
15
+ 2026-01-23 14:04:07 | INFO | Mixed precision: bf16
contrastive/train.20260123_1403_tune_mulan_transformer.log ADDED
The diff for this file is too large to render. See raw diff
 
downstream/20260121_1942/config.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
2
+ basics:
3
+ random_seed: 42
4
+ save_dir: ${project_root}/experiments/downstream
5
+ run_name: null
6
+ tasks:
7
+ - musicality
8
+ - alignment
9
+ - preference
10
+ backbone:
11
+ checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
12
+ freeze: true
13
+ from_ema: false
14
+ dataset:
15
+ train_file: ${project_root}/train_multitask.jsonl
16
+ test_file: ${project_root}/test_multitask.jsonl
17
+ heads:
18
+ hidden_dim: 768
19
+ init_from: null
20
+ musicality:
21
+ use_mlp: false
22
+ ordinal: false
23
+ dropout: 0.1
24
+ num_categories: 9
25
+ y_min: 1.0
26
+ y_max: 5.0
27
+ step: 0.5
28
+ alignment:
29
+ use_mlp: false
30
+ ordinal: false
31
+ dropout: 0.1
32
+ num_categories: 9
33
+ y_min: 1.0
34
+ y_max: 5.0
35
+ step: 0.5
36
+ preference:
37
+ use_mlp: false
38
+ dropout: 0.1
39
+ train:
40
+ num_epochs: 10
41
+ num_train_steps: 2000
42
+ batch_size: 48
43
+ learning_rate: 0.001
44
+ weight_decay: 0.01
45
+ max_grad_norm: 1.0
46
+ warmup_steps: 100
47
+ schedule_type: cosine
48
+ min_lr_ratio: 0.01
49
+ dataset_mode: sequential
50
+ steps_per_task: 1000
51
+ log_interval: 50
52
+ val_interval: 1000
53
+ save_interval: 1000
54
+ num_workers: 8
55
+ resume: null
56
+ device: cuda:0
downstream/20260121_1942/train.log ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-21 19:42:29 | INFO | Starting downstream training: 20260121_1942
2
+ 2026-01-21 19:42:29 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_1942
3
+ 2026-01-21 19:42:29 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_1942/config.yaml
4
+ 2026-01-21 19:42:29 | INFO | Training tasks: ['musicality', 'alignment', 'preference']
5
+ 2026-01-21 19:42:29 | INFO | Dataset mode: sequential
6
+ 2026-01-21 19:42:32 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
7
+ 2026-01-21 19:42:32 | INFO | Using checkpoint config for model
8
+ 2026-01-21 19:42:38 | WARNING | Missing keys: 283
9
+ 2026-01-21 19:42:38 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
10
+ 2026-01-21 19:42:38 | INFO | Added linear head for task 'musicality'
11
+ 2026-01-21 19:42:38 | INFO | Added linear head for task 'alignment'
12
+ 2026-01-21 19:42:38 | INFO | Added linear head for task 'preference'
13
+ 2026-01-21 19:42:38 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
14
+ 2026-01-21 19:42:38 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
15
+ 2026-01-21 19:42:38 | INFO | Task 'musicality': train=4322, test=913
16
+ 2026-01-21 19:42:38 | INFO | Task 'alignment': train=1923, test=913
17
+ 2026-01-21 19:42:38 | INFO | Task 'preference': train=1065, test=275
18
+ 2026-01-21 19:42:38 | INFO | [SEQUENTIAL MODE] Training 3 tasks, 1000 steps each
19
+ 2026-01-21 19:42:38 | INFO |
20
+ ============================================================
21
+ 2026-01-21 19:42:38 | INFO | Starting Task 1/3: musicality
22
+ 2026-01-21 19:42:38 | INFO | ============================================================
23
+ 2026-01-21 19:42:38 | INFO | Task 'musicality' trainable parameters: 769
24
+ 2026-01-21 19:42:54 | INFO | [Task musicality][Step 50/1000] loss=2.4938 | mae=2.4938
25
+ 2026-01-21 19:43:10 | INFO | [Task musicality][Step 100/1000] loss=1.0211 | mae=1.0211
26
+ 2026-01-21 19:43:19 | INFO | [Task musicality][Step 150/1000] loss=0.8535 | mae=0.8535
27
+ 2026-01-21 19:43:32 | INFO | [Task musicality][Step 200/1000] loss=0.7434 | mae=0.7434
28
+ 2026-01-21 19:43:43 | INFO | [Task musicality][Step 250/1000] loss=0.7352 | mae=0.7352
29
+ 2026-01-21 19:43:56 | INFO | [Task musicality][Step 300/1000] loss=0.6973 | mae=0.6973
30
+ 2026-01-21 19:44:06 | INFO | [Task musicality][Step 350/1000] loss=0.6773 | mae=0.6773
31
+ 2026-01-21 19:44:19 | INFO | [Task musicality][Step 400/1000] loss=0.6354 | mae=0.6354
32
+ 2026-01-21 19:44:30 | INFO | [Task musicality][Step 450/1000] loss=0.6352 | mae=0.6352
33
+ 2026-01-21 19:44:43 | INFO | [Task musicality][Step 500/1000] loss=0.6213 | mae=0.6213
34
+ 2026-01-21 19:44:56 | INFO | [Task musicality][Step 550/1000] loss=0.6211 | mae=0.6211
35
+ 2026-01-21 19:45:08 | INFO | [Task musicality][Step 600/1000] loss=0.5835 | mae=0.5835
36
+ 2026-01-21 19:45:21 | INFO | [Task musicality][Step 650/1000] loss=0.5852 | mae=0.5852
37
+ 2026-01-21 19:45:31 | INFO | [Task musicality][Step 700/1000] loss=0.5859 | mae=0.5859
38
+ 2026-01-21 19:45:45 | INFO | [Task musicality][Step 750/1000] loss=0.5658 | mae=0.5658
39
+ 2026-01-21 19:45:55 | INFO | [Task musicality][Step 800/1000] loss=0.5481 | mae=0.5481
40
+ 2026-01-21 19:46:09 | INFO | [Task musicality][Step 850/1000] loss=0.5400 | mae=0.5400
41
+ 2026-01-21 19:46:19 | INFO | [Task musicality][Step 900/1000] loss=0.5486 | mae=0.5486
42
+ 2026-01-21 19:46:32 | INFO | [Task musicality][Step 950/1000] loss=0.5365 | mae=0.5365
43
+ 2026-01-21 19:46:46 | INFO | [Task musicality][Step 1000/1000] loss=0.5278 | mae=0.5278
44
+ 2026-01-21 19:46:52 | INFO | [Val] musicality: loss=1.4280 | mae=1.4280
45
+ 2026-01-21 19:46:52 | INFO | Task 'musicality' complete. Running validation...
46
+ 2026-01-21 19:46:57 | INFO | [Final Val for musicality] loss=1.4280 | mae=1.4280
47
+ 2026-01-21 19:46:57 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_1942/ckpt/task_musicality_final.pt
48
+ 2026-01-21 19:46:57 | INFO |
49
+ ============================================================
50
+ 2026-01-21 19:46:57 | INFO | Starting Task 2/3: alignment
51
+ 2026-01-21 19:46:57 | INFO | ============================================================
52
+ 2026-01-21 19:46:57 | INFO | Task 'alignment' trainable parameters: 769
53
+ 2026-01-21 19:47:05 | INFO | [Task alignment][Step 50/1000] loss=2.4024 | mae=2.4024
54
+ 2026-01-21 19:47:11 | INFO | [Task alignment][Step 100/1000] loss=0.9916 | mae=0.9916
55
+ 2026-01-21 19:47:18 | INFO | [Task alignment][Step 150/1000] loss=0.8597 | mae=0.8597
56
+ 2026-01-21 19:47:24 | INFO | [Task alignment][Step 200/1000] loss=0.7863 | mae=0.7863
57
+ 2026-01-21 19:47:33 | INFO | [Task alignment][Step 250/1000] loss=0.7473 | mae=0.7473
58
+ 2026-01-21 19:47:40 | INFO | [Task alignment][Step 300/1000] loss=0.7150 | mae=0.7150
59
+ 2026-01-21 19:47:46 | INFO | [Task alignment][Step 350/1000] loss=0.7370 | mae=0.7370
60
+ 2026-01-21 19:47:54 | INFO | [Task alignment][Step 400/1000] loss=0.7159 | mae=0.7159
61
+ 2026-01-21 19:48:02 | INFO | [Task alignment][Step 450/1000] loss=0.6949 | mae=0.6949
62
+ 2026-01-21 19:48:10 | INFO | [Task alignment][Step 500/1000] loss=0.6663 | mae=0.6663
63
+ 2026-01-21 19:48:18 | INFO | [Task alignment][Step 550/1000] loss=0.6535 | mae=0.6535
64
+ 2026-01-21 19:48:24 | INFO | [Task alignment][Step 600/1000] loss=0.6601 | mae=0.6601
65
+ 2026-01-21 19:48:32 | INFO | [Task alignment][Step 650/1000] loss=0.6296 | mae=0.6296
66
+ 2026-01-21 19:48:39 | INFO | [Task alignment][Step 700/1000] loss=0.5975 | mae=0.5975
67
+ 2026-01-21 19:48:45 | INFO | [Task alignment][Step 750/1000] loss=0.5953 | mae=0.5953
68
+ 2026-01-21 19:48:51 | INFO | [Task alignment][Step 800/1000] loss=0.5881 | mae=0.5881
69
+ 2026-01-21 19:49:00 | INFO | [Task alignment][Step 850/1000] loss=0.5822 | mae=0.5822
70
+ 2026-01-21 19:49:06 | INFO | [Task alignment][Step 900/1000] loss=0.5667 | mae=0.5667
71
+ 2026-01-21 19:49:12 | INFO | [Task alignment][Step 950/1000] loss=0.5752 | mae=0.5752
72
+ 2026-01-21 19:49:20 | INFO | [Task alignment][Step 1000/1000] loss=0.5615 | mae=0.5615
73
+ 2026-01-21 19:49:23 | INFO | [Val] alignment: loss=1.3554 | mae=1.3554
74
+ 2026-01-21 19:49:23 | INFO | Task 'alignment' complete. Running validation...
75
+ 2026-01-21 19:49:28 | INFO | [Final Val for alignment] loss=1.3554 | mae=1.3554
76
+ 2026-01-21 19:49:28 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_1942/ckpt/task_alignment_final.pt
77
+ 2026-01-21 19:49:28 | INFO |
78
+ ============================================================
79
+ 2026-01-21 19:49:28 | INFO | Starting Task 3/3: preference
80
+ 2026-01-21 19:49:28 | INFO | ============================================================
81
+ 2026-01-21 19:49:28 | INFO | Task 'preference' trainable parameters: 769
82
+ 2026-01-21 19:50:48 | INFO | [Task preference][Step 50/1000] loss=0.9186 | accuracy=0.5758
83
+ 2026-01-21 19:51:56 | INFO | [Task preference][Step 100/1000] loss=0.5793 | accuracy=0.7038
84
+ 2026-01-21 19:53:04 | INFO | [Task preference][Step 150/1000] loss=0.5304 | accuracy=0.7379
85
+ 2026-01-21 19:54:19 | INFO | [Task preference][Step 200/1000] loss=0.5223 | accuracy=0.7467
86
+ 2026-01-21 19:55:29 | INFO | [Task preference][Step 250/1000] loss=0.4683 | accuracy=0.7692
87
+ 2026-01-21 19:56:38 | INFO | [Task preference][Step 300/1000] loss=0.4672 | accuracy=0.7746
88
+ 2026-01-21 19:57:49 | INFO | [Task preference][Step 350/1000] loss=0.4507 | accuracy=0.7767
89
+ 2026-01-21 19:59:03 | INFO | [Task preference][Step 400/1000] loss=0.4282 | accuracy=0.7896
90
+ 2026-01-21 20:00:12 | INFO | [Task preference][Step 450/1000] loss=0.4177 | accuracy=0.8004
91
+ 2026-01-21 20:01:21 | INFO | [Task preference][Step 500/1000] loss=0.4111 | accuracy=0.8008
92
+ 2026-01-21 20:02:29 | INFO | [Task preference][Step 550/1000] loss=0.3968 | accuracy=0.8163
93
+ 2026-01-21 20:03:42 | INFO | [Task preference][Step 600/1000] loss=0.3853 | accuracy=0.8217
94
+ 2026-01-21 20:04:56 | INFO | [Task preference][Step 650/1000] loss=0.3910 | accuracy=0.8088
95
+ 2026-01-21 20:06:09 | INFO | [Task preference][Step 700/1000] loss=0.3718 | accuracy=0.8300
96
+ 2026-01-21 20:07:32 | INFO | [Task preference][Step 750/1000] loss=0.3781 | accuracy=0.8221
97
+ 2026-01-21 20:08:42 | INFO | [Task preference][Step 800/1000] loss=0.3685 | accuracy=0.8263
98
+ 2026-01-21 20:09:56 | INFO | [Task preference][Step 850/1000] loss=0.3704 | accuracy=0.8304
99
+ 2026-01-21 20:11:07 | INFO | [Task preference][Step 900/1000] loss=0.3680 | accuracy=0.8279
100
+ 2026-01-21 20:12:22 | INFO | [Task preference][Step 950/1000] loss=0.3623 | accuracy=0.8358
101
+ 2026-01-21 20:13:32 | INFO | [Task preference][Step 1000/1000] loss=0.3644 | accuracy=0.8296
102
+ 2026-01-21 20:13:44 | INFO | [Val] preference: loss=0.5676 | accuracy=0.7223
103
+ 2026-01-21 20:13:44 | INFO | Task 'preference' complete. Running validation...
104
+ 2026-01-21 20:13:52 | INFO | [Final Val for preference] loss=0.5676 | accuracy=0.7223
105
+ 2026-01-21 20:13:52 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_1942/ckpt/task_preference_final.pt
106
+ 2026-01-21 20:13:52 | INFO |
107
+ ============================================================
108
+ 2026-01-21 20:13:52 | INFO | All tasks complete. Running final validation for all tasks...
109
+ 2026-01-21 20:13:57 | INFO | [Final Val] musicality: loss=1.4280 | mae=1.4280
110
+ 2026-01-21 20:14:01 | INFO | [Final Val] alignment: loss=1.3554 | mae=1.3554
111
+ 2026-01-21 20:14:09 | INFO | [Final Val] preference: loss=0.5676 | accuracy=0.7223
112
+ 2026-01-21 20:14:09 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_1942/ckpt/downstream_final.pt
113
+ 2026-01-21 20:14:09 | INFO | Done! Checkpoint saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_1942/ckpt/downstream_final.pt
downstream/20260121_2108/config.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
2
+ basics:
3
+ random_seed: 42
4
+ save_dir: ${project_root}/experiments/downstream
5
+ run_name: null
6
+ tasks:
7
+ - musicality
8
+ - alignment
9
+ - preference
10
+ backbone:
11
+ checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
12
+ freeze: true
13
+ from_ema: false
14
+ dataset:
15
+ train_file: ${project_root}/train_multitask.jsonl
16
+ test_file: ${project_root}/test_multitask.jsonl
17
+ heads:
18
+ hidden_dim: 768
19
+ init_from: score_projector
20
+ musicality:
21
+ use_mlp: false
22
+ ordinal: false
23
+ dropout: 0.1
24
+ num_categories: 9
25
+ y_min: 1.0
26
+ y_max: 5.0
27
+ step: 0.5
28
+ alignment:
29
+ use_mlp: false
30
+ ordinal: false
31
+ dropout: 0.1
32
+ num_categories: 9
33
+ y_min: 1.0
34
+ y_max: 5.0
35
+ step: 0.5
36
+ preference:
37
+ use_mlp: false
38
+ dropout: 0.1
39
+ train:
40
+ num_epochs: 10
41
+ num_train_steps: 2000
42
+ batch_size: 48
43
+ learning_rate: 0.001
44
+ weight_decay: 0.01
45
+ max_grad_norm: 1.0
46
+ warmup_steps: 100
47
+ schedule_type: cosine
48
+ min_lr_ratio: 0.01
49
+ dataset_mode: sequential
50
+ steps_per_task: 1000
51
+ log_interval: 50
52
+ val_interval: 1000
53
+ save_interval: 1000
54
+ num_workers: 8
55
+ resume: null
56
+ device: cuda:0
downstream/20260121_2108/train.log ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-21 21:08:36 | INFO | Starting downstream training: 20260121_2108
2
+ 2026-01-21 21:08:36 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2108
3
+ 2026-01-21 21:08:36 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2108/config.yaml
4
+ 2026-01-21 21:08:36 | INFO | Training tasks: ['musicality', 'alignment', 'preference']
5
+ 2026-01-21 21:08:36 | INFO | Dataset mode: sequential
6
+ 2026-01-21 21:08:39 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
7
+ 2026-01-21 21:08:39 | INFO | Using checkpoint config for model
8
+ 2026-01-21 21:08:44 | WARNING | Missing keys: 283
9
+ 2026-01-21 21:08:45 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
10
+ 2026-01-21 21:08:45 | INFO | Added linear head for task 'musicality'
11
+ 2026-01-21 21:08:45 | INFO | Added linear head for task 'alignment'
12
+ 2026-01-21 21:08:45 | INFO | Added linear head for task 'preference'
13
+ 2026-01-21 21:08:45 | INFO | Initializing heads from backbone 'score_projector'
14
+ 2026-01-21 21:08:45 | INFO | Initializing 3 heads from 'score_projector'
15
+ 2026-01-21 21:08:45 | INFO | Task 'musicality': type=linear, ordinal=False
16
+ 2026-01-21 21:08:45 | INFO | Initializing head from 'score_projector' (type=linear, from_ema=False)
17
+ 2026-01-21 21:08:45 | INFO | Warning: output dim mismatch (2 vs 1), using first 1 dims
18
+ 2026-01-21 21:08:45 | INFO | Initialized final linear layer from source
19
+ 2026-01-21 21:08:45 | INFO | ✓ Head initialized from 'score_projector'
20
+ 2026-01-21 21:08:45 | INFO | Task 'alignment': type=linear, ordinal=False
21
+ 2026-01-21 21:08:45 | INFO | Initializing head from 'score_projector' (type=linear, from_ema=False)
22
+ 2026-01-21 21:08:45 | INFO | Warning: output dim mismatch (2 vs 1), using first 1 dims
23
+ 2026-01-21 21:08:45 | INFO | Initialized final linear layer from source
24
+ 2026-01-21 21:08:45 | INFO | ✓ Head initialized from 'score_projector'
25
+ 2026-01-21 21:08:45 | INFO | Task 'preference': type=linear, ordinal=False
26
+ 2026-01-21 21:08:45 | INFO | Initializing head from 'score_projector' (type=linear, from_ema=False)
downstream/20260121_2112/config.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
2
+ basics:
3
+ random_seed: 42
4
+ save_dir: ${project_root}/experiments/downstream
5
+ run_name: null
6
+ tasks:
7
+ - musicality
8
+ - alignment
9
+ - preference
10
+ backbone:
11
+ checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
12
+ freeze: true
13
+ from_ema: false
14
+ dataset:
15
+ train_file: ${project_root}/train_multitask.jsonl
16
+ test_file: ${project_root}/test_multitask.jsonl
17
+ heads:
18
+ hidden_dim: 768
19
+ init_from: score_projector
20
+ musicality:
21
+ use_mlp: false
22
+ ordinal: false
23
+ dropout: 0.1
24
+ num_categories: 9
25
+ y_min: 1.0
26
+ y_max: 5.0
27
+ step: 0.5
28
+ alignment:
29
+ use_mlp: false
30
+ ordinal: false
31
+ dropout: 0.1
32
+ num_categories: 9
33
+ y_min: 1.0
34
+ y_max: 5.0
35
+ step: 0.5
36
+ preference:
37
+ use_mlp: false
38
+ dropout: 0.1
39
+ train:
40
+ num_epochs: 10
41
+ num_train_steps: 2000
42
+ batch_size: 48
43
+ learning_rate: 0.001
44
+ weight_decay: 0.01
45
+ max_grad_norm: 1.0
46
+ warmup_steps: 100
47
+ schedule_type: cosine
48
+ min_lr_ratio: 0.01
49
+ dataset_mode: sequential
50
+ steps_per_task: 1000
51
+ log_interval: 50
52
+ val_interval: 1000
53
+ save_interval: 1000
54
+ num_workers: 8
55
+ resume: null
56
+ device: cuda:0
downstream/20260121_2112/train.log ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-21 21:12:23 | INFO | Starting downstream training: 20260121_2112
2
+ 2026-01-21 21:12:23 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2112
3
+ 2026-01-21 21:12:23 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2112/config.yaml
4
+ 2026-01-21 21:12:23 | INFO | Training tasks: ['musicality', 'alignment', 'preference']
5
+ 2026-01-21 21:12:23 | INFO | Dataset mode: sequential
6
+ 2026-01-21 21:12:26 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
7
+ 2026-01-21 21:12:26 | INFO | Using checkpoint config for model
8
+ 2026-01-21 21:12:31 | WARNING | Missing keys: 283
9
+ 2026-01-21 21:12:32 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
10
+ 2026-01-21 21:12:32 | INFO | Added linear head for task 'musicality'
11
+ 2026-01-21 21:12:32 | INFO | Added linear head for task 'alignment'
12
+ 2026-01-21 21:12:32 | INFO | Added linear head for task 'preference'
13
+ 2026-01-21 21:12:32 | INFO | Initializing heads from backbone 'score_projector'
14
+ 2026-01-21 21:12:32 | INFO | Initializing 3 heads from 'score_projector'
15
+ 2026-01-21 21:12:32 | INFO | Task 'musicality': type=linear, ordinal=False
16
+ 2026-01-21 21:12:32 | INFO | Initializing head from 'score_projector' (type=linear, from_ema=False)
17
+ 2026-01-21 21:12:32 | INFO | Warning: output dim mismatch (2 vs 1), using first 1 dims
18
+ 2026-01-21 21:12:32 | INFO | Initialized final linear layer from source
19
+ 2026-01-21 21:12:32 | INFO | ✓ Head initialized from 'score_projector'
20
+ 2026-01-21 21:12:32 | INFO | Task 'alignment': type=linear, ordinal=False
21
+ 2026-01-21 21:12:32 | INFO | Initializing head from 'score_projector' (type=linear, from_ema=False)
22
+ 2026-01-21 21:12:32 | INFO | Warning: output dim mismatch (2 vs 1), using first 1 dims
23
+ 2026-01-21 21:12:32 | INFO | Initialized final linear layer from source
24
+ 2026-01-21 21:12:32 | INFO | ✓ Head initialized from 'score_projector'
25
+ 2026-01-21 21:12:32 | INFO | Task 'preference': type=linear, ordinal=False
26
+ 2026-01-21 21:12:32 | INFO | Initializing head from 'score_projector' (type=linear, from_ema=False)
27
+ 2026-01-21 21:12:32 | INFO | Warning: output dim mismatch (2 vs 1), using first 1 dims
28
+ 2026-01-21 21:12:32 | INFO | Initialized final linear layer from source
29
+ 2026-01-21 21:12:32 | INFO | ✓ Head initialized from 'score_projector'
30
+ 2026-01-21 21:12:32 | INFO | ✓ All heads initialized
31
+ 2026-01-21 21:12:32 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
32
+ 2026-01-21 21:12:32 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
33
+ 2026-01-21 21:12:32 | INFO | Task 'musicality': train=4322, test=913
34
+ 2026-01-21 21:12:32 | INFO | Task 'alignment': train=1923, test=913
35
+ 2026-01-21 21:12:32 | INFO | Task 'preference': train=1065, test=275
36
+ 2026-01-21 21:12:32 | INFO | [SEQUENTIAL MODE] Training 3 tasks, 1000 steps each
37
+ 2026-01-21 21:12:32 | INFO |
38
+ ============================================================
39
+ 2026-01-21 21:12:32 | INFO | Starting Task 1/3: musicality
40
+ 2026-01-21 21:12:32 | INFO | ============================================================
41
+ 2026-01-21 21:12:32 | INFO | Task 'musicality' trainable parameters: 769
downstream/20260121_2116/config.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
2
+ basics:
3
+ random_seed: 42
4
+ save_dir: ${project_root}/experiments/downstream
5
+ run_name: null
6
+ tasks:
7
+ - musicality
8
+ - alignment
9
+ - preference
10
+ backbone:
11
+ checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
12
+ freeze: true
13
+ from_ema: false
14
+ dataset:
15
+ train_file: ${project_root}/train_multitask.jsonl
16
+ test_file: ${project_root}/test_multitask.jsonl
17
+ heads:
18
+ hidden_dim: 768
19
+ init_from: score_projector
20
+ musicality:
21
+ use_mlp: false
22
+ ordinal: false
23
+ dropout: 0.1
24
+ num_categories: 9
25
+ y_min: 1.0
26
+ y_max: 5.0
27
+ step: 0.5
28
+ alignment:
29
+ use_mlp: false
30
+ ordinal: false
31
+ dropout: 0.1
32
+ num_categories: 9
33
+ y_min: 1.0
34
+ y_max: 5.0
35
+ step: 0.5
36
+ preference:
37
+ use_mlp: false
38
+ dropout: 0.1
39
+ train:
40
+ num_epochs: 10
41
+ num_train_steps: 2000
42
+ batch_size: 48
43
+ learning_rate: 0.001
44
+ weight_decay: 0.01
45
+ max_grad_norm: 1.0
46
+ warmup_steps: 100
47
+ schedule_type: cosine
48
+ min_lr_ratio: 0.01
49
+ dataset_mode: sequential
50
+ steps_per_task: 1000
51
+ log_interval: 50
52
+ val_interval: 1000
53
+ save_interval: 1000
54
+ num_workers: 8
55
+ resume: null
56
+ device: cuda:0
downstream/20260121_2116/train.log ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-21 21:16:13 | INFO | Starting downstream training: 20260121_2116
2
+ 2026-01-21 21:16:13 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2116
3
+ 2026-01-21 21:16:13 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2116/config.yaml
4
+ 2026-01-21 21:16:13 | INFO | Training tasks: ['musicality', 'alignment', 'preference']
5
+ 2026-01-21 21:16:13 | INFO | Dataset mode: sequential
6
+ 2026-01-21 21:16:16 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
7
+ 2026-01-21 21:16:16 | INFO | Using checkpoint config for model
8
+ 2026-01-21 21:16:22 | WARNING | Missing keys: 283
9
+ 2026-01-21 21:16:22 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
10
+ 2026-01-21 21:16:22 | INFO | Added linear head for task 'musicality'
11
+ 2026-01-21 21:16:22 | INFO | Added linear head for task 'alignment'
12
+ 2026-01-21 21:16:22 | INFO | Added linear head for task 'preference'
13
+ 2026-01-21 21:16:22 | INFO | Initializing heads from backbone 'score_projector'
14
+ 2026-01-21 21:16:22 | INFO | Initializing 3 heads from 'score_projector'
15
+ 2026-01-21 21:16:22 | INFO | Task 'musicality': type=linear, ordinal=False
16
+ 2026-01-21 21:16:22 | INFO | Initializing head from 'score_projector' (type=linear, from_ema=False)
17
+ 2026-01-21 21:16:23 | INFO | Warning: output dim mismatch (2 vs 1), using first 1 dims
18
+ 2026-01-21 21:16:23 | INFO | Initialized final linear layer from source
19
+ 2026-01-21 21:16:23 | INFO | ✓ Head initialized from 'score_projector'
20
+ 2026-01-21 21:16:23 | INFO | Task 'alignment': type=linear, ordinal=False
21
+ 2026-01-21 21:16:23 | INFO | Initializing head from 'score_projector' (type=linear, from_ema=False)
22
+ 2026-01-21 21:16:23 | INFO | Warning: output dim mismatch (2 vs 1), using first 1 dims
23
+ 2026-01-21 21:16:23 | INFO | Initialized final linear layer from source
24
+ 2026-01-21 21:16:23 | INFO | ✓ Head initialized from 'score_projector'
25
+ 2026-01-21 21:16:23 | INFO | Task 'preference': type=linear, ordinal=False
26
+ 2026-01-21 21:16:23 | INFO | Initializing head from 'score_projector' (type=linear, from_ema=False)
27
+ 2026-01-21 21:16:23 | INFO | Warning: output dim mismatch (2 vs 1), using first 1 dims
28
+ 2026-01-21 21:16:23 | INFO | Initialized final linear layer from source
29
+ 2026-01-21 21:16:23 | INFO | ✓ Head initialized from 'score_projector'
30
+ 2026-01-21 21:16:23 | INFO | ✓ All heads initialized
31
+ 2026-01-21 21:16:23 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
32
+ 2026-01-21 21:16:23 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
33
+ 2026-01-21 21:16:23 | INFO | Task 'musicality': train=4322, test=913
34
+ 2026-01-21 21:16:23 | INFO | Task 'alignment': train=1923, test=913
35
+ 2026-01-21 21:16:23 | INFO | Task 'preference': train=1065, test=275
36
+ 2026-01-21 21:16:23 | INFO | [SEQUENTIAL MODE] Training 3 tasks, 1000 steps each
37
+ 2026-01-21 21:16:23 | INFO |
38
+ ============================================================
39
+ 2026-01-21 21:16:23 | INFO | Starting Task 1/3: musicality
40
+ 2026-01-21 21:16:23 | INFO | ============================================================
41
+ 2026-01-21 21:16:23 | INFO | Task 'musicality' trainable parameters: 769
downstream/20260121_2117/config.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
2
+ basics:
3
+ random_seed: 42
4
+ save_dir: ${project_root}/experiments/downstream
5
+ run_name: null
6
+ tasks:
7
+ - musicality
8
+ - alignment
9
+ - preference
10
+ backbone:
11
+ checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
12
+ freeze: true
13
+ from_ema: false
14
+ dataset:
15
+ train_file: ${project_root}/train_multitask.jsonl
16
+ test_file: ${project_root}/test_multitask.jsonl
17
+ heads:
18
+ hidden_dim: 768
19
+ init_from: score_projector
20
+ musicality:
21
+ use_mlp: false
22
+ ordinal: false
23
+ dropout: 0.1
24
+ num_categories: 9
25
+ y_min: 1.0
26
+ y_max: 5.0
27
+ step: 0.5
28
+ alignment:
29
+ use_mlp: false
30
+ ordinal: false
31
+ dropout: 0.1
32
+ num_categories: 9
33
+ y_min: 1.0
34
+ y_max: 5.0
35
+ step: 0.5
36
+ preference:
37
+ use_mlp: false
38
+ dropout: 0.1
39
+ train:
40
+ num_epochs: 10
41
+ num_train_steps: 2000
42
+ batch_size: 48
43
+ learning_rate: 0.001
44
+ weight_decay: 0.01
45
+ max_grad_norm: 1.0
46
+ warmup_steps: 100
47
+ schedule_type: cosine
48
+ min_lr_ratio: 0.01
49
+ dataset_mode: sequential
50
+ steps_per_task: 1000
51
+ log_interval: 50
52
+ val_interval: 1000
53
+ save_interval: 1000
54
+ num_workers: 8
55
+ resume: null
56
+ device: cuda:1
downstream/20260121_2117/train.log ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-21 21:17:16 | INFO | Starting downstream training: 20260121_2117
2
+ 2026-01-21 21:17:16 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2117
3
+ 2026-01-21 21:17:16 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2117/config.yaml
4
+ 2026-01-21 21:17:16 | INFO | Training tasks: ['musicality', 'alignment', 'preference']
5
+ 2026-01-21 21:17:16 | INFO | Dataset mode: sequential
6
+ 2026-01-21 21:17:19 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
7
+ 2026-01-21 21:17:19 | INFO | Using checkpoint config for model
8
+ 2026-01-21 21:17:24 | WARNING | Missing keys: 283
9
+ 2026-01-21 21:17:25 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
10
+ 2026-01-21 21:17:25 | INFO | Added linear head for task 'musicality'
11
+ 2026-01-21 21:17:25 | INFO | Added linear head for task 'alignment'
12
+ 2026-01-21 21:17:25 | INFO | Added linear head for task 'preference'
13
+ 2026-01-21 21:17:25 | INFO | Initializing heads from backbone 'score_projector'
14
+ 2026-01-21 21:17:25 | INFO | Initializing 3 heads from 'score_projector'
15
+ 2026-01-21 21:17:25 | INFO | Task 'musicality': type=linear, ordinal=False
16
+ 2026-01-21 21:17:25 | INFO | Initializing head from 'score_projector' (type=linear, from_ema=False)
17
+ 2026-01-21 21:17:25 | INFO | Warning: output dim mismatch (2 vs 1), using first 1 dims
18
+ 2026-01-21 21:17:25 | INFO | Initialized final linear layer from source
19
+ 2026-01-21 21:17:25 | INFO | ✓ Head initialized from 'score_projector'
20
+ 2026-01-21 21:17:25 | INFO | Task 'alignment': type=linear, ordinal=False
21
+ 2026-01-21 21:17:25 | INFO | Initializing head from 'score_projector' (type=linear, from_ema=False)
22
+ 2026-01-21 21:17:25 | INFO | Warning: output dim mismatch (2 vs 1), using first 1 dims
23
+ 2026-01-21 21:17:25 | INFO | Initialized final linear layer from source
24
+ 2026-01-21 21:17:25 | INFO | ✓ Head initialized from 'score_projector'
25
+ 2026-01-21 21:17:25 | INFO | Task 'preference': type=linear, ordinal=False
26
+ 2026-01-21 21:17:25 | INFO | Initializing head from 'score_projector' (type=linear, from_ema=False)
27
+ 2026-01-21 21:17:25 | INFO | Warning: output dim mismatch (2 vs 1), using first 1 dims
28
+ 2026-01-21 21:17:25 | INFO | Initialized final linear layer from source
29
+ 2026-01-21 21:17:25 | INFO | ✓ Head initialized from 'score_projector'
30
+ 2026-01-21 21:17:25 | INFO | ✓ All heads initialized
31
+ 2026-01-21 21:17:25 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
32
+ 2026-01-21 21:17:25 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
33
+ 2026-01-21 21:17:25 | INFO | Task 'musicality': train=4322, test=913
34
+ 2026-01-21 21:17:25 | INFO | Task 'alignment': train=1923, test=913
35
+ 2026-01-21 21:17:25 | INFO | Task 'preference': train=1065, test=275
36
+ 2026-01-21 21:17:25 | INFO | [SEQUENTIAL MODE] Training 3 tasks, 1000 steps each
37
+ 2026-01-21 21:17:25 | INFO |
38
+ ============================================================
39
+ 2026-01-21 21:17:25 | INFO | Starting Task 1/3: musicality
40
+ 2026-01-21 21:17:25 | INFO | ============================================================
41
+ 2026-01-21 21:17:25 | INFO | Task 'musicality' trainable parameters: 769
42
+ 2026-01-21 21:17:43 | INFO | [Task musicality][Step 50/1000] loss=1.9747 | mae=1.9747
43
+ 2026-01-21 21:17:57 | INFO | [Task musicality][Step 100/1000] loss=0.9926 | mae=0.9926
44
+ 2026-01-21 21:18:07 | INFO | [Task musicality][Step 150/1000] loss=0.8076 | mae=0.8076
45
+ 2026-01-21 21:18:21 | INFO | [Task musicality][Step 200/1000] loss=0.7166 | mae=0.7166
46
+ 2026-01-21 21:18:32 | INFO | [Task musicality][Step 250/1000] loss=0.7024 | mae=0.7024
47
+ 2026-01-21 21:18:47 | INFO | [Task musicality][Step 300/1000] loss=0.6726 | mae=0.6726
48
+ 2026-01-21 21:18:57 | INFO | [Task musicality][Step 350/1000] loss=0.6763 | mae=0.6763
49
+ 2026-01-21 21:19:11 | INFO | [Task musicality][Step 400/1000] loss=0.6693 | mae=0.6693
50
+ 2026-01-21 21:19:23 | INFO | [Task musicality][Step 450/1000] loss=0.6402 | mae=0.6402
51
+ 2026-01-21 21:19:38 | INFO | [Task musicality][Step 500/1000] loss=0.5858 | mae=0.5858
52
+ 2026-01-21 21:19:51 | INFO | [Task musicality][Step 550/1000] loss=0.6195 | mae=0.6195
53
+ 2026-01-21 21:20:02 | INFO | [Task musicality][Step 600/1000] loss=0.5754 | mae=0.5754
54
+ 2026-01-21 21:20:17 | INFO | [Task musicality][Step 650/1000] loss=0.5761 | mae=0.5761
55
+ 2026-01-21 21:20:27 | INFO | [Task musicality][Step 700/1000] loss=0.5701 | mae=0.5701
56
+ 2026-01-21 21:20:40 | INFO | [Task musicality][Step 750/1000] loss=0.5714 | mae=0.5714
57
+ 2026-01-21 21:20:50 | INFO | [Task musicality][Step 800/1000] loss=0.5381 | mae=0.5381
58
+ 2026-01-21 21:21:04 | INFO | [Task musicality][Step 850/1000] loss=0.5339 | mae=0.5339
59
+ 2026-01-21 21:21:15 | INFO | [Task musicality][Step 900/1000] loss=0.5365 | mae=0.5365
60
+ 2026-01-21 21:21:28 | INFO | [Task musicality][Step 950/1000] loss=0.5234 | mae=0.5234
61
+ 2026-01-21 21:21:44 | INFO | [Task musicality][Step 1000/1000] loss=0.5216 | mae=0.5216
62
+ 2026-01-21 21:21:52 | INFO | [Val] musicality: loss=1.1082 | mae=1.1082
63
+ 2026-01-21 21:21:52 | INFO | Task 'musicality' complete. Running validation...
64
+ 2026-01-21 21:21:56 | INFO | [Final Val for musicality] loss=1.1082 | mae=1.1082
65
+ 2026-01-21 21:21:56 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2117/ckpt/task_musicality_final.pt
66
+ 2026-01-21 21:21:56 | INFO |
67
+ ============================================================
68
+ 2026-01-21 21:21:56 | INFO | Starting Task 2/3: alignment
69
+ 2026-01-21 21:21:56 | INFO | ============================================================
70
+ 2026-01-21 21:21:56 | INFO | Task 'alignment' trainable parameters: 769
71
+ 2026-01-21 21:22:07 | INFO | [Task alignment][Step 50/1000] loss=2.4289 | mae=2.4289
72
+ 2026-01-21 21:22:18 | INFO | [Task alignment][Step 100/1000] loss=1.0527 | mae=1.0527
73
+ 2026-01-21 21:22:32 | INFO | [Task alignment][Step 150/1000] loss=0.8799 | mae=0.8799
74
+ 2026-01-21 21:22:44 | INFO | [Task alignment][Step 200/1000] loss=0.7955 | mae=0.7955
75
+ 2026-01-21 21:22:56 | INFO | [Task alignment][Step 250/1000] loss=0.7785 | mae=0.7785
76
+ 2026-01-21 21:23:04 | INFO | [Task alignment][Step 300/1000] loss=0.7468 | mae=0.7468
77
+ 2026-01-21 21:23:11 | INFO | [Task alignment][Step 350/1000] loss=0.7138 | mae=0.7138
78
+ 2026-01-21 21:23:19 | INFO | [Task alignment][Step 400/1000] loss=0.6950 | mae=0.6950
79
+ 2026-01-21 21:23:34 | INFO | [Task alignment][Step 450/1000] loss=0.6641 | mae=0.6641
80
+ 2026-01-21 21:23:47 | INFO | [Task alignment][Step 500/1000] loss=0.6494 | mae=0.6494
81
+ 2026-01-21 21:23:55 | INFO | [Task alignment][Step 550/1000] loss=0.6224 | mae=0.6224
82
+ 2026-01-21 21:24:08 | INFO | [Task alignment][Step 600/1000] loss=0.6417 | mae=0.6417
83
+ 2026-01-21 21:24:19 | INFO | [Task alignment][Step 650/1000] loss=0.6137 | mae=0.6137
84
+ 2026-01-21 21:24:28 | INFO | [Task alignment][Step 700/1000] loss=0.5973 | mae=0.5973
85
+ 2026-01-21 21:24:37 | INFO | [Task alignment][Step 750/1000] loss=0.5893 | mae=0.5893
86
+ 2026-01-21 21:24:47 | INFO | [Task alignment][Step 800/1000] loss=0.5758 | mae=0.5758
87
+ 2026-01-21 21:25:02 | INFO | [Task alignment][Step 850/1000] loss=0.5727 | mae=0.5727
88
+ 2026-01-21 21:25:13 | INFO | [Task alignment][Step 900/1000] loss=0.5572 | mae=0.5572
89
+ 2026-01-21 21:25:23 | INFO | [Task alignment][Step 950/1000] loss=0.5710 | mae=0.5710
90
+ 2026-01-21 21:25:34 | INFO | [Task alignment][Step 1000/1000] loss=0.5488 | mae=0.5488
91
+ 2026-01-21 21:25:38 | INFO | [Val] alignment: loss=1.2893 | mae=1.2893
92
+ 2026-01-21 21:25:38 | INFO | Task 'alignment' complete. Running validation...
93
+ 2026-01-21 21:25:43 | INFO | [Final Val for alignment] loss=1.2893 | mae=1.2893
94
+ 2026-01-21 21:25:43 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2117/ckpt/task_alignment_final.pt
95
+ 2026-01-21 21:25:43 | INFO |
96
+ ============================================================
97
+ 2026-01-21 21:25:43 | INFO | Starting Task 3/3: preference
98
+ 2026-01-21 21:25:43 | INFO | ============================================================
99
+ 2026-01-21 21:25:43 | INFO | Task 'preference' trainable parameters: 769
100
+ 2026-01-21 21:27:09 | INFO | [Task preference][Step 50/1000] loss=0.7531 | accuracy=0.6267
101
+ 2026-01-21 21:28:26 | INFO | [Task preference][Step 100/1000] loss=0.5513 | accuracy=0.7275
102
+ 2026-01-21 21:29:43 | INFO | [Task preference][Step 150/1000] loss=0.5132 | accuracy=0.7400
103
+ 2026-01-21 21:31:03 | INFO | [Task preference][Step 200/1000] loss=0.5026 | accuracy=0.7579
104
+ 2026-01-21 21:32:17 | INFO | [Task preference][Step 250/1000] loss=0.4554 | accuracy=0.7738
105
+ 2026-01-21 21:33:30 | INFO | [Task preference][Step 300/1000] loss=0.4522 | accuracy=0.7754
106
+ 2026-01-21 21:34:44 | INFO | [Task preference][Step 350/1000] loss=0.4400 | accuracy=0.7821
107
+ 2026-01-21 21:36:04 | INFO | [Task preference][Step 400/1000] loss=0.4237 | accuracy=0.7988
108
+ 2026-01-21 21:37:13 | INFO | [Task preference][Step 450/1000] loss=0.4104 | accuracy=0.8054
109
+ 2026-01-21 21:38:22 | INFO | [Task preference][Step 500/1000] loss=0.4061 | accuracy=0.8067
110
+ 2026-01-21 21:39:32 | INFO | [Task preference][Step 550/1000] loss=0.3888 | accuracy=0.8225
111
+ 2026-01-21 21:40:46 | INFO | [Task preference][Step 600/1000] loss=0.3797 | accuracy=0.8213
112
+ 2026-01-21 21:41:55 | INFO | [Task preference][Step 650/1000] loss=0.3849 | accuracy=0.8208
113
+ 2026-01-21 21:43:08 | INFO | [Task preference][Step 700/1000] loss=0.3678 | accuracy=0.8329
114
+ 2026-01-21 21:44:35 | INFO | [Task preference][Step 750/1000] loss=0.3720 | accuracy=0.8308
downstream/20260121_2145/config.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
2
+ basics:
3
+ random_seed: 42
4
+ save_dir: ${project_root}/experiments/downstream
5
+ run_name: null
6
+ tasks:
7
+ - musicality
8
+ - alignment
9
+ - preference
10
+ backbone:
11
+ checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
12
+ freeze: true
13
+ from_ema: false
14
+ dataset:
15
+ train_file: ${project_root}/train_multitask.jsonl
16
+ test_file: ${project_root}/test_multitask.jsonl
17
+ heads:
18
+ hidden_dim: 768
19
+ init_from: score_projector
20
+ musicality:
21
+ use_mlp: true
22
+ ordinal: false
23
+ dropout: 0.1
24
+ num_categories: 9
25
+ y_min: 1.0
26
+ y_max: 5.0
27
+ step: 0.5
28
+ alignment:
29
+ use_mlp: true
30
+ ordinal: false
31
+ dropout: 0.1
32
+ num_categories: 9
33
+ y_min: 1.0
34
+ y_max: 5.0
35
+ step: 0.5
36
+ preference:
37
+ use_mlp: true
38
+ dropout: 0.1
39
+ train:
40
+ num_epochs: 10
41
+ num_train_steps: 2000
42
+ batch_size: 48
43
+ learning_rate: 0.001
44
+ weight_decay: 0.01
45
+ max_grad_norm: 1.0
46
+ warmup_steps: 100
47
+ schedule_type: cosine
48
+ min_lr_ratio: 0.01
49
+ dataset_mode: sequential
50
+ steps_per_task: 1000
51
+ log_interval: 50
52
+ val_interval: 1000
53
+ save_interval: 1000
54
+ num_workers: 8
55
+ resume: null
56
+ device: cuda:2
downstream/20260121_2145/train.log ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-21 21:45:31 | INFO | Starting downstream training: 20260121_2145
2
+ 2026-01-21 21:45:31 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2145
3
+ 2026-01-21 21:45:31 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2145/config.yaml
4
+ 2026-01-21 21:45:31 | INFO | Training tasks: ['musicality', 'alignment', 'preference']
5
+ 2026-01-21 21:45:31 | INFO | Dataset mode: sequential
6
+ 2026-01-21 21:45:34 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
7
+ 2026-01-21 21:45:34 | INFO | Using checkpoint config for model
8
+ 2026-01-21 21:45:39 | WARNING | Missing keys: 283
9
+ 2026-01-21 21:45:40 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
10
+ 2026-01-21 21:45:40 | INFO | Added mlp head for task 'musicality'
11
+ 2026-01-21 21:45:40 | INFO | Added mlp head for task 'alignment'
12
+ 2026-01-21 21:45:40 | INFO | Added mlp head for task 'preference'
13
+ 2026-01-21 21:45:40 | INFO | Initializing heads from backbone 'score_projector'
14
+ 2026-01-21 21:45:40 | INFO | Initializing 3 heads from 'score_projector'
15
+ 2026-01-21 21:45:40 | INFO | Task 'musicality': type=mlp, ordinal=False
16
+ 2026-01-21 21:45:40 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
17
+ 2026-01-21 21:45:40 | INFO | Loaded 4 parameters, 2 missing
18
+ 2026-01-21 21:45:40 | INFO | ✓ Head initialized from 'score_projector'
19
+ 2026-01-21 21:45:40 | INFO | Task 'alignment': type=mlp, ordinal=False
20
+ 2026-01-21 21:45:40 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
21
+ 2026-01-21 21:45:40 | INFO | Loaded 4 parameters, 2 missing
22
+ 2026-01-21 21:45:40 | INFO | ✓ Head initialized from 'score_projector'
23
+ 2026-01-21 21:45:40 | INFO | Task 'preference': type=mlp, ordinal=False
24
+ 2026-01-21 21:45:40 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
25
+ 2026-01-21 21:45:40 | INFO | Loaded 4 parameters, 2 missing
26
+ 2026-01-21 21:45:40 | INFO | ✓ Head initialized from 'score_projector'
27
+ 2026-01-21 21:45:40 | INFO | ✓ All heads initialized
28
+ 2026-01-21 21:45:40 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
29
+ 2026-01-21 21:45:40 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
30
+ 2026-01-21 21:45:40 | INFO | Task 'musicality': train=4322, test=913
31
+ 2026-01-21 21:45:40 | INFO | Task 'alignment': train=1923, test=913
32
+ 2026-01-21 21:45:40 | INFO | Task 'preference': train=1065, test=275
33
+ 2026-01-21 21:45:40 | INFO | [SEQUENTIAL MODE] Training 3 tasks, 1000 steps each
34
+ 2026-01-21 21:45:40 | INFO |
35
+ ============================================================
36
+ 2026-01-21 21:45:40 | INFO | Starting Task 1/3: musicality
37
+ 2026-01-21 21:45:40 | INFO | ============================================================
38
+ 2026-01-21 21:45:40 | INFO | Task 'musicality' trainable parameters: 592,897
39
+ 2026-01-21 21:46:00 | INFO | [Task musicality][Step 50/1000] loss=1.7786 | mae=1.7786
40
+ 2026-01-21 21:46:18 | INFO | [Task musicality][Step 100/1000] loss=0.7543 | mae=0.7543
41
+ 2026-01-21 21:46:30 | INFO | [Task musicality][Step 150/1000] loss=0.6762 | mae=0.6762
42
+ 2026-01-21 21:46:45 | INFO | [Task musicality][Step 200/1000] loss=0.6404 | mae=0.6404
43
+ 2026-01-21 21:46:59 | INFO | [Task musicality][Step 250/1000] loss=0.5936 | mae=0.5936
44
+ 2026-01-21 21:47:15 | INFO | [Task musicality][Step 300/1000] loss=0.5754 | mae=0.5754
45
+ 2026-01-21 21:47:30 | INFO | [Task musicality][Step 350/1000] loss=0.5510 | mae=0.5510
46
+ 2026-01-21 21:47:43 | INFO | [Task musicality][Step 400/1000] loss=0.5132 | mae=0.5132
47
+ 2026-01-21 21:47:54 | INFO | [Task musicality][Step 450/1000] loss=0.5287 | mae=0.5287
48
+ 2026-01-21 21:48:12 | INFO | [Task musicality][Step 500/1000] loss=0.4958 | mae=0.4958
49
+ 2026-01-21 21:48:30 | INFO | [Task musicality][Step 550/1000] loss=0.4961 | mae=0.4961
50
+ 2026-01-21 21:48:43 | INFO | [Task musicality][Step 600/1000] loss=0.4833 | mae=0.4833
51
+ 2026-01-21 21:48:59 | INFO | [Task musicality][Step 650/1000] loss=0.4410 | mae=0.4410
52
+ 2026-01-21 21:49:14 | INFO | [Task musicality][Step 700/1000] loss=0.4480 | mae=0.4480
53
+ 2026-01-21 21:49:34 | INFO | [Task musicality][Step 750/1000] loss=0.4393 | mae=0.4393
54
+ 2026-01-21 21:49:49 | INFO | [Task musicality][Step 800/1000] loss=0.4295 | mae=0.4295
55
+ 2026-01-21 21:50:07 | INFO | [Task musicality][Step 850/1000] loss=0.4026 | mae=0.4026
56
+ 2026-01-21 21:50:23 | INFO | [Task musicality][Step 900/1000] loss=0.4080 | mae=0.4080
57
+ 2026-01-21 21:50:41 | INFO | [Task musicality][Step 950/1000] loss=0.3985 | mae=0.3985
58
+ 2026-01-21 21:50:58 | INFO | [Task musicality][Step 1000/1000] loss=0.4006 | mae=0.4006
59
+ 2026-01-21 21:51:05 | INFO | [Val] musicality: loss=0.6058 | mae=0.6058
60
+ 2026-01-21 21:51:05 | INFO | Task 'musicality' complete. Running validation...
61
+ 2026-01-21 21:51:10 | INFO | [Final Val for musicality] loss=0.6058 | mae=0.6058
62
+ 2026-01-21 21:51:10 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2145/ckpt/task_musicality_final.pt
63
+ 2026-01-21 21:51:10 | INFO |
64
+ ============================================================
65
+ 2026-01-21 21:51:10 | INFO | Starting Task 2/3: alignment
66
+ 2026-01-21 21:51:10 | INFO | ============================================================
67
+ 2026-01-21 21:51:10 | INFO | Task 'alignment' trainable parameters: 592,897
68
+ 2026-01-21 21:51:26 | INFO | [Task alignment][Step 50/1000] loss=1.8262 | mae=1.8262
69
+ 2026-01-21 21:51:38 | INFO | [Task alignment][Step 100/1000] loss=0.7283 | mae=0.7283
70
+ 2026-01-21 21:51:50 | INFO | [Task alignment][Step 150/1000] loss=0.6792 | mae=0.6792
71
+ 2026-01-21 21:52:03 | INFO | [Task alignment][Step 200/1000] loss=0.5979 | mae=0.5979
72
+ 2026-01-21 21:52:14 | INFO | [Task alignment][Step 250/1000] loss=0.5766 | mae=0.5766
73
+ 2026-01-21 21:52:24 | INFO | [Task alignment][Step 300/1000] loss=0.5427 | mae=0.5427
74
+ 2026-01-21 21:52:34 | INFO | [Task alignment][Step 350/1000] loss=0.5330 | mae=0.5330
75
+ 2026-01-21 21:52:40 | INFO | [Task alignment][Step 400/1000] loss=0.5115 | mae=0.5115
76
+ 2026-01-21 21:52:48 | INFO | [Task alignment][Step 450/1000] loss=0.4688 | mae=0.4688
77
+ 2026-01-21 21:52:54 | INFO | [Task alignment][Step 500/1000] loss=0.4563 | mae=0.4563
78
+ 2026-01-21 21:53:00 | INFO | [Task alignment][Step 550/1000] loss=0.4392 | mae=0.4392
79
+ 2026-01-21 21:53:10 | INFO | [Task alignment][Step 600/1000] loss=0.4127 | mae=0.4127
80
+ 2026-01-21 21:53:20 | INFO | [Task alignment][Step 650/1000] loss=0.3947 | mae=0.3947
81
+ 2026-01-21 21:53:28 | INFO | [Task alignment][Step 700/1000] loss=0.3776 | mae=0.3776
82
+ 2026-01-21 21:53:37 | INFO | [Task alignment][Step 750/1000] loss=0.3523 | mae=0.3523
83
+ 2026-01-21 21:53:45 | INFO | [Task alignment][Step 800/1000] loss=0.3439 | mae=0.3439
84
+ 2026-01-21 21:53:55 | INFO | [Task alignment][Step 850/1000] loss=0.3254 | mae=0.3254
85
+ 2026-01-21 21:54:08 | INFO | [Task alignment][Step 900/1000] loss=0.3240 | mae=0.3240
86
+ 2026-01-21 21:54:21 | INFO | [Task alignment][Step 950/1000] loss=0.3232 | mae=0.3232
87
+ 2026-01-21 21:54:33 | INFO | [Task alignment][Step 1000/1000] loss=0.3232 | mae=0.3232
88
+ 2026-01-21 21:54:39 | INFO | [Val] alignment: loss=0.6060 | mae=0.6060
89
+ 2026-01-21 21:54:39 | INFO | Task 'alignment' complete. Running validation...
90
+ 2026-01-21 21:54:44 | INFO | [Final Val for alignment] loss=0.6060 | mae=0.6060
91
+ 2026-01-21 21:54:44 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2145/ckpt/task_alignment_final.pt
92
+ 2026-01-21 21:54:44 | INFO |
93
+ ============================================================
94
+ 2026-01-21 21:54:44 | INFO | Starting Task 3/3: preference
95
+ 2026-01-21 21:54:44 | INFO | ============================================================
96
+ 2026-01-21 21:54:44 | INFO | Task 'preference' trainable parameters: 592,897
97
+ 2026-01-21 21:56:24 | INFO | [Task preference][Step 50/1000] loss=0.5985 | accuracy=0.6663
98
+ 2026-01-21 21:57:46 | INFO | [Task preference][Step 100/1000] loss=0.4507 | accuracy=0.7896
99
+ 2026-01-21 21:59:02 | INFO | [Task preference][Step 150/1000] loss=0.3691 | accuracy=0.8363
100
+ 2026-01-21 22:00:22 | INFO | [Task preference][Step 200/1000] loss=0.2699 | accuracy=0.8992
101
+ 2026-01-21 22:01:38 | INFO | [Task preference][Step 250/1000] loss=0.1960 | accuracy=0.9267
102
+ 2026-01-21 22:02:51 | INFO | [Task preference][Step 300/1000] loss=0.1390 | accuracy=0.9500
103
+ 2026-01-21 22:04:05 | INFO | [Task preference][Step 350/1000] loss=0.1032 | accuracy=0.9683
104
+ 2026-01-21 22:05:24 | INFO | [Task preference][Step 400/1000] loss=0.0626 | accuracy=0.9842
105
+ 2026-01-21 22:06:58 | INFO | [Task preference][Step 450/1000] loss=0.0451 | accuracy=0.9908
106
+ 2026-01-21 22:08:26 | INFO | [Task preference][Step 500/1000] loss=0.0280 | accuracy=0.9958
107
+ 2026-01-21 22:09:49 | INFO | [Task preference][Step 550/1000] loss=0.0195 | accuracy=0.9979
108
+ 2026-01-21 22:11:22 | INFO | [Task preference][Step 600/1000] loss=0.0139 | accuracy=0.9996
109
+ 2026-01-21 22:12:48 | INFO | [Task preference][Step 650/1000] loss=0.0127 | accuracy=0.9988
110
+ 2026-01-21 22:14:19 | INFO | [Task preference][Step 700/1000] loss=0.0113 | accuracy=0.9992
111
+ 2026-01-21 22:15:52 | INFO | [Task preference][Step 750/1000] loss=0.0107 | accuracy=0.9996
112
+ 2026-01-21 22:17:16 | INFO | [Task preference][Step 800/1000] loss=0.0084 | accuracy=1.0000
113
+ 2026-01-21 22:18:49 | INFO | [Task preference][Step 850/1000] loss=0.0088 | accuracy=1.0000
114
+ 2026-01-21 22:20:27 | INFO | [Task preference][Step 900/1000] loss=0.0086 | accuracy=0.9996
115
+ 2026-01-21 22:22:09 | INFO | [Task preference][Step 950/1000] loss=0.0077 | accuracy=0.9996
116
+ 2026-01-21 22:23:32 | INFO | [Task preference][Step 1000/1000] loss=0.0081 | accuracy=0.9996
117
+ 2026-01-21 22:23:45 | INFO | [Val] preference: loss=1.1195 | accuracy=0.7176
118
+ 2026-01-21 22:23:45 | INFO | Task 'preference' complete. Running validation...
119
+ 2026-01-21 22:23:55 | INFO | [Final Val for preference] loss=1.1195 | accuracy=0.7176
120
+ 2026-01-21 22:23:55 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2145/ckpt/task_preference_final.pt
121
+ 2026-01-21 22:23:55 | INFO |
122
+ ============================================================
123
+ 2026-01-21 22:23:55 | INFO | All tasks complete. Running final validation for all tasks...
124
+ 2026-01-21 22:24:01 | INFO | [Final Val] musicality: loss=0.6058 | mae=0.6058
125
+ 2026-01-21 22:24:08 | INFO | [Final Val] alignment: loss=0.6060 | mae=0.6060
126
+ 2026-01-21 22:24:16 | INFO | [Final Val] preference: loss=1.1195 | accuracy=0.7176
127
+ 2026-01-21 22:24:16 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2145/ckpt/downstream_final.pt
128
+ 2026-01-21 22:24:16 | INFO | Done! Checkpoint saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2145/ckpt/downstream_final.pt
downstream/20260121_2200/config.yaml ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
2
+ basics:
3
+ random_seed: 42
4
+ save_dir: ${project_root}/experiments/downstream
5
+ run_name: null
6
+ tasks:
7
+ - musicality
8
+ - alignment
9
+ backbone:
10
+ checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
11
+ freeze: true
12
+ from_ema: false
13
+ dataset:
14
+ train_file: ${project_root}/train_multitask.jsonl
15
+ test_file: ${project_root}/test_multitask.jsonl
16
+ heads:
17
+ hidden_dim: 768
18
+ init_from: score_projector
19
+ musicality:
20
+ use_mlp: true
21
+ ordinal: false
22
+ dropout: 0.1
23
+ num_categories: 9
24
+ y_min: 1.0
25
+ y_max: 5.0
26
+ step: 0.5
27
+ type: ordinal
28
+ tau: 1.0
29
+ alignment:
30
+ use_mlp: true
31
+ ordinal: false
32
+ dropout: 0.1
33
+ num_categories: 9
34
+ y_min: 1.0
35
+ y_max: 5.0
36
+ step: 0.5
37
+ type: ordinal
38
+ tau: 1.0
39
+ preference:
40
+ use_mlp: true
41
+ dropout: 0.1
42
+ train:
43
+ num_epochs: 10
44
+ num_train_steps: 2000
45
+ batch_size: 48
46
+ learning_rate: 0.001
47
+ weight_decay: 0.01
48
+ max_grad_norm: 1.0
49
+ warmup_steps: 100
50
+ schedule_type: cosine
51
+ min_lr_ratio: 0.01
52
+ dataset_mode: sequential
53
+ steps_per_task: 1000
54
+ log_interval: 50
55
+ val_interval: 1000
56
+ save_interval: 1000
57
+ num_workers: 8
58
+ resume: null
59
+ device: cuda:2
downstream/20260121_2200/train.log ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ 2026-01-21 22:00:33 | INFO | Starting downstream training: 20260121_2200
2
+ 2026-01-21 22:00:33 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2200
3
+ 2026-01-21 22:00:33 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2200/config.yaml
4
+ 2026-01-21 22:00:33 | INFO | Training tasks: ['musicality', 'alignment']
5
+ 2026-01-21 22:00:33 | INFO | Dataset mode: sequential
6
+ 2026-01-21 22:00:35 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
7
+ 2026-01-21 22:00:35 | INFO | Using checkpoint config for model
8
+ 2026-01-21 22:00:41 | WARNING | Missing keys: 283
downstream/20260121_2202/config.yaml ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
2
+ basics:
3
+ random_seed: 42
4
+ save_dir: ${project_root}/experiments/downstream
5
+ run_name: null
6
+ tasks:
7
+ - musicality
8
+ - alignment
9
+ backbone:
10
+ checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
11
+ freeze: true
12
+ from_ema: false
13
+ dataset:
14
+ train_file: ${project_root}/train_multitask.jsonl
15
+ test_file: ${project_root}/test_multitask.jsonl
16
+ heads:
17
+ hidden_dim: 768
18
+ init_from: score_projector
19
+ musicality:
20
+ use_mlp: true
21
+ ordinal: false
22
+ dropout: 0.1
23
+ num_categories: 9
24
+ y_min: 1.0
25
+ y_max: 5.0
26
+ step: 0.5
27
+ type: ordinal
28
+ tau: 1.0
29
+ alignment:
30
+ use_mlp: true
31
+ ordinal: false
32
+ dropout: 0.1
33
+ num_categories: 9
34
+ y_min: 1.0
35
+ y_max: 5.0
36
+ step: 0.5
37
+ type: ordinal
38
+ tau: 1.0
39
+ preference:
40
+ use_mlp: true
41
+ dropout: 0.1
42
+ train:
43
+ num_epochs: 10
44
+ num_train_steps: 2000
45
+ batch_size: 48
46
+ learning_rate: 0.001
47
+ weight_decay: 0.01
48
+ max_grad_norm: 1.0
49
+ warmup_steps: 100
50
+ schedule_type: cosine
51
+ min_lr_ratio: 0.01
52
+ dataset_mode: sequential
53
+ steps_per_task: 1000
54
+ log_interval: 50
55
+ val_interval: 1000
56
+ save_interval: 1000
57
+ num_workers: 8
58
+ resume: null
59
+ device: cuda:3
downstream/20260121_2202/train.log ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-21 22:02:15 | INFO | Starting downstream training: 20260121_2202
2
+ 2026-01-21 22:02:15 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2202
3
+ 2026-01-21 22:02:15 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2202/config.yaml
4
+ 2026-01-21 22:02:15 | INFO | Training tasks: ['musicality', 'alignment']
5
+ 2026-01-21 22:02:15 | INFO | Dataset mode: sequential
6
+ 2026-01-21 22:02:18 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
7
+ 2026-01-21 22:02:18 | INFO | Using checkpoint config for model
8
+ 2026-01-21 22:02:24 | WARNING | Missing keys: 283
9
+ 2026-01-21 22:02:24 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
10
+ 2026-01-21 22:02:24 | INFO | Added mlp head for task 'musicality'
11
+ 2026-01-21 22:02:24 | INFO | Added mlp head for task 'alignment'
12
+ 2026-01-21 22:02:24 | INFO | Initializing heads from backbone 'score_projector'
13
+ 2026-01-21 22:02:24 | INFO | Initializing 2 heads from 'score_projector'
14
+ 2026-01-21 22:02:24 | INFO | Task 'musicality': type=mlp, ordinal=False
15
+ 2026-01-21 22:02:24 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
16
+ 2026-01-21 22:02:24 | INFO | Loaded 4 parameters, 2 missing
17
+ 2026-01-21 22:02:24 | INFO | ✓ Head initialized from 'score_projector'
18
+ 2026-01-21 22:02:24 | INFO | Task 'alignment': type=mlp, ordinal=False
19
+ 2026-01-21 22:02:24 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
20
+ 2026-01-21 22:02:24 | INFO | Loaded 4 parameters, 2 missing
21
+ 2026-01-21 22:02:24 | INFO | ✓ Head initialized from 'score_projector'
22
+ 2026-01-21 22:02:24 | INFO | ✓ All heads initialized
23
+ 2026-01-21 22:02:24 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
24
+ 2026-01-21 22:02:24 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
25
+ 2026-01-21 22:02:24 | INFO | Task 'musicality': train=4322, test=913
26
+ 2026-01-21 22:02:24 | INFO | Task 'alignment': train=1923, test=913
27
+ 2026-01-21 22:02:24 | INFO | [SEQUENTIAL MODE] Training 2 tasks, 1000 steps each
28
+ 2026-01-21 22:02:24 | INFO |
29
+ ============================================================
30
+ 2026-01-21 22:02:24 | INFO | Starting Task 1/2: musicality
31
+ 2026-01-21 22:02:24 | INFO | ============================================================
32
+ 2026-01-21 22:02:24 | INFO | Task 'musicality' trainable parameters: 592,897
33
+ 2026-01-21 22:02:44 | INFO | [Task musicality][Step 50/1000] loss=1.7686 | mae=1.7686
34
+ 2026-01-21 22:02:58 | INFO | [Task musicality][Step 100/1000] loss=0.7030 | mae=0.7030
downstream/20260121_2203/config.yaml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
2
+ basics:
3
+ random_seed: 42
4
+ save_dir: ${project_root}/experiments/downstream
5
+ run_name: null
6
+ tasks:
7
+ - musicality
8
+ - alignment
9
+ backbone:
10
+ checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
11
+ freeze: true
12
+ from_ema: false
13
+ dataset:
14
+ train_file: ${project_root}/train_multitask.jsonl
15
+ test_file: ${project_root}/test_multitask.jsonl
16
+ heads:
17
+ hidden_dim: 768
18
+ init_from: score_projector
19
+ musicality:
20
+ use_mlp: true
21
+ ordinal: true
22
+ dropout: 0.1
23
+ num_categories: 9
24
+ y_min: 1.0
25
+ y_max: 5.0
26
+ step: 0.5
27
+ alignment:
28
+ use_mlp: true
29
+ ordinal: true
30
+ dropout: 0.1
31
+ num_categories: 9
32
+ y_min: 1.0
33
+ y_max: 5.0
34
+ step: 0.5
35
+ preference:
36
+ use_mlp: true
37
+ dropout: 0.1
38
+ train:
39
+ num_epochs: 10
40
+ num_train_steps: 2000
41
+ batch_size: 48
42
+ learning_rate: 0.001
43
+ weight_decay: 0.01
44
+ max_grad_norm: 1.0
45
+ warmup_steps: 100
46
+ schedule_type: cosine
47
+ min_lr_ratio: 0.01
48
+ dataset_mode: sequential
49
+ steps_per_task: 1000
50
+ log_interval: 50
51
+ val_interval: 1000
52
+ save_interval: 1000
53
+ num_workers: 8
54
+ resume: null
55
+ device: cuda:3
downstream/20260121_2203/train.log ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-21 22:03:30 | INFO | Starting downstream training: 20260121_2203
2
+ 2026-01-21 22:03:30 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2203
3
+ 2026-01-21 22:03:30 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2203/config.yaml
4
+ 2026-01-21 22:03:30 | INFO | Training tasks: ['musicality', 'alignment']
5
+ 2026-01-21 22:03:30 | INFO | Dataset mode: sequential
6
+ 2026-01-21 22:03:32 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
7
+ 2026-01-21 22:03:33 | INFO | Using checkpoint config for model
8
+ 2026-01-21 22:03:38 | WARNING | Missing keys: 283
9
+ 2026-01-21 22:03:39 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
10
+ 2026-01-21 22:03:39 | INFO | Added ordinal head for task 'musicality'
11
+ 2026-01-21 22:03:39 | INFO | Added ordinal head for task 'alignment'
12
+ 2026-01-21 22:03:39 | INFO | Initializing heads from backbone 'score_projector'
13
+ 2026-01-21 22:03:39 | INFO | Initializing 2 heads from 'score_projector'
14
+ 2026-01-21 22:03:39 | INFO | Task 'musicality': type=mlp, ordinal=True
15
+ 2026-01-21 22:03:39 | INFO | Initializing Ordinal head from 'score_projector' (from_ema=False)
16
+ 2026-01-21 22:03:39 | INFO | Loaded 4 parameters, 2 missing
17
+ 2026-01-21 22:03:39 | INFO | ✓ Ordinal head MLP initialized from 'score_projector'
18
+ 2026-01-21 22:03:39 | INFO | Note: Ordinal thresholds remain randomly initialized
19
+ 2026-01-21 22:03:39 | INFO | Task 'alignment': type=mlp, ordinal=True
20
+ 2026-01-21 22:03:39 | INFO | Initializing Ordinal head from 'score_projector' (from_ema=False)
21
+ 2026-01-21 22:03:39 | INFO | Loaded 4 parameters, 2 missing
22
+ 2026-01-21 22:03:39 | INFO | ✓ Ordinal head MLP initialized from 'score_projector'
23
+ 2026-01-21 22:03:39 | INFO | Note: Ordinal thresholds remain randomly initialized
24
+ 2026-01-21 22:03:39 | INFO | ✓ All heads initialized
25
+ 2026-01-21 22:03:39 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
26
+ 2026-01-21 22:03:39 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
27
+ 2026-01-21 22:03:39 | INFO | Task 'musicality': train=4322, test=913
28
+ 2026-01-21 22:03:39 | INFO | Task 'alignment': train=1923, test=913
29
+ 2026-01-21 22:03:39 | INFO | [SEQUENTIAL MODE] Training 2 tasks, 1000 steps each
30
+ 2026-01-21 22:03:39 | INFO |
31
+ ============================================================
32
+ 2026-01-21 22:03:39 | INFO | Starting Task 1/2: musicality
33
+ 2026-01-21 22:03:39 | INFO | ============================================================
34
+ 2026-01-21 22:03:39 | INFO | Task 'musicality' trainable parameters: 592,906
35
+ 2026-01-21 22:04:02 | INFO | [Task musicality][Step 50/1000] loss=2.0734 | mae=0.7631
36
+ 2026-01-21 22:04:18 | INFO | [Task musicality][Step 100/1000] loss=1.9539 | mae=0.5993
37
+ 2026-01-21 22:04:30 | INFO | [Task musicality][Step 150/1000] loss=1.9282 | mae=0.5796
38
+ 2026-01-21 22:04:46 | INFO | [Task musicality][Step 200/1000] loss=1.8968 | mae=0.5612
39
+ 2026-01-21 22:04:58 | INFO | [Task musicality][Step 250/1000] loss=1.8660 | mae=0.5441
40
+ 2026-01-21 22:05:13 | INFO | [Task musicality][Step 300/1000] loss=1.8148 | mae=0.5014
41
+ 2026-01-21 22:05:26 | INFO | [Task musicality][Step 350/1000] loss=1.8131 | mae=0.5150
42
+ 2026-01-21 22:05:43 | INFO | [Task musicality][Step 400/1000] loss=1.7905 | mae=0.5027
43
+ 2026-01-21 22:05:57 | INFO | [Task musicality][Step 450/1000] loss=1.7558 | mae=0.4793
44
+ 2026-01-21 22:06:14 | INFO | [Task musicality][Step 500/1000] loss=1.7351 | mae=0.4567
45
+ 2026-01-21 22:06:31 | INFO | [Task musicality][Step 550/1000] loss=1.7292 | mae=0.4727
46
+ 2026-01-21 22:06:44 | INFO | [Task musicality][Step 600/1000] loss=1.6897 | mae=0.4319
47
+ 2026-01-21 22:07:02 | INFO | [Task musicality][Step 650/1000] loss=1.6883 | mae=0.4249
48
+ 2026-01-21 22:07:16 | INFO | [Task musicality][Step 700/1000] loss=1.6641 | mae=0.4092
49
+ 2026-01-21 22:07:35 | INFO | [Task musicality][Step 750/1000] loss=1.6474 | mae=0.3982
50
+ 2026-01-21 22:07:48 | INFO | [Task musicality][Step 800/1000] loss=1.6376 | mae=0.3938
51
+ 2026-01-21 22:08:06 | INFO | [Task musicality][Step 850/1000] loss=1.6277 | mae=0.3840
52
+ 2026-01-21 22:08:19 | INFO | [Task musicality][Step 900/1000] loss=1.6306 | mae=0.3790
53
+ 2026-01-21 22:08:35 | INFO | [Task musicality][Step 950/1000] loss=1.6176 | mae=0.3722
54
+ 2026-01-21 22:08:53 | INFO | [Task musicality][Step 1000/1000] loss=1.6133 | mae=0.3613
55
+ 2026-01-21 22:09:02 | INFO | [Val] musicality: loss=2.0307 | mae=0.7057
56
+ 2026-01-21 22:09:02 | INFO | Task 'musicality' complete. Running validation...
57
+ 2026-01-21 22:09:09 | INFO | [Final Val for musicality] loss=2.0307 | mae=0.7057
58
+ 2026-01-21 22:09:09 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2203/ckpt/task_musicality_final.pt
59
+ 2026-01-21 22:09:09 | INFO |
60
+ ============================================================
61
+ 2026-01-21 22:09:09 | INFO | Starting Task 2/2: alignment
62
+ 2026-01-21 22:09:09 | INFO | ============================================================
63
+ 2026-01-21 22:09:09 | INFO | Task 'alignment' trainable parameters: 592,906
64
+ 2026-01-21 22:09:22 | INFO | [Task alignment][Step 50/1000] loss=2.0490 | mae=0.6992
65
+ 2026-01-21 22:09:34 | INFO | [Task alignment][Step 100/1000] loss=1.9639 | mae=0.5939
66
+ 2026-01-21 22:09:47 | INFO | [Task alignment][Step 150/1000] loss=1.9356 | mae=0.5761
67
+ 2026-01-21 22:09:59 | INFO | [Task alignment][Step 200/1000] loss=1.8931 | mae=0.5401
68
+ 2026-01-21 22:10:12 | INFO | [Task alignment][Step 250/1000] loss=1.8464 | mae=0.5062
69
+ 2026-01-21 22:10:25 | INFO | [Task alignment][Step 300/1000] loss=1.8200 | mae=0.4874
70
+ 2026-01-21 22:10:37 | INFO | [Task alignment][Step 350/1000] loss=1.7858 | mae=0.4652
71
+ 2026-01-21 22:10:47 | INFO | [Task alignment][Step 400/1000] loss=1.7651 | mae=0.4574
72
+ 2026-01-21 22:11:04 | INFO | [Task alignment][Step 450/1000] loss=1.7110 | mae=0.4072
73
+ 2026-01-21 22:11:17 | INFO | [Task alignment][Step 500/1000] loss=1.6871 | mae=0.3807
74
+ 2026-01-21 22:11:30 | INFO | [Task alignment][Step 550/1000] loss=1.6525 | mae=0.3685
75
+ 2026-01-21 22:11:43 | INFO | [Task alignment][Step 600/1000] loss=1.6413 | mae=0.3528
76
+ 2026-01-21 22:11:58 | INFO | [Task alignment][Step 650/1000] loss=1.6069 | mae=0.3283
77
+ 2026-01-21 22:12:12 | INFO | [Task alignment][Step 700/1000] loss=1.5852 | mae=0.3045
78
+ 2026-01-21 22:12:26 | INFO | [Task alignment][Step 750/1000] loss=1.5635 | mae=0.2865
79
+ 2026-01-21 22:12:38 | INFO | [Task alignment][Step 800/1000] loss=1.5526 | mae=0.2740
80
+ 2026-01-21 22:12:53 | INFO | [Task alignment][Step 850/1000] loss=1.5392 | mae=0.2679
81
+ 2026-01-21 22:13:07 | INFO | [Task alignment][Step 900/1000] loss=1.5294 | mae=0.2544
82
+ 2026-01-21 22:13:21 | INFO | [Task alignment][Step 950/1000] loss=1.5273 | mae=0.2547
83
+ 2026-01-21 22:13:36 | INFO | [Task alignment][Step 1000/1000] loss=1.5287 | mae=0.2516
84
+ 2026-01-21 22:13:44 | INFO | [Val] alignment: loss=1.9828 | mae=0.6563
85
+ 2026-01-21 22:13:44 | INFO | Task 'alignment' complete. Running validation...
86
+ 2026-01-21 22:13:52 | INFO | [Final Val for alignment] loss=1.9828 | mae=0.6563
87
+ 2026-01-21 22:13:52 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2203/ckpt/task_alignment_final.pt
88
+ 2026-01-21 22:13:52 | INFO |
89
+ ============================================================
90
+ 2026-01-21 22:13:52 | INFO | All tasks complete. Running final validation for all tasks...
91
+ 2026-01-21 22:14:00 | INFO | [Final Val] musicality: loss=2.0307 | mae=0.7057
92
+ 2026-01-21 22:14:07 | INFO | [Final Val] alignment: loss=1.9828 | mae=0.6563
93
+ 2026-01-21 22:14:07 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2203/ckpt/downstream_final.pt
94
+ 2026-01-21 22:14:07 | INFO | Done! Checkpoint saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2203/ckpt/downstream_final.pt
downstream/20260121_2243/config.yaml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
2
+ basics:
3
+ random_seed: 42
4
+ save_dir: ${project_root}/experiments/downstream
5
+ run_name: null
6
+ tasks:
7
+ - musicality
8
+ - alignment
9
+ backbone:
10
+ checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
11
+ freeze: true
12
+ from_ema: false
13
+ dataset:
14
+ train_file: ${project_root}/train_multitask.jsonl
15
+ test_file: ${project_root}/test_multitask.jsonl
16
+ heads:
17
+ hidden_dim: 768
18
+ init_from: score_projector
19
+ musicality:
20
+ use_mlp: true
21
+ ordinal: false
22
+ dropout: 0.1
23
+ num_categories: 9
24
+ y_min: 1.0
25
+ y_max: 5.0
26
+ step: 0.5
27
+ alignment:
28
+ use_mlp: true
29
+ ordinal: false
30
+ dropout: 0.1
31
+ num_categories: 9
32
+ y_min: 1.0
33
+ y_max: 5.0
34
+ step: 0.5
35
+ preference:
36
+ use_mlp: true
37
+ dropout: 0.1
38
+ train:
39
+ num_epochs: 10
40
+ num_train_steps: 2000
41
+ batch_size: 48
42
+ learning_rate: 0.001
43
+ weight_decay: 0.01
44
+ max_grad_norm: 1.0
45
+ warmup_steps: 100
46
+ schedule_type: cosine
47
+ min_lr_ratio: 0.01
48
+ dataset_mode: sequential
49
+ steps_per_task: 5000
50
+ log_interval: 200
51
+ val_interval: 1000
52
+ save_interval: 1000
53
+ num_workers: 8
54
+ resume: null
55
+ device: cuda:3
downstream/20260121_2243/train.log ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-21 22:43:58 | INFO | Starting downstream training: 20260121_2243
2
+ 2026-01-21 22:43:58 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2243
3
+ 2026-01-21 22:43:58 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2243/config.yaml
4
+ 2026-01-21 22:43:58 | INFO | Training tasks: ['musicality', 'alignment']
5
+ 2026-01-21 22:43:58 | INFO | Dataset mode: sequential
6
+ 2026-01-21 22:44:01 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
7
+ 2026-01-21 22:44:01 | INFO | Using checkpoint config for model
8
+ 2026-01-21 22:44:08 | WARNING | Missing keys: 283
9
+ 2026-01-21 22:44:09 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
10
+ 2026-01-21 22:44:09 | INFO | Added mlp head for task 'musicality'
11
+ 2026-01-21 22:44:09 | INFO | Added mlp head for task 'alignment'
12
+ 2026-01-21 22:44:09 | INFO | Initializing heads from backbone 'score_projector'
13
+ 2026-01-21 22:44:09 | INFO | Initializing 2 heads from 'score_projector'
14
+ 2026-01-21 22:44:09 | INFO | Task 'musicality': type=mlp, ordinal=False
15
+ 2026-01-21 22:44:09 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
16
+ 2026-01-21 22:44:09 | INFO | Slicing 3.weight -> 4.weight: torch.Size([2, 768]) -> torch.Size([1, 768])
17
+ 2026-01-21 22:44:09 | INFO | Loaded 6 parameters, 0 missing
18
+ 2026-01-21 22:44:09 | INFO | ✓ Head initialized from 'score_projector'
19
+ 2026-01-21 22:44:09 | INFO | Task 'alignment': type=mlp, ordinal=False
20
+ 2026-01-21 22:44:09 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
21
+ 2026-01-21 22:44:09 | INFO | Slicing 3.weight -> 4.weight: torch.Size([2, 768]) -> torch.Size([1, 768])
22
+ 2026-01-21 22:44:09 | INFO | Loaded 6 parameters, 0 missing
23
+ 2026-01-21 22:44:09 | INFO | ✓ Head initialized from 'score_projector'
24
+ 2026-01-21 22:44:09 | INFO | ✓ All heads initialized
25
+ 2026-01-21 22:44:09 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
26
+ 2026-01-21 22:44:09 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
27
+ 2026-01-21 22:44:09 | INFO | Task 'musicality': train=4322, test=913
28
+ 2026-01-21 22:44:09 | INFO | Task 'alignment': train=1923, test=913
29
+ 2026-01-21 22:44:09 | INFO | [SEQUENTIAL MODE] Training 2 tasks, 5000 steps each
30
+ 2026-01-21 22:44:09 | INFO |
31
+ ============================================================
32
+ 2026-01-21 22:44:09 | INFO | Starting Task 1/2: musicality
33
+ 2026-01-21 22:44:09 | INFO | ============================================================
34
+ 2026-01-21 22:44:09 | INFO | Task 'musicality' trainable parameters: 592,897
35
+ 2026-01-21 22:45:12 | INFO | [Task musicality][Step 200/5000] loss=1.1968 | mae=1.1968
36
+ 2026-01-21 22:46:00 | INFO | [Task musicality][Step 400/5000] loss=0.5557 | mae=0.5557
37
+ 2026-01-21 22:46:55 | INFO | [Task musicality][Step 600/5000] loss=0.5162 | mae=0.5162
38
+ 2026-01-21 22:47:56 | INFO | [Task musicality][Step 800/5000] loss=0.4912 | mae=0.4912
39
+ 2026-01-21 22:48:59 | INFO | [Task musicality][Step 1000/5000] loss=0.4753 | mae=0.4753
40
+ 2026-01-21 22:49:05 | INFO | [Val] musicality: loss=0.6083 | mae=0.6083
41
+ 2026-01-21 22:49:55 | INFO | [Task musicality][Step 1200/5000] loss=0.4478 | mae=0.4478
42
+ 2026-01-21 22:50:55 | INFO | [Task musicality][Step 1400/5000] loss=0.4372 | mae=0.4372
43
+ 2026-01-21 22:51:46 | INFO | [Task musicality][Step 1600/5000] loss=0.4211 | mae=0.4211
44
+ 2026-01-21 22:52:40 | INFO | [Task musicality][Step 1800/5000] loss=0.4047 | mae=0.4047
45
+ 2026-01-21 22:53:35 | INFO | [Task musicality][Step 2000/5000] loss=0.3909 | mae=0.3909
46
+ 2026-01-21 22:53:39 | INFO | [Val] musicality: loss=0.6811 | mae=0.6811
47
+ 2026-01-21 22:54:28 | INFO | [Task musicality][Step 2200/5000] loss=0.3705 | mae=0.3705
48
+ 2026-01-21 22:55:18 | INFO | [Task musicality][Step 2400/5000] loss=0.3584 | mae=0.3584
49
+ 2026-01-21 22:56:08 | INFO | [Task musicality][Step 2600/5000] loss=0.3527 | mae=0.3527
50
+ 2026-01-21 22:57:02 | INFO | [Task musicality][Step 2800/5000] loss=0.3346 | mae=0.3346
51
+ 2026-01-21 22:57:51 | INFO | [Task musicality][Step 3000/5000] loss=0.3218 | mae=0.3218
52
+ 2026-01-21 22:57:56 | INFO | [Val] musicality: loss=0.7101 | mae=0.7101
53
+ 2026-01-21 22:58:44 | INFO | [Task musicality][Step 3200/5000] loss=0.3070 | mae=0.3070
54
+ 2026-01-21 22:59:34 | INFO | [Task musicality][Step 3400/5000] loss=0.2961 | mae=0.2961
55
+ 2026-01-21 23:00:25 | INFO | [Task musicality][Step 3600/5000] loss=0.2865 | mae=0.2865
56
+ 2026-01-21 23:01:18 | INFO | [Task musicality][Step 3800/5000] loss=0.2746 | mae=0.2746
57
+ 2026-01-21 23:02:07 | INFO | [Task musicality][Step 4000/5000] loss=0.2674 | mae=0.2674
58
+ 2026-01-21 23:02:10 | INFO | [Val] musicality: loss=0.7399 | mae=0.7399
59
+ 2026-01-21 23:02:59 | INFO | [Task musicality][Step 4200/5000] loss=0.2614 | mae=0.2614
60
+ 2026-01-21 23:03:51 | INFO | [Task musicality][Step 4400/5000] loss=0.2596 | mae=0.2596
61
+ 2026-01-21 23:04:44 | INFO | [Task musicality][Step 4600/5000] loss=0.2512 | mae=0.2512
62
+ 2026-01-21 23:05:30 | INFO | [Task musicality][Step 4800/5000] loss=0.2500 | mae=0.2500
63
+ 2026-01-21 23:06:20 | INFO | [Task musicality][Step 5000/5000] loss=0.2514 | mae=0.2514
64
+ 2026-01-21 23:06:24 | INFO | [Val] musicality: loss=0.7481 | mae=0.7481
65
+ 2026-01-21 23:06:24 | INFO | Task 'musicality' complete. Running validation...
66
+ 2026-01-21 23:06:28 | INFO | [Final Val for musicality] loss=0.7481 | mae=0.7481
67
+ 2026-01-21 23:06:28 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2243/ckpt/task_musicality_final.pt
68
+ 2026-01-21 23:06:28 | INFO |
69
+ ============================================================
70
+ 2026-01-21 23:06:28 | INFO | Starting Task 2/2: alignment
71
+ 2026-01-21 23:06:28 | INFO | ============================================================
72
+ 2026-01-21 23:06:28 | INFO | Task 'alignment' trainable parameters: 592,897
73
+ 2026-01-21 23:07:03 | INFO | [Task alignment][Step 200/5000] loss=1.0436 | mae=1.0436
74
+ 2026-01-21 23:07:36 | INFO | [Task alignment][Step 400/5000] loss=0.5449 | mae=0.5449
75
+ 2026-01-21 23:08:11 | INFO | [Task alignment][Step 600/5000] loss=0.4820 | mae=0.4820
76
+ 2026-01-21 23:08:39 | INFO | [Task alignment][Step 800/5000] loss=0.4418 | mae=0.4418
77
+ 2026-01-21 23:09:09 | INFO | [Task alignment][Step 1000/5000] loss=0.4081 | mae=0.4081
78
+ 2026-01-21 23:09:14 | INFO | [Val] alignment: loss=0.6495 | mae=0.6495
79
+ 2026-01-21 23:09:46 | INFO | [Task alignment][Step 1200/5000] loss=0.3896 | mae=0.3896
80
+ 2026-01-21 23:10:18 | INFO | [Task alignment][Step 1400/5000] loss=0.3590 | mae=0.3590
81
+ 2026-01-21 23:10:50 | INFO | [Task alignment][Step 1600/5000] loss=0.3413 | mae=0.3413
82
+ 2026-01-21 23:11:26 | INFO | [Task alignment][Step 1800/5000] loss=0.3241 | mae=0.3241
83
+ 2026-01-21 23:11:58 | INFO | [Task alignment][Step 2000/5000] loss=0.3065 | mae=0.3065
84
+ 2026-01-21 23:12:04 | INFO | [Val] alignment: loss=0.6834 | mae=0.6834
85
+ 2026-01-21 23:12:37 | INFO | [Task alignment][Step 2200/5000] loss=0.2848 | mae=0.2848
86
+ 2026-01-21 23:13:09 | INFO | [Task alignment][Step 2400/5000] loss=0.2722 | mae=0.2722
87
+ 2026-01-21 23:13:44 | INFO | [Task alignment][Step 2600/5000] loss=0.2566 | mae=0.2566
88
+ 2026-01-21 23:14:15 | INFO | [Task alignment][Step 2800/5000] loss=0.2472 | mae=0.2472
89
+ 2026-01-21 23:14:47 | INFO | [Task alignment][Step 3000/5000] loss=0.2325 | mae=0.2325
90
+ 2026-01-21 23:14:53 | INFO | [Val] alignment: loss=0.7010 | mae=0.7010
91
+ 2026-01-21 23:15:24 | INFO | [Task alignment][Step 3200/5000] loss=0.2203 | mae=0.2203
92
+ 2026-01-21 23:15:59 | INFO | [Task alignment][Step 3400/5000] loss=0.2091 | mae=0.2091
93
+ 2026-01-21 23:16:33 | INFO | [Task alignment][Step 3600/5000] loss=0.2033 | mae=0.2033
94
+ 2026-01-21 23:17:11 | INFO | [Task alignment][Step 3800/5000] loss=0.1936 | mae=0.1936
95
+ 2026-01-21 23:17:45 | INFO | [Task alignment][Step 4000/5000] loss=0.1850 | mae=0.1850
96
+ 2026-01-21 23:17:50 | INFO | [Val] alignment: loss=0.7168 | mae=0.7168
97
+ 2026-01-21 23:18:25 | INFO | [Task alignment][Step 4200/5000] loss=0.1814 | mae=0.1814
98
+ 2026-01-21 23:18:57 | INFO | [Task alignment][Step 4400/5000] loss=0.1766 | mae=0.1766
99
+ 2026-01-21 23:19:31 | INFO | [Task alignment][Step 4600/5000] loss=0.1719 | mae=0.1719
100
+ 2026-01-21 23:20:05 | INFO | [Task alignment][Step 4800/5000] loss=0.1727 | mae=0.1727
101
+ 2026-01-21 23:20:39 | INFO | [Task alignment][Step 5000/5000] loss=0.1724 | mae=0.1724
102
+ 2026-01-21 23:20:45 | INFO | [Val] alignment: loss=0.7154 | mae=0.7154
103
+ 2026-01-21 23:20:45 | INFO | Task 'alignment' complete. Running validation...
104
+ 2026-01-21 23:20:49 | INFO | [Final Val for alignment] loss=0.7154 | mae=0.7154
105
+ 2026-01-21 23:20:49 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2243/ckpt/task_alignment_final.pt
106
+ 2026-01-21 23:20:49 | INFO |
107
+ ============================================================
108
+ 2026-01-21 23:20:49 | INFO | All tasks complete. Running final validation for all tasks...
109
+ 2026-01-21 23:20:53 | INFO | [Final Val] musicality: loss=0.7481 | mae=0.7481
110
+ 2026-01-21 23:20:58 | INFO | [Final Val] alignment: loss=0.7154 | mae=0.7154
111
+ 2026-01-21 23:20:58 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2243/ckpt/downstream_final.pt
112
+ 2026-01-21 23:20:58 | INFO | Done! Checkpoint saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2243/ckpt/downstream_final.pt
downstream/20260121_2300/config.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
2
+ basics:
3
+ random_seed: 42
4
+ save_dir: ${project_root}/experiments/downstream
5
+ run_name: null
6
+ tasks:
7
+ - musicality
8
+ - alignment
9
+ backbone:
10
+ checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/contrastive/20260121_0043_tune
11
+ transformer/ckpt/reward_model.0.pt
12
+ freeze: true
13
+ from_ema: false
14
+ dataset:
15
+ train_file: ${project_root}/train_multitask.jsonl
16
+ test_file: ${project_root}/test_multitask.jsonl
17
+ heads:
18
+ hidden_dim: 768
19
+ init_from: score_projector
20
+ musicality:
21
+ use_mlp: true
22
+ ordinal: false
23
+ dropout: 0.1
24
+ num_categories: 9
25
+ y_min: 1.0
26
+ y_max: 5.0
27
+ step: 0.5
28
+ alignment:
29
+ use_mlp: true
30
+ ordinal: false
31
+ dropout: 0.1
32
+ num_categories: 9
33
+ y_min: 1.0
34
+ y_max: 5.0
35
+ step: 0.5
36
+ preference:
37
+ use_mlp: true
38
+ dropout: 0.1
39
+ train:
40
+ num_epochs: 10
41
+ num_train_steps: 2000
42
+ batch_size: 48
43
+ learning_rate: 0.001
44
+ weight_decay: 0.01
45
+ max_grad_norm: 1.0
46
+ warmup_steps: 100
47
+ schedule_type: cosine
48
+ min_lr_ratio: 0.01
49
+ dataset_mode: sequential
50
+ steps_per_task: 5000
51
+ log_interval: 200
52
+ val_interval: 1000
53
+ save_interval: 1000
54
+ num_workers: 8
55
+ resume: null
56
+ device: cuda:0
downstream/20260121_2300/train.log ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ 2026-01-21 23:00:41 | INFO | Starting downstream training: 20260121_2300
2
+ 2026-01-21 23:00:41 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2300
3
+ 2026-01-21 23:00:41 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2300/config.yaml
4
+ 2026-01-21 23:00:41 | INFO | Training tasks: ['musicality', 'alignment']
5
+ 2026-01-21 23:00:41 | INFO | Dataset mode: sequential
6
+ 2026-01-21 23:00:44 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/contrastive/20260121_0043_tune transformer/ckpt/reward_model.0.pt
7
+ 2026-01-21 23:00:44 | INFO | Using checkpoint config for model
downstream/20260121_2319/config.yaml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
2
+ basics:
3
+ random_seed: 42
4
+ save_dir: ${project_root}/experiments/downstream
5
+ run_name: null
6
+ tasks:
7
+ - musicality
8
+ - alignment
9
+ backbone:
10
+ checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
11
+ freeze: true
12
+ from_ema: false
13
+ dataset:
14
+ train_file: ${project_root}/train_multitask.jsonl
15
+ test_file: ${project_root}/test_multitask.jsonl
16
+ heads:
17
+ hidden_dim: 768
18
+ init_from: score_projector
19
+ musicality:
20
+ use_mlp: true
21
+ ordinal: false
22
+ dropout: 0.1
23
+ num_categories: 9
24
+ y_min: 1.0
25
+ y_max: 5.0
26
+ step: 0.5
27
+ alignment:
28
+ use_mlp: true
29
+ ordinal: false
30
+ dropout: 0.1
31
+ num_categories: 9
32
+ y_min: 1.0
33
+ y_max: 5.0
34
+ step: 0.5
35
+ preference:
36
+ use_mlp: true
37
+ dropout: 0.1
38
+ train:
39
+ num_epochs: 10
40
+ num_train_steps: 2000
41
+ batch_size: 48
42
+ learning_rate: 0.001
43
+ weight_decay: 0.01
44
+ max_grad_norm: 1.0
45
+ warmup_steps: 100
46
+ schedule_type: cosine
47
+ min_lr_ratio: 0.01
48
+ dataset_mode: sequential
49
+ steps_per_task: 5000
50
+ log_interval: 200
51
+ val_interval: 1000
52
+ save_interval: 1000
53
+ num_workers: 8
54
+ resume: null
55
+ device: cuda:1
downstream/20260121_2319/train.log ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-21 23:19:02 | INFO | Starting downstream training: 20260121_2319
2
+ 2026-01-21 23:19:02 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2319
3
+ 2026-01-21 23:19:02 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2319/config.yaml
4
+ 2026-01-21 23:19:02 | INFO | Training tasks: ['musicality', 'alignment']
5
+ 2026-01-21 23:19:02 | INFO | Dataset mode: sequential
6
+ 2026-01-21 23:19:22 | INFO | Starting downstream training: 20260121_2319
7
+ 2026-01-21 23:19:22 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2319
8
+ 2026-01-21 23:19:22 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2319/config.yaml
9
+ 2026-01-21 23:19:22 | INFO | Training tasks: ['musicality', 'alignment']
10
+ 2026-01-21 23:19:22 | INFO | Dataset mode: sequential
11
+ 2026-01-21 23:19:39 | INFO | Starting downstream training: 20260121_2319
12
+ 2026-01-21 23:19:39 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2319
13
+ 2026-01-21 23:19:39 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2319/config.yaml
14
+ 2026-01-21 23:19:39 | INFO | Training tasks: ['musicality', 'alignment']
15
+ 2026-01-21 23:19:39 | INFO | Dataset mode: sequential
16
+ 2026-01-21 23:19:42 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
17
+ 2026-01-21 23:19:42 | INFO | Using checkpoint config for model
18
+ 2026-01-21 23:19:48 | INFO | Missing keys (794): ['alignment_head.0.weight', 'alignment_head.0.bias', 'alignment_head.1.weight', 'alignment_head.1.bias', 'alignment_head.3.weight']...
19
+ 2026-01-21 23:19:48 | WARNING | Missing keys: 283
20
+ 2026-01-21 23:19:49 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
21
+ 2026-01-21 23:19:49 | INFO | Added mlp head for task 'musicality'
22
+ 2026-01-21 23:19:49 | INFO | Added mlp head for task 'alignment'
23
+ 2026-01-21 23:19:49 | INFO | Initializing heads from backbone 'score_projector'
24
+ 2026-01-21 23:19:49 | INFO | Initializing 2 heads from 'score_projector'
25
+ 2026-01-21 23:19:49 | INFO | Task 'musicality': type=mlp, ordinal=False
26
+ 2026-01-21 23:19:49 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
27
+ 2026-01-21 23:19:49 | INFO | Slicing 3.weight -> 4.weight: torch.Size([2, 768]) -> torch.Size([1, 768])
28
+ 2026-01-21 23:19:49 | INFO | Loaded 6 parameters, 0 missing
29
+ 2026-01-21 23:19:49 | INFO | ✓ Head initialized from 'score_projector'
30
+ 2026-01-21 23:19:49 | INFO | Task 'alignment': type=mlp, ordinal=False
31
+ 2026-01-21 23:19:49 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
32
+ 2026-01-21 23:19:49 | INFO | Slicing 3.weight -> 4.weight: torch.Size([2, 768]) -> torch.Size([1, 768])
33
+ 2026-01-21 23:19:49 | INFO | Loaded 6 parameters, 0 missing
34
+ 2026-01-21 23:19:49 | INFO | ✓ Head initialized from 'score_projector'
35
+ 2026-01-21 23:19:49 | INFO | ✓ All heads initialized
36
+ 2026-01-21 23:19:49 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
37
+ 2026-01-21 23:19:49 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
38
+ 2026-01-21 23:19:49 | INFO | Task 'musicality': train=4322, test=913
39
+ 2026-01-21 23:19:49 | INFO | Task 'alignment': train=1923, test=913
40
+ 2026-01-21 23:19:49 | INFO | [SEQUENTIAL MODE] Training 2 tasks, 5000 steps each
41
+ 2026-01-21 23:19:49 | INFO |
42
+ ============================================================
43
+ 2026-01-21 23:19:49 | INFO | Starting Task 1/2: musicality
44
+ 2026-01-21 23:19:49 | INFO | ============================================================
45
+ 2026-01-21 23:19:49 | INFO | Task 'musicality' trainable parameters: 592,897
downstream/20260121_2327/config.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
2
+ basics:
3
+ random_seed: 42
4
+ save_dir: ${project_root}/experiments/downstream
5
+ run_name: null
6
+ tasks:
7
+ - musicality
8
+ - alignment
9
+ backbone:
10
+ checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/contrastive/20260121_0043_tune
11
+ transformer/ckpt/reward_model.0.pt
12
+ freeze: true
13
+ from_ema: false
14
+ dataset:
15
+ train_file: ${project_root}/train_multitask.jsonl
16
+ test_file: ${project_root}/test_multitask.jsonl
17
+ heads:
18
+ hidden_dim: 768
19
+ init_from: score_projector
20
+ musicality:
21
+ use_mlp: true
22
+ ordinal: false
23
+ dropout: 0.1
24
+ num_categories: 9
25
+ y_min: 1.0
26
+ y_max: 5.0
27
+ step: 0.5
28
+ alignment:
29
+ use_mlp: true
30
+ ordinal: false
31
+ dropout: 0.1
32
+ num_categories: 9
33
+ y_min: 1.0
34
+ y_max: 5.0
35
+ step: 0.5
36
+ preference:
37
+ use_mlp: true
38
+ dropout: 0.1
39
+ train:
40
+ num_epochs: 10
41
+ num_train_steps: 2000
42
+ batch_size: 48
43
+ learning_rate: 0.001
44
+ weight_decay: 0.01
45
+ max_grad_norm: 1.0
46
+ warmup_steps: 100
47
+ schedule_type: cosine
48
+ min_lr_ratio: 0.01
49
+ dataset_mode: sequential
50
+ steps_per_task: 5000
51
+ log_interval: 200
52
+ val_interval: 1000
53
+ save_interval: 1000
54
+ num_workers: 8
55
+ resume: null
56
+ device: cuda:1
downstream/20260121_2327/train.log ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-21 23:27:01 | INFO | Starting downstream training: 20260121_2327
2
+ 2026-01-21 23:27:01 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2327
3
+ 2026-01-21 23:27:01 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2327/config.yaml
4
+ 2026-01-21 23:27:01 | INFO | Training tasks: ['musicality', 'alignment']
5
+ 2026-01-21 23:27:01 | INFO | Dataset mode: sequential
6
+ 2026-01-21 23:27:03 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/contrastive/20260121_0043_tune transformer/ckpt/reward_model.0.pt
7
+ 2026-01-21 23:27:03 | INFO | Using checkpoint config for model
8
+ 2026-01-21 23:27:09 | INFO | Skipping score_projector.3.weight: shape mismatch (ckpt torch.Size([1, 768]) vs model torch.Size([2, 768])), will use randomly initialized weights
9
+ 2026-01-21 23:27:09 | INFO | Skipping score_projector.3.bias: shape mismatch (ckpt torch.Size([1]) vs model torch.Size([2])), will use randomly initialized weights
10
+ 2026-01-21 23:27:09 | INFO | Missing keys (570): ['score_projector.3.weight', 'score_projector.3.bias', 'text_module.model.shared.weight', 'text_module.model.encoder.embed_tokens.weight', 'text_module.model.encoder.block.0.layer.0.SelfAttention.q.weight']...
11
+ 2026-01-21 23:27:09 | WARNING | Missing keys: 59
12
+ 2026-01-21 23:27:10 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/contrastive/20260121_0043_tune transformer/ckpt/reward_model.0.pt
13
+ 2026-01-21 23:27:10 | INFO | Added mlp head for task 'musicality'
14
+ 2026-01-21 23:27:10 | INFO | Added mlp head for task 'alignment'
15
+ 2026-01-21 23:27:10 | INFO | Initializing heads from backbone 'score_projector'
16
+ 2026-01-21 23:27:10 | INFO | Initializing 2 heads from 'score_projector'
17
+ 2026-01-21 23:27:10 | INFO | Task 'musicality': type=mlp, ordinal=False
18
+ 2026-01-21 23:27:10 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
19
+ 2026-01-21 23:27:10 | INFO | Mapped 3.weight -> 4.weight
20
+ 2026-01-21 23:27:10 | INFO | Mapped 3.bias -> 4.bias
21
+ 2026-01-21 23:27:10 | INFO | Loaded 6 parameters, 0 missing
22
+ 2026-01-21 23:27:10 | INFO | ✓ Head initialized from 'score_projector'
23
+ 2026-01-21 23:27:10 | INFO | Task 'alignment': type=mlp, ordinal=False
24
+ 2026-01-21 23:27:10 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
25
+ 2026-01-21 23:27:10 | INFO | Mapped 3.weight -> 4.weight
26
+ 2026-01-21 23:27:10 | INFO | Mapped 3.bias -> 4.bias
27
+ 2026-01-21 23:27:10 | INFO | Loaded 6 parameters, 0 missing
28
+ 2026-01-21 23:27:10 | INFO | ✓ Head initialized from 'score_projector'
29
+ 2026-01-21 23:27:10 | INFO | ✓ All heads initialized
30
+ 2026-01-21 23:27:10 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
31
+ 2026-01-21 23:27:10 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
32
+ 2026-01-21 23:27:10 | INFO | Task 'musicality': train=4322, test=913
33
+ 2026-01-21 23:27:10 | INFO | Task 'alignment': train=1923, test=913
34
+ 2026-01-21 23:27:10 | INFO | [SEQUENTIAL MODE] Training 2 tasks, 5000 steps each
35
+ 2026-01-21 23:27:10 | INFO |
36
+ ============================================================
37
+ 2026-01-21 23:27:10 | INFO | Starting Task 1/2: musicality
38
+ 2026-01-21 23:27:10 | INFO | ============================================================
39
+ 2026-01-21 23:27:10 | INFO | Task 'musicality' trainable parameters: 592,897
40
+ 2026-01-21 23:27:43 | INFO | Starting downstream training: 20260121_2327
41
+ 2026-01-21 23:27:43 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2327
42
+ 2026-01-21 23:27:43 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2327/config.yaml
43
+ 2026-01-21 23:27:43 | INFO | Training tasks: ['musicality', 'alignment']
44
+ 2026-01-21 23:27:43 | INFO | Dataset mode: sequential
45
+ 2026-01-21 23:27:45 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/contrastive/20260121_0043_tune transformer/ckpt/reward_model.0.pt
46
+ 2026-01-21 23:27:45 | INFO | Using checkpoint config for model
47
+ 2026-01-21 23:27:51 | INFO | Skipping score_projector.3.weight: shape mismatch (ckpt torch.Size([1, 768]) vs model torch.Size([2, 768])), will use randomly initialized weights
48
+ 2026-01-21 23:27:51 | INFO | Skipping score_projector.3.bias: shape mismatch (ckpt torch.Size([1]) vs model torch.Size([2])), will use randomly initialized weights
49
+ 2026-01-21 23:27:51 | INFO | Missing keys (570): ['score_projector.3.weight', 'score_projector.3.bias', 'text_module.model.shared.weight', 'text_module.model.encoder.embed_tokens.weight', 'text_module.model.encoder.block.0.layer.0.SelfAttention.q.weight']...
50
+ 2026-01-21 23:27:51 | WARNING | Missing keys: 59
51
+ 2026-01-21 23:27:52 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/contrastive/20260121_0043_tune transformer/ckpt/reward_model.0.pt
52
+ 2026-01-21 23:27:52 | INFO | Added mlp head for task 'musicality'
53
+ 2026-01-21 23:27:52 | INFO | Added mlp head for task 'alignment'
54
+ 2026-01-21 23:27:52 | INFO | Initializing heads from backbone 'score_projector'
55
+ 2026-01-21 23:27:52 | INFO | Initializing 2 heads from 'score_projector'
56
+ 2026-01-21 23:27:52 | INFO | Task 'musicality': type=mlp, ordinal=False
57
+ 2026-01-21 23:27:52 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
58
+ 2026-01-21 23:27:52 | INFO | Mapped 3.weight -> 4.weight
59
+ 2026-01-21 23:27:52 | INFO | Mapped 3.bias -> 4.bias
60
+ 2026-01-21 23:27:52 | INFO | Loaded 6 parameters, 0 missing
61
+ 2026-01-21 23:27:52 | INFO | ✓ Head initialized from 'score_projector'
62
+ 2026-01-21 23:27:52 | INFO | Task 'alignment': type=mlp, ordinal=False
63
+ 2026-01-21 23:27:52 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
64
+ 2026-01-21 23:27:52 | INFO | Mapped 3.weight -> 4.weight
65
+ 2026-01-21 23:27:52 | INFO | Mapped 3.bias -> 4.bias
66
+ 2026-01-21 23:27:52 | INFO | Loaded 6 parameters, 0 missing
67
+ 2026-01-21 23:27:52 | INFO | ✓ Head initialized from 'score_projector'
68
+ 2026-01-21 23:27:52 | INFO | ✓ All heads initialized
69
+ 2026-01-21 23:27:52 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
70
+ 2026-01-21 23:27:52 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
71
+ 2026-01-21 23:27:52 | INFO | Task 'musicality': train=4322, test=913
72
+ 2026-01-21 23:27:52 | INFO | Task 'alignment': train=1923, test=913
73
+ 2026-01-21 23:27:52 | INFO | [SEQUENTIAL MODE] Training 2 tasks, 5000 steps each
74
+ 2026-01-21 23:27:52 | INFO |
75
+ ============================================================
76
+ 2026-01-21 23:27:52 | INFO | Starting Task 1/2: musicality
77
+ 2026-01-21 23:27:52 | INFO | ============================================================
78
+ 2026-01-21 23:27:52 | INFO | Task 'musicality' trainable parameters: 592,897
79
+ 2026-01-21 23:28:46 | INFO | [Task musicality][Step 200/5000] loss=0.7480 | mse=1.1321
80
+ 2026-01-21 23:29:34 | INFO | [Task musicality][Step 400/5000] loss=0.5146 | mse=0.4447
81
+ 2026-01-21 23:30:28 | INFO | [Task musicality][Step 600/5000] loss=0.4973 | mse=0.4153
82
+ 2026-01-21 23:31:20 | INFO | [Task musicality][Step 800/5000] loss=0.4934 | mse=0.4153
83
+ 2026-01-21 23:32:17 | INFO | [Task musicality][Step 1000/5000] loss=0.4766 | mse=0.3859
84
+ 2026-01-21 23:32:22 | INFO | [Val] musicality: loss=1.5986 | mse=3.2512
85
+ 2026-01-21 23:33:12 | INFO | [Task musicality][Step 1200/5000] loss=0.4538 | mse=0.3574
86
+ 2026-01-21 23:34:00 | INFO | [Task musicality][Step 1400/5000] loss=0.4543 | mse=0.3581
87
+ 2026-01-21 23:34:54 | INFO | [Task musicality][Step 1600/5000] loss=0.4426 | mse=0.3487
88
+ 2026-01-21 23:35:45 | INFO | [Task musicality][Step 1800/5000] loss=0.4348 | mse=0.3336
89
+ 2026-01-21 23:36:40 | INFO | [Task musicality][Step 2000/5000] loss=0.4232 | mse=0.3218
90
+ 2026-01-21 23:36:43 | INFO | [Val] musicality: loss=1.0229 | mse=1.7325
91
+ 2026-01-21 23:37:32 | INFO | [Task musicality][Step 2200/5000] loss=0.4281 | mse=0.3258
92
+ 2026-01-21 23:38:22 | INFO | [Task musicality][Step 2400/5000] loss=0.4129 | mse=0.3094
93
+ 2026-01-21 23:39:13 | INFO | [Task musicality][Step 2600/5000] loss=0.4015 | mse=0.2967
94
+ 2026-01-21 23:40:04 | INFO | [Task musicality][Step 2800/5000] loss=0.3916 | mse=0.2818
95
+ 2026-01-21 23:40:52 | INFO | [Task musicality][Step 3000/5000] loss=0.3814 | mse=0.2732
96
+ 2026-01-21 23:40:55 | INFO | [Val] musicality: loss=0.7424 | mse=0.9581
97
+ 2026-01-21 23:41:42 | INFO | [Task musicality][Step 3200/5000] loss=0.3737 | mse=0.2659
98
+ 2026-01-21 23:42:30 | INFO | [Task musicality][Step 3400/5000] loss=0.3633 | mse=0.2544
99
+ 2026-01-21 23:43:27 | INFO | [Task musicality][Step 3600/5000] loss=0.3555 | mse=0.2459
100
+ 2026-01-21 23:44:19 | INFO | [Task musicality][Step 3800/5000] loss=0.3470 | mse=0.2390
101
+ 2026-01-21 23:45:12 | INFO | [Task musicality][Step 4000/5000] loss=0.3362 | mse=0.2276
102
+ 2026-01-21 23:45:14 | INFO | [Val] musicality: loss=0.7599 | mse=0.9227
103
+ 2026-01-21 23:46:09 | INFO | [Task musicality][Step 4200/5000] loss=0.3332 | mse=0.2259
104
+ 2026-01-21 23:47:02 | INFO | [Task musicality][Step 4400/5000] loss=0.3287 | mse=0.2197
105
+ 2026-01-21 23:48:00 | INFO | [Task musicality][Step 4600/5000] loss=0.3225 | mse=0.2162
106
+ 2026-01-21 23:48:52 | INFO | [Task musicality][Step 4800/5000] loss=0.3192 | mse=0.2123
107
+ 2026-01-21 23:50:06 | INFO | [Task musicality][Step 5000/5000] loss=0.3236 | mse=0.2188
108
+ 2026-01-21 23:50:09 | INFO | [Val] musicality: loss=0.7602 | mse=0.9111
109
+ 2026-01-21 23:50:09 | INFO | Task 'musicality' complete. Running validation...
110
+ 2026-01-21 23:50:12 | INFO | [Final Val for musicality] loss=0.7602 | mse=0.9111
111
+ 2026-01-21 23:50:12 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2327/ckpt/task_musicality_final.pt
112
+ 2026-01-21 23:50:12 | INFO |
113
+ ============================================================
114
+ 2026-01-21 23:50:12 | INFO | Starting Task 2/2: alignment
115
+ 2026-01-21 23:50:12 | INFO | ============================================================
116
+ 2026-01-21 23:50:12 | INFO | Task 'alignment' trainable parameters: 592,897
117
+ 2026-01-21 23:50:31 | INFO | [Task alignment][Step 200/5000] loss=0.7793 | mse=1.1387
118
+ 2026-01-21 23:50:47 | INFO | [Task alignment][Step 400/5000] loss=0.5957 | mse=0.5738
119
+ 2026-01-21 23:51:07 | INFO | [Task alignment][Step 600/5000] loss=0.5749 | mse=0.5411
120
+ 2026-01-21 23:51:25 | INFO | [Task alignment][Step 800/5000] loss=0.5583 | mse=0.5129
121
+ 2026-01-21 23:51:42 | INFO | [Task alignment][Step 1000/5000] loss=0.5405 | mse=0.4850
122
+ 2026-01-21 23:51:45 | INFO | [Val] alignment: loss=2.3135 | mse=6.4499
123
+ 2026-01-21 23:52:05 | INFO | [Task alignment][Step 1200/5000] loss=0.5375 | mse=0.4818
124
+ 2026-01-21 23:52:23 | INFO | [Task alignment][Step 1400/5000] loss=0.5087 | mse=0.4395
125
+ 2026-01-21 23:52:40 | INFO | [Task alignment][Step 1600/5000] loss=0.4874 | mse=0.4093
126
+ 2026-01-21 23:53:04 | INFO | [Task alignment][Step 1800/5000] loss=0.4706 | mse=0.3846
127
+ 2026-01-21 23:53:26 | INFO | [Task alignment][Step 2000/5000] loss=0.4602 | mse=0.3714
128
+ 2026-01-21 23:53:29 | INFO | [Val] alignment: loss=1.9561 | mse=5.3080
129
+ 2026-01-21 23:53:51 | INFO | [Task alignment][Step 2200/5000] loss=0.4380 | mse=0.3463
130
+ 2026-01-21 23:54:11 | INFO | [Task alignment][Step 2400/5000] loss=0.4152 | mse=0.3179
131
+ 2026-01-21 23:54:27 | INFO | [Task alignment][Step 2600/5000] loss=0.3968 | mse=0.2991
132
+ 2026-01-21 23:54:47 | INFO | [Task alignment][Step 2800/5000] loss=0.3833 | mse=0.2826
133
+ 2026-01-21 23:55:06 | INFO | [Task alignment][Step 3000/5000] loss=0.3697 | mse=0.2693
134
+ 2026-01-21 23:55:08 | INFO | [Val] alignment: loss=1.5442 | mse=3.9591
135
+ 2026-01-21 23:55:24 | INFO | [Task alignment][Step 3200/5000] loss=0.3551 | mse=0.2510
136
+ 2026-01-21 23:55:46 | INFO | [Task alignment][Step 3400/5000] loss=0.3389 | mse=0.2355
137
+ 2026-01-21 23:56:02 | INFO | [Task alignment][Step 3600/5000] loss=0.3294 | mse=0.2257
138
+ 2026-01-21 23:56:22 | INFO | [Task alignment][Step 3800/5000] loss=0.3186 | mse=0.2177
139
+ 2026-01-21 23:56:44 | INFO | [Task alignment][Step 4000/5000] loss=0.3100 | mse=0.2095
140
+ 2026-01-21 23:56:47 | INFO | [Val] alignment: loss=1.2328 | mse=2.7735
141
+ 2026-01-21 23:57:03 | INFO | [Task alignment][Step 4200/5000] loss=0.2984 | mse=0.1996
142
+ 2026-01-21 23:57:19 | INFO | [Task alignment][Step 4400/5000] loss=0.2988 | mse=0.1998
143
+ 2026-01-21 23:57:38 | INFO | [Task alignment][Step 4600/5000] loss=0.2932 | mse=0.1953
144
+ 2026-01-21 23:57:55 | INFO | [Task alignment][Step 4800/5000] loss=0.2916 | mse=0.1949
145
+ 2026-01-21 23:58:12 | INFO | [Task alignment][Step 5000/5000] loss=0.2898 | mse=0.1938
146
+ 2026-01-21 23:58:15 | INFO | [Val] alignment: loss=1.2016 | mse=2.6704
147
+ 2026-01-21 23:58:15 | INFO | Task 'alignment' complete. Running validation...
148
+ 2026-01-21 23:58:18 | INFO | [Final Val for alignment] loss=1.2016 | mse=2.6704
149
+ 2026-01-21 23:58:18 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2327/ckpt/task_alignment_final.pt
150
+ 2026-01-21 23:58:18 | INFO |
151
+ ============================================================
152
+ 2026-01-21 23:58:18 | INFO | All tasks complete. Running final validation for all tasks...
153
+ 2026-01-21 23:58:22 | INFO | [Final Val] musicality: loss=0.7602 | mse=0.9111
154
+ 2026-01-21 23:58:25 | INFO | [Final Val] alignment: loss=1.2016 | mse=2.6704
155
+ 2026-01-21 23:58:25 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2327/ckpt/downstream_final.pt
156
+ 2026-01-21 23:58:25 | INFO | Done! Checkpoint saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260121_2327/ckpt/downstream_final.pt
downstream/20260123_0028/downstream_config.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
2
+ basics:
3
+ random_seed: 42
4
+ save_dir: ${project_root}/experiments/downstream
5
+ run_name: null
6
+ tasks:
7
+ - musicality
8
+ backbone:
9
+ checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/contrastive/20260121_0056_tune_t5_transformer/ckpt/reward_model.best_49205.pt
10
+ freeze: true
11
+ from_ema: false
12
+ dataset:
13
+ train_file: ${project_root}/train_multitask.jsonl
14
+ test_file: ${project_root}/test_multitask.jsonl
15
+ heads:
16
+ hidden_dim: 768
17
+ init_from: score_projector
18
+ musicality:
19
+ use_mlp: true
20
+ ordinal: false
21
+ dropout: 0.1
22
+ num_categories: 9
23
+ y_min: 1.0
24
+ y_max: 5.0
25
+ step: 0.5
26
+ alignment:
27
+ use_mlp: true
28
+ ordinal: false
29
+ dropout: 0.1
30
+ num_categories: 9
31
+ y_min: 1.0
32
+ y_max: 5.0
33
+ step: 0.5
34
+ preference:
35
+ use_mlp: true
36
+ dropout: 0.1
37
+ train:
38
+ num_epochs: 10
39
+ num_train_steps: 2000
40
+ batch_size: 48
41
+ learning_rate: 0.001
42
+ weight_decay: 0.01
43
+ max_grad_norm: 1.0
44
+ warmup_steps: 100
45
+ schedule_type: cosine
46
+ min_lr_ratio: 0.01
47
+ dataset_mode: sequential
48
+ steps_per_task: 5000
49
+ log_interval: 200
50
+ val_interval: 1000
51
+ save_interval: 1000
52
+ num_workers: 8
53
+ resume: null
54
+ device: cuda:4
downstream/20260123_0028/train.log ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-23 00:28:23 | INFO | Starting downstream training: 20260123_0028
2
+ 2026-01-23 00:28:23 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260123_0028
3
+ 2026-01-23 00:28:23 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260123_0028/downstream_config.yaml
4
+ 2026-01-23 00:28:23 | INFO | Training tasks: ['musicality']
5
+ 2026-01-23 00:28:23 | INFO | Dataset mode: sequential
6
+ 2026-01-23 00:28:26 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/contrastive/20260121_0056_tune_t5_transformer/ckpt/reward_model.best_49205.pt
7
+ 2026-01-23 00:28:27 | INFO | Using checkpoint config for model
8
+ 2026-01-23 00:28:33 | INFO | Skipping score_projector.3.weight: shape mismatch (ckpt torch.Size([1, 768]) vs model torch.Size([2, 768])), will use randomly initialized weights
9
+ 2026-01-23 00:28:33 | INFO | Skipping score_projector.3.bias: shape mismatch (ckpt torch.Size([1]) vs model torch.Size([2])), will use randomly initialized weights
10
+ 2026-01-23 00:28:33 | INFO | Missing keys (570): ['score_projector.3.weight', 'score_projector.3.bias', 'text_module.model.shared.weight', 'text_module.model.encoder.embed_tokens.weight', 'text_module.model.encoder.block.0.layer.0.SelfAttention.q.weight']...
11
+ 2026-01-23 00:28:33 | WARNING | Missing keys: 59
12
+ 2026-01-23 00:28:33 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/contrastive/20260121_0056_tune_t5_transformer/ckpt/reward_model.best_49205.pt
13
+ 2026-01-23 00:28:33 | INFO | Created DownstreamTaskModel (backbone frozen)
14
+ 2026-01-23 00:28:33 | INFO | Added mlp head for task 'musicality'
15
+ 2026-01-23 00:28:33 | INFO | Initializing heads from backbone 'score_projector'
16
+ 2026-01-23 00:28:33 | INFO | Initializing 1 heads from 'score_projector'
17
+ 2026-01-23 00:28:33 | INFO | Task 'musicality': type=mlp, ordinal=False
18
+ 2026-01-23 00:28:33 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
19
+ 2026-01-23 00:28:33 | INFO | Mapped 3.weight -> 4.weight
20
+ 2026-01-23 00:28:33 | INFO | Mapped 3.bias -> 4.bias
21
+ 2026-01-23 00:28:33 | INFO | Loaded 6 parameters, 0 missing
22
+ 2026-01-23 00:28:33 | INFO | ✓ Head initialized from 'score_projector'
23
+ 2026-01-23 00:28:33 | INFO | ✓ All heads initialized
24
+ 2026-01-23 00:28:33 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
25
+ 2026-01-23 00:28:33 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
26
+ 2026-01-23 00:28:33 | INFO | Task 'musicality': train=4322, test=913
27
+ 2026-01-23 00:28:33 | INFO | [SEQUENTIAL MODE] Training 1 tasks, 5000 steps each
28
+ 2026-01-23 00:28:33 | INFO |
29
+ ============================================================
30
+ 2026-01-23 00:28:33 | INFO | Starting Task 1/1: musicality
31
+ 2026-01-23 00:28:33 | INFO | ============================================================
32
+ 2026-01-23 00:28:33 | INFO | Task 'musicality' trainable parameters: 592,897
33
+ 2026-01-23 00:29:35 | INFO | [Task musicality][Step 200/5000] loss=0.7903 | mse=1.1744
34
+ 2026-01-23 00:30:26 | INFO | [Task musicality][Step 400/5000] loss=0.5796 | mse=0.5450
35
+ 2026-01-23 00:31:22 | INFO | [Task musicality][Step 600/5000] loss=0.5725 | mse=0.5392
36
+ 2026-01-23 00:32:25 | INFO | [Task musicality][Step 800/5000] loss=0.5767 | mse=0.5413
37
+ 2026-01-23 00:33:21 | INFO | [Task musicality][Step 1000/5000] loss=0.5483 | mse=0.4924
38
+ 2026-01-23 00:33:25 | INFO | [Val] musicality: loss=2.1414 | mse=5.9358
39
+ 2026-01-23 00:34:22 | INFO | [Task musicality][Step 1200/5000] loss=0.5340 | mse=0.4705
40
+ 2026-01-23 00:35:20 | INFO | [Task musicality][Step 1400/5000] loss=0.5328 | mse=0.4756
41
+ 2026-01-23 00:36:11 | INFO | [Task musicality][Step 1600/5000] loss=0.5312 | mse=0.4669
42
+ 2026-01-23 00:37:02 | INFO | [Task musicality][Step 1800/5000] loss=0.5304 | mse=0.4696
43
+ 2026-01-23 00:38:00 | INFO | [Task musicality][Step 2000/5000] loss=0.5116 | mse=0.4377
44
+ 2026-01-23 00:38:03 | INFO | [Val] musicality: loss=2.0244 | mse=5.7591
45
+ 2026-01-23 00:38:55 | INFO | [Task musicality][Step 2200/5000] loss=0.5056 | mse=0.4309
46
+ 2026-01-23 00:39:47 | INFO | [Task musicality][Step 2400/5000] loss=0.5109 | mse=0.4386
47
+ 2026-01-23 00:40:44 | INFO | [Task musicality][Step 2600/5000] loss=0.4995 | mse=0.4218
48
+ 2026-01-23 00:41:39 | INFO | [Task musicality][Step 2800/5000] loss=0.4991 | mse=0.4187
49
+ 2026-01-23 00:42:29 | INFO | [Task musicality][Step 3000/5000] loss=0.4946 | mse=0.4163
50
+ 2026-01-23 00:42:32 | INFO | [Val] musicality: loss=1.9526 | mse=5.2377
51
+ 2026-01-23 00:43:24 | INFO | [Task musicality][Step 3200/5000] loss=0.4876 | mse=0.4065
52
+ 2026-01-23 00:44:17 | INFO | [Task musicality][Step 3400/5000] loss=0.4854 | mse=0.4062
53
+ 2026-01-23 00:45:09 | INFO | [Task musicality][Step 3600/5000] loss=0.4806 | mse=0.3975
54
+ 2026-01-23 00:46:05 | INFO | [Task musicality][Step 3800/5000] loss=0.4834 | mse=0.3997
55
+ 2026-01-23 00:47:00 | INFO | [Task musicality][Step 4000/5000] loss=0.4770 | mse=0.3938
56
+ 2026-01-23 00:47:03 | INFO | [Val] musicality: loss=1.7301 | mse=4.4097
57
+ 2026-01-23 00:47:54 | INFO | [Task musicality][Step 4200/5000] loss=0.4680 | mse=0.3779
58
+ 2026-01-23 00:48:53 | INFO | [Task musicality][Step 4400/5000] loss=0.4696 | mse=0.3838
59
+ 2026-01-23 00:49:55 | INFO | [Task musicality][Step 4600/5000] loss=0.4686 | mse=0.3824
60
+ 2026-01-23 00:50:48 | INFO | [Task musicality][Step 4800/5000] loss=0.4648 | mse=0.3765
61
+ 2026-01-23 00:51:41 | INFO | [Task musicality][Step 5000/5000] loss=0.4687 | mse=0.3847
62
+ 2026-01-23 00:51:44 | INFO | [Val] musicality: loss=1.7043 | mse=4.3205
63
+ 2026-01-23 00:51:44 | INFO | Task 'musicality' complete. Running validation...
64
+ 2026-01-23 00:51:48 | INFO | [Final Val for musicality] loss=1.7043 | mse=4.3205
65
+ 2026-01-23 00:51:48 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260123_0028/ckpt/task_musicality_final.pt
66
+ 2026-01-23 00:51:48 | INFO |
67
+ ============================================================
68
+ 2026-01-23 00:51:48 | INFO | All tasks complete. Running final validation for all tasks...
69
+ 2026-01-23 00:51:51 | INFO | [Final Val] musicality: loss=1.7043 | mse=4.3205
70
+ 2026-01-23 00:51:51 | INFO | Saved checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260123_0028/ckpt/downstream_final.pt
71
+ 2026-01-23 00:51:51 | INFO | Done! Checkpoint saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream/20260123_0028/ckpt/downstream_final.pt
downstream_mixed/20260122_1200/config.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
2
+ basics:
3
+ random_seed: 42
4
+ save_dir: ${project_root}/experiments/downstream_mixed
5
+ run_name: null
6
+ tasks:
7
+ - musicality
8
+ - alignment
9
+ - preference
10
+ backbone:
11
+ checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
12
+ freeze: false
13
+ freeze_encoder_only: true
14
+ from_ema: false
15
+ dataset:
16
+ train_file: ${project_root}/train_multitask.jsonl
17
+ test_file: ${project_root}/test_multitask.jsonl
18
+ heads:
19
+ hidden_dim: 768
20
+ init_from: score_projector
21
+ musicality:
22
+ use_mlp: true
23
+ ordinal: false
24
+ dropout: 0.1
25
+ num_categories: 9
26
+ y_min: 1.0
27
+ y_max: 5.0
28
+ step: 0.5
29
+ alignment:
30
+ use_mlp: true
31
+ ordinal: false
32
+ dropout: 0.1
33
+ num_categories: 9
34
+ y_min: 1.0
35
+ y_max: 5.0
36
+ step: 0.5
37
+ preference:
38
+ use_mlp: true
39
+ dropout: 0.1
40
+ train:
41
+ dataset_mode: mixed
42
+ num_train_steps: 5000
43
+ batch_size: 32
44
+ learning_rate: 0.0001
45
+ backbone_learning_rate: 1.0e-05
46
+ weight_decay: 0.01
47
+ max_grad_norm: 1.0
48
+ warmup_steps: 200
49
+ schedule_type: cosine
50
+ min_lr_ratio: 0.01
51
+ log_interval: 100
52
+ val_interval: 500
53
+ save_interval: 1000
54
+ num_workers: 8
55
+ resume: null
56
+ device: cuda:1
downstream_mixed/20260122_1200/train.log ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-22 12:00:33 | INFO | Starting downstream training: 20260122_1200
2
+ 2026-01-22 12:00:33 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200
3
+ 2026-01-22 12:00:33 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/config.yaml
4
+ 2026-01-22 12:00:33 | INFO | Training tasks: ['musicality', 'alignment', 'preference']
5
+ 2026-01-22 12:00:33 | INFO | Dataset mode: mixed
6
+ 2026-01-22 12:00:36 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
7
+ 2026-01-22 12:00:36 | INFO | Using checkpoint config for model
8
+ 2026-01-22 12:00:43 | INFO | Missing keys (794): ['alignment_head.0.weight', 'alignment_head.0.bias', 'alignment_head.1.weight', 'alignment_head.1.bias', 'alignment_head.3.weight']...
9
+ 2026-01-22 12:00:43 | WARNING | Missing keys: 283
10
+ 2026-01-22 12:00:44 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
11
+ 2026-01-22 12:00:44 | INFO | Created MixedDownstreamTaskModel (freeze_encoder_only=True)
12
+ 2026-01-22 12:00:44 | INFO | Added mlp head for task 'musicality'
13
+ 2026-01-22 12:00:44 | INFO | Added mlp head for task 'alignment'
14
+ 2026-01-22 12:00:44 | INFO | Added mlp head for task 'preference'
15
+ 2026-01-22 12:00:44 | INFO | Initializing heads from backbone 'score_projector'
16
+ 2026-01-22 12:00:44 | INFO | Initializing 3 heads from 'score_projector'
17
+ 2026-01-22 12:00:44 | INFO | Task 'musicality': type=mlp, ordinal=False
18
+ 2026-01-22 12:00:44 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
19
+ 2026-01-22 12:00:44 | INFO | Slicing 3.weight -> 4.weight: torch.Size([2, 768]) -> torch.Size([1, 768])
20
+ 2026-01-22 12:00:44 | INFO | Loaded 6 parameters, 0 missing
21
+ 2026-01-22 12:00:44 | INFO | ✓ Head initialized from 'score_projector'
22
+ 2026-01-22 12:00:44 | INFO | Task 'alignment': type=mlp, ordinal=False
23
+ 2026-01-22 12:00:44 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
24
+ 2026-01-22 12:00:44 | INFO | Slicing 3.weight -> 4.weight: torch.Size([2, 768]) -> torch.Size([1, 768])
25
+ 2026-01-22 12:00:44 | INFO | Loaded 6 parameters, 0 missing
26
+ 2026-01-22 12:00:44 | INFO | ✓ Head initialized from 'score_projector'
27
+ 2026-01-22 12:00:44 | INFO | Task 'preference': type=mlp, ordinal=False
28
+ 2026-01-22 12:00:44 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
29
+ 2026-01-22 12:00:44 | INFO | Slicing 3.weight -> 4.weight: torch.Size([2, 768]) -> torch.Size([1, 768])
30
+ 2026-01-22 12:00:44 | INFO | Loaded 6 parameters, 0 missing
31
+ 2026-01-22 12:00:44 | INFO | ✓ Head initialized from 'score_projector'
32
+ 2026-01-22 12:00:44 | INFO | ✓ All heads initialized
33
+ 2026-01-22 12:00:44 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
34
+ 2026-01-22 12:00:44 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
35
+ 2026-01-22 12:00:44 | INFO | Task 'musicality': train=4322, test=913
36
+ 2026-01-22 12:00:44 | INFO | Task 'alignment': train=1923, test=913
37
+ 2026-01-22 12:00:44 | INFO | Task 'preference': train=1065, test=275
38
+ 2026-01-22 12:00:44 | INFO | Backbone trainable parameters: 21,279,237 (lr=1e-05)
39
+ 2026-01-22 12:00:44 | INFO | Head parameters: 1,778,691 (lr=0.0001)
40
+ 2026-01-22 12:00:44 | INFO | Total trainable parameters: 23,057,928
41
+ 2026-01-22 12:00:44 | INFO | [MIXED MODE] Starting training for 5000 steps
42
+ 2026-01-22 12:00:44 | INFO | Backbone LR: 1e-05, Head LR: 0.0001
43
+ 2026-01-22 12:03:00 | INFO | [Step 100] musicality/loss=4.0637 | musicality/mse=26.2373 | alignment/loss=3.0336 | alignment/mse=14.6745 | preference/loss=1.5111 | preference/accuracy=0.5675 | lr_backbone=5.05e-06 | lr_heads=5.05e-05
44
+ 2026-01-22 12:04:58 | INFO | [Step 200] musicality/loss=1.0183 | musicality/mse=1.7072 | alignment/loss=0.9960 | alignment/mse=1.6934 | preference/loss=0.6578 | preference/accuracy=0.6678 | lr_backbone=1.00e-05 | lr_heads=1.00e-04
45
+ 2026-01-22 12:06:51 | INFO | [Step 300] musicality/loss=0.6885 | musicality/mse=0.7626 | alignment/loss=0.6725 | alignment/mse=0.7337 | preference/loss=0.5054 | preference/accuracy=0.7491 | lr_backbone=9.99e-06 | lr_heads=9.99e-05
46
+ 2026-01-22 12:08:47 | INFO | [Step 400] musicality/loss=0.6275 | musicality/mse=0.6383 | alignment/loss=0.6024 | alignment/mse=0.5992 | preference/loss=0.4471 | preference/accuracy=0.7819 | lr_backbone=9.96e-06 | lr_heads=9.96e-05
47
+ 2026-01-22 12:10:39 | INFO | [Step 500] musicality/loss=0.5644 | musicality/mse=0.5226 | alignment/loss=0.5718 | alignment/mse=0.5490 | preference/loss=0.4029 | preference/accuracy=0.8100 | lr_backbone=9.90e-06 | lr_heads=9.90e-05
48
+ 2026-01-22 12:10:39 | INFO | [Step 500] Running validation...
49
+ 2026-01-22 12:10:44 | INFO | [Val] musicality: loss=0.6608 | mse=0.6632
50
+ 2026-01-22 12:10:49 | INFO | [Val] alignment: loss=0.6571 | mse=0.7059
51
+ 2026-01-22 12:10:59 | INFO | [Val] preference: loss=0.5869 | accuracy=0.7231
52
+ 2026-01-22 12:10:59 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_best.pt (81.2MB, 58 params)
53
+ 2026-01-22 12:10:59 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_best_full.pt (161.6MB, 3 heads)
54
+ 2026-01-22 12:10:59 | INFO | New best model saved (val_loss=0.6349)
55
+ 2026-01-22 12:13:03 | INFO | [Step 600] musicality/loss=0.5445 | musicality/mse=0.4930 | alignment/loss=0.5371 | alignment/mse=0.4832 | preference/loss=0.3639 | preference/accuracy=0.8369 | lr_backbone=9.83e-06 | lr_heads=9.83e-05
56
+ 2026-01-22 12:15:04 | INFO | [Step 700] musicality/loss=0.5212 | musicality/mse=0.4580 | alignment/loss=0.5114 | alignment/mse=0.4531 | preference/loss=0.3288 | preference/accuracy=0.8612 | lr_backbone=9.73e-06 | lr_heads=9.73e-05
57
+ 2026-01-22 12:17:04 | INFO | [Step 800] musicality/loss=0.5041 | musicality/mse=0.4401 | alignment/loss=0.4980 | alignment/mse=0.4283 | preference/loss=0.3097 | preference/accuracy=0.8694 | lr_backbone=9.62e-06 | lr_heads=9.62e-05
58
+ 2026-01-22 12:19:00 | INFO | [Step 900] musicality/loss=0.4869 | musicality/mse=0.4069 | alignment/loss=0.4819 | alignment/mse=0.4107 | preference/loss=0.2636 | preference/accuracy=0.8972 | lr_backbone=9.48e-06 | lr_heads=9.48e-05
59
+ 2026-01-22 12:20:56 | INFO | [Step 1000] musicality/loss=0.4772 | musicality/mse=0.3909 | alignment/loss=0.4657 | alignment/mse=0.3832 | preference/loss=0.2406 | preference/accuracy=0.9075 | lr_backbone=9.33e-06 | lr_heads=9.33e-05
60
+ 2026-01-22 12:20:56 | INFO | [Step 1000] Running validation...
61
+ 2026-01-22 12:21:01 | INFO | [Val] musicality: loss=0.6333 | mse=0.6206
62
+ 2026-01-22 12:21:09 | INFO | [Val] alignment: loss=0.6804 | mse=0.7634
63
+ 2026-01-22 12:21:18 | INFO | [Val] preference: loss=0.6525 | accuracy=0.7290
64
+ 2026-01-22 12:21:18 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_step_1000.pt (81.2MB, 58 params)
65
+ 2026-01-22 12:21:18 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_step_1000_full.pt (161.6MB, 3 heads)
66
+ 2026-01-22 12:23:17 | INFO | [Step 1100] musicality/loss=0.4655 | musicality/mse=0.3807 | alignment/loss=0.4512 | alignment/mse=0.3668 | preference/loss=0.2191 | preference/accuracy=0.9225 | lr_backbone=9.16e-06 | lr_heads=9.16e-05
67
+ 2026-01-22 12:25:13 | INFO | [Step 1200] musicality/loss=0.4532 | musicality/mse=0.3656 | alignment/loss=0.4378 | alignment/mse=0.3502 | preference/loss=0.1958 | preference/accuracy=0.9363 | lr_backbone=8.97e-06 | lr_heads=8.97e-05
68
+ 2026-01-22 12:27:18 | INFO | [Step 1300] musicality/loss=0.4398 | musicality/mse=0.3375 | alignment/loss=0.4246 | alignment/mse=0.3317 | preference/loss=0.1737 | preference/accuracy=0.9472 | lr_backbone=8.76e-06 | lr_heads=8.76e-05
69
+ 2026-01-22 12:29:21 | INFO | [Step 1400] musicality/loss=0.4341 | musicality/mse=0.3397 | alignment/loss=0.4080 | alignment/mse=0.3086 | preference/loss=0.1520 | preference/accuracy=0.9534 | lr_backbone=8.54e-06 | lr_heads=8.54e-05
70
+ 2026-01-22 12:31:24 | INFO | [Step 1500] musicality/loss=0.4334 | musicality/mse=0.3378 | alignment/loss=0.4012 | alignment/mse=0.2984 | preference/loss=0.1414 | preference/accuracy=0.9547 | lr_backbone=8.30e-06 | lr_heads=8.30e-05
71
+ 2026-01-22 12:31:24 | INFO | [Step 1500] Running validation...
72
+ 2026-01-22 12:31:28 | INFO | [Val] musicality: loss=0.6763 | mse=0.7138
73
+ 2026-01-22 12:31:33 | INFO | [Val] alignment: loss=0.7246 | mse=0.8572
74
+ 2026-01-22 12:31:40 | INFO | [Val] preference: loss=0.8507 | accuracy=0.7173
75
+ 2026-01-22 12:33:37 | INFO | [Step 1600] musicality/loss=0.4255 | musicality/mse=0.3209 | alignment/loss=0.3842 | alignment/mse=0.2749 | preference/loss=0.1293 | preference/accuracy=0.9566 | lr_backbone=8.04e-06 | lr_heads=8.04e-05
76
+ 2026-01-22 12:35:41 | INFO | [Step 1700] musicality/loss=0.4066 | musicality/mse=0.3057 | alignment/loss=0.3841 | alignment/mse=0.2792 | preference/loss=0.1069 | preference/accuracy=0.9703 | lr_backbone=7.78e-06 | lr_heads=7.78e-05
77
+ 2026-01-22 12:37:40 | INFO | [Step 1800] musicality/loss=0.4080 | musicality/mse=0.3009 | alignment/loss=0.3715 | alignment/mse=0.2686 | preference/loss=0.1050 | preference/accuracy=0.9722 | lr_backbone=7.50e-06 | lr_heads=7.50e-05
78
+ 2026-01-22 12:39:42 | INFO | [Step 1900] musicality/loss=0.3994 | musicality/mse=0.2916 | alignment/loss=0.3563 | alignment/mse=0.2474 | preference/loss=0.0925 | preference/accuracy=0.9759 | lr_backbone=7.21e-06 | lr_heads=7.21e-05
79
+ 2026-01-22 12:41:44 | INFO | [Step 2000] musicality/loss=0.3932 | musicality/mse=0.2833 | alignment/loss=0.3522 | alignment/mse=0.2453 | preference/loss=0.0871 | preference/accuracy=0.9759 | lr_backbone=6.91e-06 | lr_heads=6.91e-05
80
+ 2026-01-22 12:41:44 | INFO | [Step 2000] Running validation...
81
+ 2026-01-22 12:41:48 | INFO | [Val] musicality: loss=0.6617 | mse=0.6857
82
+ 2026-01-22 12:41:52 | INFO | [Val] alignment: loss=0.7773 | mse=0.9801
83
+ 2026-01-22 12:41:59 | INFO | [Val] preference: loss=1.0762 | accuracy=0.6999
84
+ 2026-01-22 12:42:00 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_step_2000.pt (81.2MB, 58 params)
85
+ 2026-01-22 12:42:00 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_step_2000_full.pt (161.6MB, 3 heads)
86
+ 2026-01-22 12:43:55 | INFO | [Step 2100] musicality/loss=0.3933 | musicality/mse=0.2810 | alignment/loss=0.3404 | alignment/mse=0.2300 | preference/loss=0.0796 | preference/accuracy=0.9797 | lr_backbone=6.61e-06 | lr_heads=6.61e-05
87
+ 2026-01-22 12:45:52 | INFO | [Step 2200] musicality/loss=0.3666 | musicality/mse=0.2535 | alignment/loss=0.3335 | alignment/mse=0.2198 | preference/loss=0.0720 | preference/accuracy=0.9822 | lr_backbone=6.29e-06 | lr_heads=6.29e-05
88
+ 2026-01-22 12:47:46 | INFO | [Step 2300] musicality/loss=0.3828 | musicality/mse=0.2731 | alignment/loss=0.3260 | alignment/mse=0.2147 | preference/loss=0.0662 | preference/accuracy=0.9856 | lr_backbone=5.98e-06 | lr_heads=5.98e-05
89
+ 2026-01-22 12:49:37 | INFO | [Step 2400] musicality/loss=0.3704 | musicality/mse=0.2589 | alignment/loss=0.3215 | alignment/mse=0.2067 | preference/loss=0.0641 | preference/accuracy=0.9831 | lr_backbone=5.65e-06 | lr_heads=5.65e-05
90
+ 2026-01-22 12:51:41 | INFO | [Step 2500] musicality/loss=0.3680 | musicality/mse=0.2558 | alignment/loss=0.3119 | alignment/mse=0.1950 | preference/loss=0.0548 | preference/accuracy=0.9866 | lr_backbone=5.33e-06 | lr_heads=5.33e-05
91
+ 2026-01-22 12:51:41 | INFO | [Step 2500] Running validation...
92
+ 2026-01-22 12:51:45 | INFO | [Val] musicality: loss=0.6730 | mse=0.7145
93
+ 2026-01-22 12:51:49 | INFO | [Val] alignment: loss=0.7797 | mse=0.9899
94
+ 2026-01-22 12:51:56 | INFO | [Val] preference: loss=1.1633 | accuracy=0.7127
95
+ 2026-01-22 12:53:51 | INFO | [Step 2600] musicality/loss=0.3629 | musicality/mse=0.2462 | alignment/loss=0.3097 | alignment/mse=0.1931 | preference/loss=0.0521 | preference/accuracy=0.9884 | lr_backbone=5.00e-06 | lr_heads=5.00e-05
96
+ 2026-01-22 12:55:42 | INFO | [Step 2700] musicality/loss=0.3622 | musicality/mse=0.2491 | alignment/loss=0.2991 | alignment/mse=0.1803 | preference/loss=0.0474 | preference/accuracy=0.9900 | lr_backbone=4.67e-06 | lr_heads=4.67e-05
97
+ 2026-01-22 12:57:46 | INFO | [Step 2800] musicality/loss=0.3593 | musicality/mse=0.2445 | alignment/loss=0.2913 | alignment/mse=0.1758 | preference/loss=0.0468 | preference/accuracy=0.9919 | lr_backbone=4.35e-06 | lr_heads=4.35e-05
98
+ 2026-01-22 12:59:53 | INFO | [Step 2900] musicality/loss=0.3444 | musicality/mse=0.2250 | alignment/loss=0.3002 | alignment/mse=0.1828 | preference/loss=0.0455 | preference/accuracy=0.9903 | lr_backbone=4.02e-06 | lr_heads=4.02e-05
99
+ 2026-01-22 13:01:55 | INFO | [Step 3000] musicality/loss=0.3463 | musicality/mse=0.2247 | alignment/loss=0.2832 | alignment/mse=0.1671 | preference/loss=0.0444 | preference/accuracy=0.9903 | lr_backbone=3.71e-06 | lr_heads=3.71e-05
100
+ 2026-01-22 13:01:55 | INFO | [Step 3000] Running validation...
101
+ 2026-01-22 13:01:58 | INFO | [Val] musicality: loss=0.6822 | mse=0.7429
102
+ 2026-01-22 13:02:02 | INFO | [Val] alignment: loss=0.8000 | mse=1.0463
103
+ 2026-01-22 13:02:09 | INFO | [Val] preference: loss=1.2784 | accuracy=0.7058
104
+ 2026-01-22 13:02:09 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_step_3000.pt (81.2MB, 58 params)
105
+ 2026-01-22 13:02:09 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_step_3000_full.pt (161.6MB, 3 heads)
106
+ 2026-01-22 13:04:19 | INFO | [Step 3100] musicality/loss=0.3449 | musicality/mse=0.2249 | alignment/loss=0.2821 | alignment/mse=0.1644 | preference/loss=0.0420 | preference/accuracy=0.9912 | lr_backbone=3.39e-06 | lr_heads=3.39e-05
107
+ 2026-01-22 13:06:23 | INFO | [Step 3200] musicality/loss=0.3391 | musicality/mse=0.2169 | alignment/loss=0.2769 | alignment/mse=0.1577 | preference/loss=0.0362 | preference/accuracy=0.9916 | lr_backbone=3.09e-06 | lr_heads=3.09e-05
108
+ 2026-01-22 13:08:21 | INFO | [Step 3300] musicality/loss=0.3372 | musicality/mse=0.2252 | alignment/loss=0.2789 | alignment/mse=0.1595 | preference/loss=0.0374 | preference/accuracy=0.9928 | lr_backbone=2.79e-06 | lr_heads=2.79e-05
109
+ 2026-01-22 13:10:26 | INFO | [Step 3400] musicality/loss=0.3323 | musicality/mse=0.2121 | alignment/loss=0.2744 | alignment/mse=0.1610 | preference/loss=0.0376 | preference/accuracy=0.9897 | lr_backbone=2.50e-06 | lr_heads=2.50e-05
110
+ 2026-01-22 13:12:22 | INFO | [Step 3500] musicality/loss=0.3326 | musicality/mse=0.2191 | alignment/loss=0.2689 | alignment/mse=0.1545 | preference/loss=0.0375 | preference/accuracy=0.9922 | lr_backbone=2.22e-06 | lr_heads=2.22e-05
111
+ 2026-01-22 13:12:22 | INFO | [Step 3500] Running validation...
112
+ 2026-01-22 13:12:26 | INFO | [Val] musicality: loss=0.7052 | mse=0.7941
113
+ 2026-01-22 13:12:31 | INFO | [Val] alignment: loss=0.7979 | mse=1.0505
114
+ 2026-01-22 13:12:38 | INFO | [Val] preference: loss=1.3824 | accuracy=0.6871
115
+ 2026-01-22 13:14:37 | INFO | [Step 3600] musicality/loss=0.3297 | musicality/mse=0.2113 | alignment/loss=0.2602 | alignment/mse=0.1440 | preference/loss=0.0351 | preference/accuracy=0.9928 | lr_backbone=1.96e-06 | lr_heads=1.96e-05
116
+ 2026-01-22 13:16:45 | INFO | [Step 3700] musicality/loss=0.3212 | musicality/mse=0.2035 | alignment/loss=0.2628 | alignment/mse=0.1474 | preference/loss=0.0336 | preference/accuracy=0.9928 | lr_backbone=1.70e-06 | lr_heads=1.70e-05
117
+ 2026-01-22 13:18:51 | INFO | [Step 3800] musicality/loss=0.3207 | musicality/mse=0.1961 | alignment/loss=0.2625 | alignment/mse=0.1466 | preference/loss=0.0336 | preference/accuracy=0.9941 | lr_backbone=1.46e-06 | lr_heads=1.46e-05
118
+ 2026-01-22 13:20:47 | INFO | [Step 3900] musicality/loss=0.3257 | musicality/mse=0.2132 | alignment/loss=0.2556 | alignment/mse=0.1387 | preference/loss=0.0300 | preference/accuracy=0.9953 | lr_backbone=1.24e-06 | lr_heads=1.24e-05
119
+ 2026-01-22 13:22:53 | INFO | [Step 4000] musicality/loss=0.3214 | musicality/mse=0.2049 | alignment/loss=0.2551 | alignment/mse=0.1389 | preference/loss=0.0310 | preference/accuracy=0.9966 | lr_backbone=1.03e-06 | lr_heads=1.03e-05
120
+ 2026-01-22 13:22:53 | INFO | [Step 4000] Running validation...
121
+ 2026-01-22 13:22:58 | INFO | [Val] musicality: loss=0.6972 | mse=0.7796
122
+ 2026-01-22 13:23:02 | INFO | [Val] alignment: loss=0.8132 | mse=1.0816
123
+ 2026-01-22 13:23:10 | INFO | [Val] preference: loss=1.4036 | accuracy=0.6965
124
+ 2026-01-22 13:23:10 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_step_4000.pt (81.2MB, 58 params)
125
+ 2026-01-22 13:23:10 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_step_4000_full.pt (161.6MB, 3 heads)
126
+ 2026-01-22 13:25:13 | INFO | [Step 4100] musicality/loss=0.3218 | musicality/mse=0.2021 | alignment/loss=0.2590 | alignment/mse=0.1476 | preference/loss=0.0311 | preference/accuracy=0.9934 | lr_backbone=8.43e-07 | lr_heads=8.43e-06
127
+ 2026-01-22 13:27:13 | INFO | [Step 4200] musicality/loss=0.3236 | musicality/mse=0.2082 | alignment/loss=0.2549 | alignment/mse=0.1375 | preference/loss=0.0300 | preference/accuracy=0.9956 | lr_backbone=6.70e-07 | lr_heads=6.70e-06
128
+ 2026-01-22 13:29:11 | INFO | [Step 4300] musicality/loss=0.3143 | musicality/mse=0.1926 | alignment/loss=0.2508 | alignment/mse=0.1364 | preference/loss=0.0328 | preference/accuracy=0.9938 | lr_backbone=5.16e-07 | lr_heads=5.16e-06
129
+ 2026-01-22 13:31:10 | INFO | [Step 4400] musicality/loss=0.3274 | musicality/mse=0.2154 | alignment/loss=0.2571 | alignment/mse=0.1395 | preference/loss=0.0303 | preference/accuracy=0.9941 | lr_backbone=3.81e-07 | lr_heads=3.81e-06
130
+ 2026-01-22 13:33:13 | INFO | [Step 4500] musicality/loss=0.3214 | musicality/mse=0.2035 | alignment/loss=0.2498 | alignment/mse=0.1368 | preference/loss=0.0318 | preference/accuracy=0.9947 | lr_backbone=2.65e-07 | lr_heads=2.65e-06
131
+ 2026-01-22 13:33:13 | INFO | [Step 4500] Running validation...
132
+ 2026-01-22 13:33:17 | INFO | [Val] musicality: loss=0.6957 | mse=0.7749
133
+ 2026-01-22 13:33:21 | INFO | [Val] alignment: loss=0.8114 | mse=1.0750
134
+ 2026-01-22 13:33:28 | INFO | [Val] preference: loss=1.4276 | accuracy=0.6965
135
+ 2026-01-22 13:35:34 | INFO | [Step 4600] musicality/loss=0.3194 | musicality/mse=0.1995 | alignment/loss=0.2504 | alignment/mse=0.1391 | preference/loss=0.0274 | preference/accuracy=0.9966 | lr_backbone=1.70e-07 | lr_heads=1.70e-06
136
+ 2026-01-22 13:37:34 | INFO | [Step 4700] musicality/loss=0.3210 | musicality/mse=0.2021 | alignment/loss=0.2475 | alignment/mse=0.1335 | preference/loss=0.0311 | preference/accuracy=0.9947 | lr_backbone=9.61e-08 | lr_heads=9.61e-07
137
+ 2026-01-22 13:39:36 | INFO | [Step 4800] musicality/loss=0.3196 | musicality/mse=0.2007 | alignment/loss=0.2581 | alignment/mse=0.1434 | preference/loss=0.0277 | preference/accuracy=0.9962 | lr_backbone=4.28e-08 | lr_heads=4.28e-07
138
+ 2026-01-22 13:41:37 | INFO | [Step 4900] musicality/loss=0.3171 | musicality/mse=0.2014 | alignment/loss=0.2531 | alignment/mse=0.1374 | preference/loss=0.0261 | preference/accuracy=0.9975 | lr_backbone=1.07e-08 | lr_heads=1.07e-07
139
+ 2026-01-22 13:43:33 | INFO | [Step 5000] musicality/loss=0.3200 | musicality/mse=0.1975 | alignment/loss=0.2537 | alignment/mse=0.1386 | preference/loss=0.0290 | preference/accuracy=0.9944 | lr_backbone=0.00e+00 | lr_heads=0.00e+00
140
+ 2026-01-22 13:43:33 | INFO | [Step 5000] Running validation...
141
+ 2026-01-22 13:43:37 | INFO | [Val] musicality: loss=0.6978 | mse=0.7797
142
+ 2026-01-22 13:43:42 | INFO | [Val] alignment: loss=0.8105 | mse=1.0741
143
+ 2026-01-22 13:43:49 | INFO | [Val] preference: loss=1.4317 | accuracy=0.6930
144
+ 2026-01-22 13:43:49 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_step_5000.pt (81.2MB, 58 params)
145
+ 2026-01-22 13:43:49 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_step_5000_full.pt (161.6MB, 3 heads)
146
+ 2026-01-22 13:43:49 | INFO | Training complete. Running final validation...
147
+ 2026-01-22 13:43:54 | INFO | [Final Val] musicality: loss=0.6978 | mse=0.7797
148
+ 2026-01-22 13:43:59 | INFO | [Final Val] alignment: loss=0.8105 | mse=1.0741
149
+ 2026-01-22 13:44:08 | INFO | [Final Val] preference: loss=1.4317 | accuracy=0.6930
150
+ 2026-01-22 13:44:08 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_final.pt (81.2MB, 58 params)
151
+ 2026-01-22 13:44:08 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_final_full.pt (161.6MB, 3 heads)
152
+ 2026-01-22 13:44:08 | INFO | Done! Checkpoint saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1200/ckpt/mixed_final.pt
downstream_mixed/20260122_1955/config.yaml ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ attention_mode: CA
2
+ attn_dropout: 0.0
3
+ category_embeddings: null
4
+ dim: 768
5
+ dim_head: 64
6
+ downsample:
7
+ configs:
8
+ conv2_4x:
9
+ factor: 4
10
+ kernel_size: 5
11
+ kind: conv*2
12
+ use_layernorm: true
13
+ conv_4x:
14
+ factor: 4
15
+ kernel_size: 5
16
+ kind: conv
17
+ stage: 1
18
+ use_layernorm: true
19
+ glu_4x:
20
+ factor: 4
21
+ kernel_size: 5
22
+ kind: gluconv*2+pw
23
+ use_layernorm: true
24
+ mean:
25
+ factor: 2
26
+ kind: mean
27
+ mean_4x:
28
+ dropout: 0.0
29
+ factor: 30
30
+ kind: mean+mlp
31
+ mlp_ratio: 2.0
32
+ none:
33
+ factor: 1
34
+ kind: none
35
+ eval: mean_4x
36
+ ref: null
37
+ text: none
38
+ ff_dropout: 0.0
39
+ ff_mult: 4
40
+ freeze_audio: true
41
+ freeze_text: true
42
+ heads: 8
43
+ joint_tf_depth: 1
44
+ load_config:
45
+ checkpoint_path: null
46
+ frozen_from_pretrained: true
47
+ pretrained_name: OpenMuQ/MuQ-MuLan-large
48
+ strict: false
49
+ mlp_dim: 768
50
+ mode: text_only
51
+ model_name: OpenMuQ/MuQ-MuLan-large
52
+ name: reward
53
+ null_embedding:
54
+ audio:
55
+ dropout: 0.5
56
+ length: 10
57
+ lyrics:
58
+ dropout: 0.3
59
+ length: 10
60
+ text:
61
+ dropout: 0.2
62
+ length: 10
63
+ output_dim: 2
64
+ prompt_tf_depth: 1
65
+ sr: 24000
66
+ text_encoder: muq_mulan
67
+ text_lora_config: null
68
+ train_muq_depth: 0
69
+ use_layer_idx: -1
downstream_mixed/20260122_1955/downstream_config.yaml ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
2
+ basics:
3
+ random_seed: 42
4
+ save_dir: ${project_root}/experiments/downstream_mixed
5
+ run_name: null
6
+ tasks:
7
+ - musicality
8
+ - alignment
9
+ - preference
10
+ backbone:
11
+ checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
12
+ freeze: false
13
+ freeze_encoder_only: true
14
+ from_ema: false
15
+ dataset:
16
+ train_file: ${project_root}/train_multitask.jsonl
17
+ test_file: ${project_root}/test_multitask.jsonl
18
+ heads:
19
+ hidden_dim: 768
20
+ init_from: score_projector
21
+ musicality:
22
+ use_mlp: true
23
+ ordinal: false
24
+ dropout: 0.1
25
+ use_tanh: true
26
+ num_categories: 9
27
+ y_min: 1.0
28
+ y_max: 5.0
29
+ step: 0.5
30
+ alignment:
31
+ use_mlp: true
32
+ use_tanh: true
33
+ ordinal: false
34
+ dropout: 0.1
35
+ num_categories: 9
36
+ y_min: 1.0
37
+ y_max: 5.0
38
+ step: 0.5
39
+ preference:
40
+ use_mlp: true
41
+ dropout: 0.1
42
+ train:
43
+ dataset_mode: mixed
44
+ num_train_steps: 5000
45
+ batch_size: 32
46
+ learning_rate: 0.0001
47
+ backbone_learning_rate: 1.0e-05
48
+ weight_decay: 0.01
49
+ max_grad_norm: 1.0
50
+ warmup_steps: 200
51
+ schedule_type: cosine
52
+ min_lr_ratio: 0.01
53
+ log_interval: 100
54
+ val_interval: 500
55
+ save_interval: 1000
56
+ num_workers: 8
57
+ resume: null
58
+ device: cuda
downstream_mixed/20260122_1955/train.log ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-22 19:55:26 | INFO | Starting downstream training: 20260122_1955
2
+ 2026-01-22 19:55:26 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955
3
+ 2026-01-22 19:55:26 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/downstream_config.yaml
4
+ 2026-01-22 19:55:26 | INFO | Training tasks: ['musicality', 'alignment', 'preference']
5
+ 2026-01-22 19:55:26 | INFO | Dataset mode: mixed
6
+ 2026-01-22 19:55:29 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
7
+ 2026-01-22 19:55:29 | INFO | Using checkpoint config for model
8
+ 2026-01-22 19:55:35 | INFO | Missing keys (794): ['alignment_head.0.weight', 'alignment_head.0.bias', 'alignment_head.1.weight', 'alignment_head.1.bias', 'alignment_head.3.weight']...
9
+ 2026-01-22 19:55:35 | WARNING | Missing keys: 283
10
+ 2026-01-22 19:55:36 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
11
+ 2026-01-22 19:55:36 | INFO | Created MixedDownstreamTaskModel (freeze_encoder_only=True)
12
+ 2026-01-22 19:55:36 | INFO | Added mlp head for task 'musicality'
13
+ 2026-01-22 19:55:36 | INFO | Added mlp head for task 'alignment'
14
+ 2026-01-22 19:55:36 | INFO | Added mlp head for task 'preference'
15
+ 2026-01-22 19:55:36 | INFO | Initializing heads from backbone 'score_projector'
16
+ 2026-01-22 19:55:36 | INFO | Initializing 3 heads from 'score_projector'
17
+ 2026-01-22 19:55:36 | INFO | Task 'musicality': type=mlp, ordinal=False
18
+ 2026-01-22 19:55:36 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
19
+ 2026-01-22 19:55:36 | INFO | Slicing 3.weight -> 4.weight: torch.Size([2, 768]) -> torch.Size([1, 768])
20
+ 2026-01-22 19:55:36 | INFO | Loaded 6 parameters, 0 missing
21
+ 2026-01-22 19:55:36 | INFO | ✓ Head initialized from 'score_projector'
22
+ 2026-01-22 19:55:36 | INFO | Task 'alignment': type=mlp, ordinal=False
23
+ 2026-01-22 19:55:36 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
24
+ 2026-01-22 19:55:36 | INFO | Slicing 3.weight -> 4.weight: torch.Size([2, 768]) -> torch.Size([1, 768])
25
+ 2026-01-22 19:55:36 | INFO | Loaded 6 parameters, 0 missing
26
+ 2026-01-22 19:55:36 | INFO | ✓ Head initialized from 'score_projector'
27
+ 2026-01-22 19:55:36 | INFO | Task 'preference': type=mlp, ordinal=False
28
+ 2026-01-22 19:55:36 | INFO | Initializing head from 'score_projector' (type=mlp, from_ema=False)
29
+ 2026-01-22 19:55:36 | INFO | Slicing 3.weight -> 4.weight: torch.Size([2, 768]) -> torch.Size([1, 768])
30
+ 2026-01-22 19:55:36 | INFO | Loaded 6 parameters, 0 missing
31
+ 2026-01-22 19:55:36 | INFO | ✓ Head initialized from 'score_projector'
32
+ 2026-01-22 19:55:36 | INFO | ✓ All heads initialized
33
+ 2026-01-22 19:55:36 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
34
+ 2026-01-22 19:55:36 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
35
+ 2026-01-22 19:55:36 | INFO | Task 'musicality': train=4322, test=913
36
+ 2026-01-22 19:55:36 | INFO | Task 'alignment': train=1923, test=913
37
+ 2026-01-22 19:55:36 | INFO | Task 'preference': train=1065, test=275
38
+ 2026-01-22 19:55:36 | INFO | Backbone trainable parameters: 21,279,237 (lr=1e-05)
39
+ 2026-01-22 19:55:36 | INFO | Head parameters: 1,778,691 (lr=0.0001)
40
+ 2026-01-22 19:55:36 | INFO | Total trainable parameters: 23,057,928
41
+ 2026-01-22 19:55:36 | INFO | [MIXED MODE] Starting training for 5000 steps
42
+ 2026-01-22 19:55:36 | INFO | Backbone LR: 1e-05, Head LR: 0.0001
43
+ 2026-01-22 19:58:02 | INFO | [Step 100] musicality/loss=4.0102 | musicality/mse=25.7604 | alignment/loss=3.0251 | alignment/mse=14.7271 | preference/loss=1.5069 | preference/accuracy=0.5759 | lr_backbone=5.05e-06 | lr_heads=5.05e-05
44
+ 2026-01-22 20:00:09 | INFO | [Step 200] musicality/loss=1.0180 | musicality/mse=1.7198 | alignment/loss=1.0024 | alignment/mse=1.7404 | preference/loss=0.6600 | preference/accuracy=0.6647 | lr_backbone=1.00e-05 | lr_heads=1.00e-04
45
+ 2026-01-22 20:02:13 | INFO | [Step 300] musicality/loss=0.6936 | musicality/mse=0.7847 | alignment/loss=0.6862 | alignment/mse=0.7737 | preference/loss=0.5112 | preference/accuracy=0.7488 | lr_backbone=9.99e-06 | lr_heads=9.99e-05
46
+ 2026-01-22 20:04:17 | INFO | [Step 400] musicality/loss=0.6136 | musicality/mse=0.6091 | alignment/loss=0.5997 | alignment/mse=0.5944 | preference/loss=0.4582 | preference/accuracy=0.7828 | lr_backbone=9.96e-06 | lr_heads=9.96e-05
47
+ 2026-01-22 20:06:15 | INFO | [Step 500] musicality/loss=0.5617 | musicality/mse=0.5180 | alignment/loss=0.5633 | alignment/mse=0.5330 | preference/loss=0.4022 | preference/accuracy=0.8131 | lr_backbone=9.90e-06 | lr_heads=9.90e-05
48
+ 2026-01-22 20:06:15 | INFO | [Step 500] Running validation...
49
+ 2026-01-22 20:06:21 | INFO | [Val] musicality: loss=0.6488 | mse=0.6439
50
+ 2026-01-22 20:06:26 | INFO | [Val] alignment: loss=0.6735 | mse=0.7288
51
+ 2026-01-22 20:06:42 | INFO | [Val] preference: loss=0.5791 | accuracy=0.7301
52
+ 2026-01-22 20:06:42 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_best.pt (81.2MB, 58 params)
53
+ 2026-01-22 20:06:42 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_best_full.pt (161.6MB, 3 heads)
54
+ 2026-01-22 20:06:42 | INFO | Saved model config to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/config.yaml
55
+ 2026-01-22 20:06:42 | INFO | New best model saved (val_loss=0.6338)
56
+ 2026-01-22 20:08:48 | INFO | [Step 600] musicality/loss=0.5497 | musicality/mse=0.5046 | alignment/loss=0.5343 | alignment/mse=0.4822 | preference/loss=0.3690 | preference/accuracy=0.8356 | lr_backbone=9.83e-06 | lr_heads=9.83e-05
57
+ 2026-01-22 20:10:58 | INFO | [Step 700] musicality/loss=0.5343 | musicality/mse=0.4759 | alignment/loss=0.5158 | alignment/mse=0.4526 | preference/loss=0.3361 | preference/accuracy=0.8562 | lr_backbone=9.73e-06 | lr_heads=9.73e-05
58
+ 2026-01-22 20:13:04 | INFO | [Step 800] musicality/loss=0.5077 | musicality/mse=0.4405 | alignment/loss=0.4961 | alignment/mse=0.4343 | preference/loss=0.3054 | preference/accuracy=0.8659 | lr_backbone=9.62e-06 | lr_heads=9.62e-05
59
+ 2026-01-22 20:15:10 | INFO | [Step 900] musicality/loss=0.4827 | musicality/mse=0.4026 | alignment/loss=0.4907 | alignment/mse=0.4216 | preference/loss=0.2724 | preference/accuracy=0.8909 | lr_backbone=9.48e-06 | lr_heads=9.48e-05
60
+ 2026-01-22 20:17:16 | INFO | [Step 1000] musicality/loss=0.4706 | musicality/mse=0.3813 | alignment/loss=0.4595 | alignment/mse=0.3812 | preference/loss=0.2412 | preference/accuracy=0.9059 | lr_backbone=9.33e-06 | lr_heads=9.33e-05
61
+ 2026-01-22 20:17:16 | INFO | [Step 1000] Running validation...
62
+ 2026-01-22 20:17:20 | INFO | [Val] musicality: loss=0.6520 | mse=0.6573
63
+ 2026-01-22 20:17:25 | INFO | [Val] alignment: loss=0.7110 | mse=0.8175
64
+ 2026-01-22 20:17:33 | INFO | [Val] preference: loss=0.6850 | accuracy=0.7290
65
+ 2026-01-22 20:17:33 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_step_1000.pt (81.2MB, 58 params)
66
+ 2026-01-22 20:17:33 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_step_1000_full.pt (161.6MB, 3 heads)
67
+ 2026-01-22 20:19:38 | INFO | [Step 1100] musicality/loss=0.4653 | musicality/mse=0.3839 | alignment/loss=0.4591 | alignment/mse=0.3804 | preference/loss=0.2110 | preference/accuracy=0.9219 | lr_backbone=9.16e-06 | lr_heads=9.16e-05
68
+ 2026-01-22 20:21:40 | INFO | [Step 1200] musicality/loss=0.4585 | musicality/mse=0.3653 | alignment/loss=0.4425 | alignment/mse=0.3537 | preference/loss=0.1931 | preference/accuracy=0.9275 | lr_backbone=8.97e-06 | lr_heads=8.97e-05
69
+ 2026-01-22 20:23:52 | INFO | [Step 1300] musicality/loss=0.4420 | musicality/mse=0.3432 | alignment/loss=0.4205 | alignment/mse=0.3258 | preference/loss=0.1749 | preference/accuracy=0.9450 | lr_backbone=8.76e-06 | lr_heads=8.76e-05
70
+ 2026-01-22 20:25:58 | INFO | [Step 1400] musicality/loss=0.4351 | musicality/mse=0.3408 | alignment/loss=0.4205 | alignment/mse=0.3224 | preference/loss=0.1601 | preference/accuracy=0.9466 | lr_backbone=8.54e-06 | lr_heads=8.54e-05
71
+ 2026-01-22 20:28:00 | INFO | [Step 1500] musicality/loss=0.4294 | musicality/mse=0.3276 | alignment/loss=0.3935 | alignment/mse=0.2947 | preference/loss=0.1411 | preference/accuracy=0.9563 | lr_backbone=8.30e-06 | lr_heads=8.30e-05
72
+ 2026-01-22 20:28:00 | INFO | [Step 1500] Running validation...
73
+ 2026-01-22 20:28:04 | INFO | [Val] musicality: loss=0.6670 | mse=0.6936
74
+ 2026-01-22 20:28:08 | INFO | [Val] alignment: loss=0.7408 | mse=0.8923
75
+ 2026-01-22 20:28:16 | INFO | [Val] preference: loss=0.8865 | accuracy=0.7151
76
+ 2026-01-22 20:30:15 | INFO | [Step 1600] musicality/loss=0.4214 | musicality/mse=0.3162 | alignment/loss=0.3917 | alignment/mse=0.2898 | preference/loss=0.1362 | preference/accuracy=0.9572 | lr_backbone=8.04e-06 | lr_heads=8.04e-05
77
+ 2026-01-22 20:32:17 | INFO | [Step 1700] musicality/loss=0.4154 | musicality/mse=0.3088 | alignment/loss=0.3825 | alignment/mse=0.2771 | preference/loss=0.1202 | preference/accuracy=0.9637 | lr_backbone=7.78e-06 | lr_heads=7.78e-05
78
+ 2026-01-22 20:34:22 | INFO | [Step 1800] musicality/loss=0.4103 | musicality/mse=0.3085 | alignment/loss=0.3628 | alignment/mse=0.2559 | preference/loss=0.1093 | preference/accuracy=0.9694 | lr_backbone=7.50e-06 | lr_heads=7.50e-05
79
+ 2026-01-22 20:36:26 | INFO | [Step 1900] musicality/loss=0.3988 | musicality/mse=0.2859 | alignment/loss=0.3553 | alignment/mse=0.2509 | preference/loss=0.0938 | preference/accuracy=0.9725 | lr_backbone=7.21e-06 | lr_heads=7.21e-05
80
+ 2026-01-22 20:38:26 | INFO | [Step 2000] musicality/loss=0.3971 | musicality/mse=0.2937 | alignment/loss=0.3568 | alignment/mse=0.2439 | preference/loss=0.0850 | preference/accuracy=0.9800 | lr_backbone=6.91e-06 | lr_heads=6.91e-05
81
+ 2026-01-22 20:38:26 | INFO | [Step 2000] Running validation...
82
+ 2026-01-22 20:38:29 | INFO | [Val] musicality: loss=0.6845 | mse=0.7318
83
+ 2026-01-22 20:38:33 | INFO | [Val] alignment: loss=0.7468 | mse=0.9195
84
+ 2026-01-22 20:38:40 | INFO | [Val] preference: loss=1.0557 | accuracy=0.7127
85
+ 2026-01-22 20:38:40 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_step_2000.pt (81.2MB, 58 params)
86
+ 2026-01-22 20:38:40 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_step_2000_full.pt (161.6MB, 3 heads)
87
+ 2026-01-22 20:40:41 | INFO | [Step 2100] musicality/loss=0.3805 | musicality/mse=0.2657 | alignment/loss=0.3390 | alignment/mse=0.2305 | preference/loss=0.0812 | preference/accuracy=0.9769 | lr_backbone=6.61e-06 | lr_heads=6.61e-05
88
+ 2026-01-22 20:42:41 | INFO | [Step 2200] musicality/loss=0.3902 | musicality/mse=0.2838 | alignment/loss=0.3319 | alignment/mse=0.2229 | preference/loss=0.0766 | preference/accuracy=0.9784 | lr_backbone=6.29e-06 | lr_heads=6.29e-05
89
+ 2026-01-22 20:44:45 | INFO | [Step 2300] musicality/loss=0.3838 | musicality/mse=0.2732 | alignment/loss=0.3318 | alignment/mse=0.2150 | preference/loss=0.0723 | preference/accuracy=0.9784 | lr_backbone=5.98e-06 | lr_heads=5.98e-05
90
+ 2026-01-22 20:46:49 | INFO | [Step 2400] musicality/loss=0.3717 | musicality/mse=0.2606 | alignment/loss=0.3228 | alignment/mse=0.2107 | preference/loss=0.0689 | preference/accuracy=0.9834 | lr_backbone=5.65e-06 | lr_heads=5.65e-05
91
+ 2026-01-22 20:48:54 | INFO | [Step 2500] musicality/loss=0.3597 | musicality/mse=0.2386 | alignment/loss=0.3152 | alignment/mse=0.2051 | preference/loss=0.0572 | preference/accuracy=0.9853 | lr_backbone=5.33e-06 | lr_heads=5.33e-05
92
+ 2026-01-22 20:48:54 | INFO | [Step 2500] Running validation...
93
+ 2026-01-22 20:48:58 | INFO | [Val] musicality: loss=0.6836 | mse=0.7373
94
+ 2026-01-22 20:49:03 | INFO | [Val] alignment: loss=0.7766 | mse=0.9850
95
+ 2026-01-22 20:49:10 | INFO | [Val] preference: loss=1.2142 | accuracy=0.6906
96
+ 2026-01-22 20:51:14 | INFO | [Step 2600] musicality/loss=0.3659 | musicality/mse=0.2496 | alignment/loss=0.3106 | alignment/mse=0.1954 | preference/loss=0.0531 | preference/accuracy=0.9891 | lr_backbone=5.00e-06 | lr_heads=5.00e-05
97
+ 2026-01-22 20:53:13 | INFO | [Step 2700] musicality/loss=0.3661 | musicality/mse=0.2551 | alignment/loss=0.3030 | alignment/mse=0.1852 | preference/loss=0.0515 | preference/accuracy=0.9875 | lr_backbone=4.67e-06 | lr_heads=4.67e-05
98
+ 2026-01-22 20:55:14 | INFO | [Step 2800] musicality/loss=0.3553 | musicality/mse=0.2406 | alignment/loss=0.3005 | alignment/mse=0.1872 | preference/loss=0.0515 | preference/accuracy=0.9888 | lr_backbone=4.35e-06 | lr_heads=4.35e-05
99
+ 2026-01-22 20:57:16 | INFO | [Step 2900] musicality/loss=0.3592 | musicality/mse=0.2419 | alignment/loss=0.2965 | alignment/mse=0.1796 | preference/loss=0.0445 | preference/accuracy=0.9888 | lr_backbone=4.02e-06 | lr_heads=4.02e-05
100
+ 2026-01-22 20:59:16 | INFO | [Step 3000] musicality/loss=0.3505 | musicality/mse=0.2338 | alignment/loss=0.2840 | alignment/mse=0.1693 | preference/loss=0.0439 | preference/accuracy=0.9916 | lr_backbone=3.71e-06 | lr_heads=3.71e-05
101
+ 2026-01-22 20:59:16 | INFO | [Step 3000] Running validation...
102
+ 2026-01-22 20:59:20 | INFO | [Val] musicality: loss=0.7002 | mse=0.7711
103
+ 2026-01-22 20:59:25 | INFO | [Val] alignment: loss=0.7825 | mse=1.0091
104
+ 2026-01-22 20:59:32 | INFO | [Val] preference: loss=1.3055 | accuracy=0.6965
105
+ 2026-01-22 20:59:32 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_step_3000.pt (81.2MB, 58 params)
106
+ 2026-01-22 20:59:32 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_step_3000_full.pt (161.6MB, 3 heads)
107
+ 2026-01-22 21:01:30 | INFO | [Step 3100] musicality/loss=0.3430 | musicality/mse=0.2244 | alignment/loss=0.2829 | alignment/mse=0.1674 | preference/loss=0.0455 | preference/accuracy=0.9903 | lr_backbone=3.39e-06 | lr_heads=3.39e-05
108
+ 2026-01-22 21:03:28 | INFO | [Step 3200] musicality/loss=0.3406 | musicality/mse=0.2224 | alignment/loss=0.2833 | alignment/mse=0.1693 | preference/loss=0.0406 | preference/accuracy=0.9903 | lr_backbone=3.09e-06 | lr_heads=3.09e-05
109
+ 2026-01-22 21:05:26 | INFO | [Step 3300] musicality/loss=0.3375 | musicality/mse=0.2178 | alignment/loss=0.2742 | alignment/mse=0.1619 | preference/loss=0.0361 | preference/accuracy=0.9925 | lr_backbone=2.79e-06 | lr_heads=2.79e-05
110
+ 2026-01-22 21:07:33 | INFO | [Step 3400] musicality/loss=0.3322 | musicality/mse=0.2134 | alignment/loss=0.2738 | alignment/mse=0.1624 | preference/loss=0.0381 | preference/accuracy=0.9931 | lr_backbone=2.50e-06 | lr_heads=2.50e-05
111
+ 2026-01-22 21:09:29 | INFO | [Step 3500] musicality/loss=0.3395 | musicality/mse=0.2221 | alignment/loss=0.2694 | alignment/mse=0.1529 | preference/loss=0.0383 | preference/accuracy=0.9919 | lr_backbone=2.22e-06 | lr_heads=2.22e-05
112
+ 2026-01-22 21:09:29 | INFO | [Step 3500] Running validation...
113
+ 2026-01-22 21:09:34 | INFO | [Val] musicality: loss=0.7099 | mse=0.7968
114
+ 2026-01-22 21:09:39 | INFO | [Val] alignment: loss=0.7999 | mse=1.0596
115
+ 2026-01-22 21:09:47 | INFO | [Val] preference: loss=1.4323 | accuracy=0.6860
116
+ 2026-01-22 21:11:45 | INFO | [Step 3600] musicality/loss=0.3416 | musicality/mse=0.2289 | alignment/loss=0.2664 | alignment/mse=0.1553 | preference/loss=0.0351 | preference/accuracy=0.9944 | lr_backbone=1.96e-06 | lr_heads=1.96e-05
117
+ 2026-01-22 21:13:47 | INFO | [Step 3700] musicality/loss=0.3342 | musicality/mse=0.2132 | alignment/loss=0.2616 | alignment/mse=0.1459 | preference/loss=0.0350 | preference/accuracy=0.9931 | lr_backbone=1.70e-06 | lr_heads=1.70e-05
118
+ 2026-01-22 21:15:53 | INFO | [Step 3800] musicality/loss=0.3355 | musicality/mse=0.2140 | alignment/loss=0.2655 | alignment/mse=0.1539 | preference/loss=0.0359 | preference/accuracy=0.9931 | lr_backbone=1.46e-06 | lr_heads=1.46e-05
119
+ 2026-01-22 21:17:52 | INFO | [Step 3900] musicality/loss=0.3208 | musicality/mse=0.2009 | alignment/loss=0.2614 | alignment/mse=0.1469 | preference/loss=0.0318 | preference/accuracy=0.9950 | lr_backbone=1.24e-06 | lr_heads=1.24e-05
120
+ 2026-01-22 21:19:50 | INFO | [Step 4000] musicality/loss=0.3265 | musicality/mse=0.2104 | alignment/loss=0.2603 | alignment/mse=0.1458 | preference/loss=0.0311 | preference/accuracy=0.9950 | lr_backbone=1.03e-06 | lr_heads=1.03e-05
121
+ 2026-01-22 21:19:50 | INFO | [Step 4000] Running validation...
122
+ 2026-01-22 21:19:55 | INFO | [Val] musicality: loss=0.7095 | mse=0.7949
123
+ 2026-01-22 21:19:58 | INFO | [Val] alignment: loss=0.8059 | mse=1.0730
124
+ 2026-01-22 21:20:06 | INFO | [Val] preference: loss=1.4329 | accuracy=0.6802
125
+ 2026-01-22 21:20:06 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_step_4000.pt (81.2MB, 58 params)
126
+ 2026-01-22 21:20:06 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_step_4000_full.pt (161.6MB, 3 heads)
127
+ 2026-01-22 21:22:06 | INFO | [Step 4100] musicality/loss=0.3249 | musicality/mse=0.2018 | alignment/loss=0.2543 | alignment/mse=0.1402 | preference/loss=0.0294 | preference/accuracy=0.9956 | lr_backbone=8.43e-07 | lr_heads=8.43e-06
128
+ 2026-01-22 21:24:01 | INFO | [Step 4200] musicality/loss=0.3225 | musicality/mse=0.2066 | alignment/loss=0.2586 | alignment/mse=0.1471 | preference/loss=0.0328 | preference/accuracy=0.9931 | lr_backbone=6.70e-07 | lr_heads=6.70e-06
129
+ 2026-01-22 21:25:56 | INFO | [Step 4300] musicality/loss=0.3226 | musicality/mse=0.2079 | alignment/loss=0.2555 | alignment/mse=0.1438 | preference/loss=0.0289 | preference/accuracy=0.9947 | lr_backbone=5.16e-07 | lr_heads=5.16e-06
130
+ 2026-01-22 21:27:56 | INFO | [Step 4400] musicality/loss=0.3169 | musicality/mse=0.1972 | alignment/loss=0.2543 | alignment/mse=0.1377 | preference/loss=0.0299 | preference/accuracy=0.9941 | lr_backbone=3.81e-07 | lr_heads=3.81e-06
131
+ 2026-01-22 21:29:52 | INFO | [Step 4500] musicality/loss=0.3281 | musicality/mse=0.2140 | alignment/loss=0.2503 | alignment/mse=0.1388 | preference/loss=0.0312 | preference/accuracy=0.9953 | lr_backbone=2.65e-07 | lr_heads=2.65e-06
132
+ 2026-01-22 21:29:52 | INFO | [Step 4500] Running validation...
133
+ 2026-01-22 21:29:57 | INFO | [Val] musicality: loss=0.7054 | mse=0.7867
134
+ 2026-01-22 21:30:02 | INFO | [Val] alignment: loss=0.8063 | mse=1.0763
135
+ 2026-01-22 21:30:11 | INFO | [Val] preference: loss=1.4512 | accuracy=0.6767
136
+ 2026-01-22 21:32:14 | INFO | [Step 4600] musicality/loss=0.3216 | musicality/mse=0.2008 | alignment/loss=0.2560 | alignment/mse=0.1431 | preference/loss=0.0332 | preference/accuracy=0.9944 | lr_backbone=1.70e-07 | lr_heads=1.70e-06
137
+ 2026-01-22 21:34:14 | INFO | [Step 4700] musicality/loss=0.3259 | musicality/mse=0.2067 | alignment/loss=0.2512 | alignment/mse=0.1408 | preference/loss=0.0284 | preference/accuracy=0.9956 | lr_backbone=9.61e-08 | lr_heads=9.61e-07
138
+ 2026-01-22 21:36:15 | INFO | [Step 4800] musicality/loss=0.3268 | musicality/mse=0.2086 | alignment/loss=0.2501 | alignment/mse=0.1375 | preference/loss=0.0310 | preference/accuracy=0.9928 | lr_backbone=4.28e-08 | lr_heads=4.28e-07
139
+ 2026-01-22 21:38:19 | INFO | [Step 4900] musicality/loss=0.3168 | musicality/mse=0.1950 | alignment/loss=0.2517 | alignment/mse=0.1389 | preference/loss=0.0309 | preference/accuracy=0.9938 | lr_backbone=1.07e-08 | lr_heads=1.07e-07
140
+ 2026-01-22 21:40:24 | INFO | [Step 5000] musicality/loss=0.3217 | musicality/mse=0.2037 | alignment/loss=0.2489 | alignment/mse=0.1369 | preference/loss=0.0322 | preference/accuracy=0.9938 | lr_backbone=0.00e+00 | lr_heads=0.00e+00
141
+ 2026-01-22 21:40:24 | INFO | [Step 5000] Running validation...
142
+ 2026-01-22 21:40:28 | INFO | [Val] musicality: loss=0.7057 | mse=0.7873
143
+ 2026-01-22 21:40:32 | INFO | [Val] alignment: loss=0.8051 | mse=1.0733
144
+ 2026-01-22 21:40:39 | INFO | [Val] preference: loss=1.4580 | accuracy=0.6767
145
+ 2026-01-22 21:40:39 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_step_5000.pt (81.2MB, 58 params)
146
+ 2026-01-22 21:40:39 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_step_5000_full.pt (161.6MB, 3 heads)
147
+ 2026-01-22 21:40:39 | INFO | Training complete. Running final validation...
148
+ 2026-01-22 21:40:44 | INFO | [Final Val] musicality: loss=0.7057 | mse=0.7873
149
+ 2026-01-22 21:40:48 | INFO | [Final Val] alignment: loss=0.8051 | mse=1.0733
150
+ 2026-01-22 21:40:56 | INFO | [Final Val] preference: loss=1.4580 | accuracy=0.6767
151
+ 2026-01-22 21:40:56 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_final.pt (81.2MB, 58 params)
152
+ 2026-01-22 21:40:57 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_final_full.pt (161.6MB, 3 heads)
153
+ 2026-01-22 21:40:57 | INFO | Done! Checkpoint saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed/20260122_1955/ckpt/mixed_final.pt
downstream_mixed_linear/20260122_1143/config.yaml ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DEVICES: '7'
2
+ accelerate:
3
+ mixed_precision: bf16
4
+ basics:
5
+ random_seed: 42
6
+ save_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model
7
+ dataset:
8
+ audio_dropout:
9
+ apply_to_eval: true
10
+ apply_to_ref: true
11
+ enabled: true
12
+ eval_only_on_training: true
13
+ max_duration: 1000
14
+ min_duration: 250
15
+ cache_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/tmp
16
+ db_path: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/backend/database.db
17
+ duration: 600.0
18
+ embedding_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/supervised_embeddings
19
+ max_samples: null
20
+ max_val_samples: null
21
+ preference_file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/unbiased_qwen/train.json
22
+ sample_rate: 24000
23
+ use_preextracted: true
24
+ val_preference_file: null
25
+ loss:
26
+ IF_ratio: 0.5
27
+ filter_ties: true
28
+ label_smoothing: 0.0
29
+ reduction: mean
30
+ model:
31
+ attention_mode: CA
32
+ attn_dropout: 0.0
33
+ category_embeddings: null
34
+ dim: 768
35
+ dim_head: 64
36
+ downsample:
37
+ configs:
38
+ conv2_4x:
39
+ factor: 4
40
+ kernel_size: 5
41
+ kind: conv*2
42
+ use_layernorm: true
43
+ conv_4x:
44
+ factor: 4
45
+ kernel_size: 5
46
+ kind: conv
47
+ stage: 1
48
+ use_layernorm: true
49
+ glu_4x:
50
+ factor: 4
51
+ kernel_size: 5
52
+ kind: gluconv*2+pw
53
+ use_layernorm: true
54
+ mean:
55
+ factor: 2
56
+ kind: mean
57
+ mean_4x:
58
+ dropout: 0.0
59
+ factor: 30
60
+ kind: mean+mlp
61
+ mlp_ratio: 2.0
62
+ none:
63
+ factor: 1
64
+ kind: none
65
+ eval: mean_4x
66
+ ref: null
67
+ text: none
68
+ ff_dropout: 0.0
69
+ ff_mult: 4
70
+ freeze_audio: true
71
+ freeze_text: true
72
+ gradient_checkpointing: false
73
+ heads: 8
74
+ joint_tf_depth: 1
75
+ load_config:
76
+ checkpoint_path: null
77
+ frozen_from_pretrained: true
78
+ pretrained_name: OpenMuQ/MuQ-MuLan-large
79
+ strict: false
80
+ mlp_dim: 768
81
+ mode: text_only
82
+ model_name: OpenMuQ/MuQ-MuLan-large
83
+ name: reward
84
+ null_embedding:
85
+ audio:
86
+ dropout: 0.5
87
+ length: 10
88
+ lyrics:
89
+ dropout: 0.3
90
+ length: 10
91
+ text:
92
+ dropout: 0.2
93
+ length: 10
94
+ output_dim: 2
95
+ prompt_tf_depth: 1
96
+ sr: 24000
97
+ text_encoder:
98
+ name: muq_mulan
99
+ tune: null
100
+ text_lora_config: null
101
+ train_muq_depth: 0
102
+ train_muqmulan: false
103
+ use_layer_idx: -1
104
+ project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
105
+ run_name: null
106
+ train:
107
+ batch_size: 24
108
+ betas:
109
+ - 0.9
110
+ - 0.99
111
+ ema_decay: 0.9999
112
+ ema_update_every: 1
113
+ enable_gradient_checkpointing: true
114
+ force_clear_prev_results: false
115
+ grad_accum_every: 2
116
+ log_tensorboard: true
117
+ lr_schedule:
118
+ min_lr_ratio: 0.001
119
+ name: linear_cosine
120
+ total_steps: 30000
121
+ warmup_steps: 300
122
+ max_grad_norm: 100
123
+ mlp_lr: 0.0002
124
+ num_train_steps: 30000
125
+ num_valid_batches: 10
126
+ num_workers: 8
127
+ other_lr: null
128
+ resume: null
129
+ resume_optimizer: false
130
+ save_model_every: 2000
131
+ use_checkpoint_config: false
132
+ use_ema: true
133
+ use_lion: false
134
+ valid_batch_size: 20
135
+ valid_every: 2000
136
+ valid_frac: 0.1
137
+ verify_weights_on_load: true
138
+ validate:
139
+ checkpoint: null
140
+ dir: null
141
+ max_failure_cases: 30
142
+ num_batches: null
143
+ only: false
144
+ trust_checkpoint: true
downstream_mixed_linear/20260122_1143/downstream_config.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
2
+ basics:
3
+ random_seed: 42
4
+ save_dir: ${project_root}/experiments/downstream_mixed_linear
5
+ run_name: null
6
+ tasks:
7
+ - musicality
8
+ - alignment
9
+ - preference
10
+ backbone:
11
+ checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
12
+ freeze: false
13
+ freeze_encoder_only: true
14
+ from_ema: false
15
+ dataset:
16
+ train_file: ${project_root}/train_multitask.jsonl
17
+ test_file: ${project_root}/test_multitask.jsonl
18
+ heads:
19
+ hidden_dim: 768
20
+ init_from: null
21
+ musicality:
22
+ use_mlp: false
23
+ ordinal: false
24
+ dropout: 0.0
25
+ num_categories: 9
26
+ y_min: 1.0
27
+ y_max: 5.0
28
+ step: 0.5
29
+ alignment:
30
+ use_mlp: false
31
+ ordinal: false
32
+ dropout: 0.0
33
+ num_categories: 9
34
+ y_min: 1.0
35
+ y_max: 5.0
36
+ step: 0.5
37
+ preference:
38
+ use_mlp: false
39
+ dropout: 0.0
40
+ train:
41
+ dataset_mode: mixed
42
+ num_train_steps: 5000
43
+ batch_size: 32
44
+ learning_rate: 0.0005
45
+ backbone_learning_rate: 1.0e-05
46
+ weight_decay: 0.01
47
+ max_grad_norm: 1.0
48
+ warmup_steps: 200
49
+ schedule_type: cosine
50
+ min_lr_ratio: 0.01
51
+ log_interval: 100
52
+ val_interval: 500
53
+ save_interval: 1000
54
+ num_workers: 8
55
+ resume: null
56
+ device: cuda
downstream_mixed_linear/20260122_1143/predicted_0122_1533.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
downstream_mixed_linear/20260122_1143/train.log ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-22 11:43:55 | INFO | Starting downstream training: 20260122_1143
2
+ 2026-01-22 11:43:55 | INFO | Results will be saved to: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed_linear/20260122_1143
3
+ 2026-01-22 11:43:55 | INFO | Config saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed_linear/20260122_1143/config.yaml
4
+ 2026-01-22 11:43:55 | INFO | Training tasks: ['musicality', 'alignment', 'preference']
5
+ 2026-01-22 11:43:55 | INFO | Dataset mode: mixed
6
+ 2026-01-22 11:43:58 | INFO | Loading backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
7
+ 2026-01-22 11:43:58 | INFO | Using checkpoint config for model
8
+ 2026-01-22 11:44:03 | INFO | Missing keys (794): ['alignment_head.0.weight', 'alignment_head.0.bias', 'alignment_head.1.weight', 'alignment_head.1.bias', 'alignment_head.3.weight']...
9
+ 2026-01-22 11:44:03 | WARNING | Missing keys: 283
10
+ 2026-01-22 11:44:04 | INFO | Loaded backbone from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260118_1213/ckpt/reward_model.best_29999.pt
11
+ 2026-01-22 11:44:04 | INFO | Created MixedDownstreamTaskModel (freeze_encoder_only=True)
12
+ 2026-01-22 11:44:04 | INFO | Added linear head for task 'musicality'
13
+ 2026-01-22 11:44:04 | INFO | Added linear head for task 'alignment'
14
+ 2026-01-22 11:44:04 | INFO | Added linear head for task 'preference'
15
+ 2026-01-22 11:44:04 | INFO | Loading train data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/train_multitask.jsonl
16
+ 2026-01-22 11:44:04 | INFO | Loading test data from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/test_multitask.jsonl
17
+ 2026-01-22 11:44:04 | INFO | Task 'musicality': train=4322, test=913
18
+ 2026-01-22 11:44:04 | INFO | Task 'alignment': train=1923, test=913
19
+ 2026-01-22 11:44:04 | INFO | Task 'preference': train=1065, test=275
20
+ 2026-01-22 11:44:04 | INFO | Backbone trainable parameters: 21,279,237 (lr=1e-05)
21
+ 2026-01-22 11:44:04 | INFO | Head parameters: 2,307 (lr=0.0005)
22
+ 2026-01-22 11:44:04 | INFO | Total trainable parameters: 21,281,544
23
+ 2026-01-22 11:44:04 | INFO | [MIXED MODE] Starting training for 5000 steps
24
+ 2026-01-22 11:44:04 | INFO | Backbone LR: 1e-05, Head LR: 0.0005
25
+ 2026-01-22 11:46:24 | INFO | [Step 100] musicality/loss=2.2005 | musicality/mse=8.3402 | alignment/loss=2.2115 | alignment/mse=9.0544 | preference/loss=0.9264 | preference/accuracy=0.5772 | lr_backbone=5.05e-06 | lr_heads=2.53e-04
26
+ 2026-01-22 11:48:37 | INFO | [Step 200] musicality/loss=0.7967 | musicality/mse=1.0614 | alignment/loss=0.7918 | alignment/mse=1.0462 | preference/loss=0.5578 | preference/accuracy=0.7228 | lr_backbone=1.00e-05 | lr_heads=5.00e-04
27
+ 2026-01-22 11:50:55 | INFO | [Step 300] musicality/loss=0.6328 | musicality/mse=0.6628 | alignment/loss=0.6676 | alignment/mse=0.7203 | preference/loss=0.4710 | preference/accuracy=0.7700 | lr_backbone=9.99e-06 | lr_heads=4.99e-04
28
+ 2026-01-22 11:53:17 | INFO | [Step 400] musicality/loss=0.5768 | musicality/mse=0.5607 | alignment/loss=0.6102 | alignment/mse=0.6049 | preference/loss=0.4418 | preference/accuracy=0.7941 | lr_backbone=9.96e-06 | lr_heads=4.98e-04
29
+ 2026-01-22 11:55:31 | INFO | [Step 500] musicality/loss=0.5430 | musicality/mse=0.4994 | alignment/loss=0.5927 | alignment/mse=0.5801 | preference/loss=0.4014 | preference/accuracy=0.8159 | lr_backbone=9.90e-06 | lr_heads=4.95e-04
30
+ 2026-01-22 11:55:31 | INFO | [Step 500] Running validation...
31
+ 2026-01-22 11:55:40 | INFO | [Val] musicality: loss=1.1473 | mse=1.8126
32
+ 2026-01-22 11:55:47 | INFO | [Val] alignment: loss=1.0390 | mse=1.5827
33
+ 2026-01-22 11:56:00 | INFO | [Val] preference: loss=0.5431 | accuracy=0.7405
34
+ 2026-01-22 11:56:00 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed_linear/20260122_1143/ckpt/mixed_best.pt (81.2MB, 58 params)
35
+ 2026-01-22 11:56:00 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed_linear/20260122_1143/ckpt/mixed_best_full.pt (141.3MB, 3 heads)
36
+ 2026-01-22 11:56:00 | INFO | New best model saved (val_loss=0.9098)
37
+ 2026-01-22 11:58:16 | INFO | [Step 600] musicality/loss=0.5325 | musicality/mse=0.4837 | alignment/loss=0.5695 | alignment/mse=0.5422 | preference/loss=0.3828 | preference/accuracy=0.8187 | lr_backbone=9.83e-06 | lr_heads=4.91e-04
38
+ 2026-01-22 12:00:34 | INFO | [Step 700] musicality/loss=0.5178 | musicality/mse=0.4543 | alignment/loss=0.5538 | alignment/mse=0.5039 | preference/loss=0.3556 | preference/accuracy=0.8400 | lr_backbone=9.73e-06 | lr_heads=4.87e-04
39
+ 2026-01-22 12:02:52 | INFO | [Step 800] musicality/loss=0.4982 | musicality/mse=0.4278 | alignment/loss=0.5263 | alignment/mse=0.4646 | preference/loss=0.3258 | preference/accuracy=0.8547 | lr_backbone=9.62e-06 | lr_heads=4.81e-04
40
+ 2026-01-22 12:05:06 | INFO | [Step 900] musicality/loss=0.4885 | musicality/mse=0.4085 | alignment/loss=0.5110 | alignment/mse=0.4444 | preference/loss=0.2978 | preference/accuracy=0.8709 | lr_backbone=9.48e-06 | lr_heads=4.74e-04
41
+ 2026-01-22 12:07:14 | INFO | [Step 1000] musicality/loss=0.4678 | musicality/mse=0.3857 | alignment/loss=0.4986 | alignment/mse=0.4226 | preference/loss=0.2730 | preference/accuracy=0.8916 | lr_backbone=9.33e-06 | lr_heads=4.67e-04
42
+ 2026-01-22 12:07:14 | INFO | [Step 1000] Running validation...
43
+ 2026-01-22 12:07:19 | INFO | [Val] musicality: loss=1.3048 | mse=2.2855
44
+ 2026-01-22 12:07:24 | INFO | [Val] alignment: loss=1.2686 | mse=2.1902
45
+ 2026-01-22 12:07:34 | INFO | [Val] preference: loss=0.6575 | accuracy=0.7058
46
+ 2026-01-22 12:07:34 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed_linear/20260122_1143/ckpt/mixed_step_1000.pt (81.2MB, 58 params)
47
+ 2026-01-22 12:07:34 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed_linear/20260122_1143/ckpt/mixed_step_1000_full.pt (141.3MB, 3 heads)
48
+ 2026-01-22 12:09:42 | INFO | [Step 1100] musicality/loss=0.4760 | musicality/mse=0.3916 | alignment/loss=0.4768 | alignment/mse=0.3976 | preference/loss=0.2531 | preference/accuracy=0.8972 | lr_backbone=9.16e-06 | lr_heads=4.58e-04
49
+ 2026-01-22 12:11:45 | INFO | [Step 1200] musicality/loss=0.4702 | musicality/mse=0.3893 | alignment/loss=0.4787 | alignment/mse=0.3995 | preference/loss=0.2229 | preference/accuracy=0.9147 | lr_backbone=8.97e-06 | lr_heads=4.48e-04
50
+ 2026-01-22 12:13:48 | INFO | [Step 1300] musicality/loss=0.4608 | musicality/mse=0.3740 | alignment/loss=0.4580 | alignment/mse=0.3734 | preference/loss=0.2015 | preference/accuracy=0.9309 | lr_backbone=8.76e-06 | lr_heads=4.38e-04
51
+ 2026-01-22 12:15:48 | INFO | [Step 1400] musicality/loss=0.4470 | musicality/mse=0.3551 | alignment/loss=0.4462 | alignment/mse=0.3573 | preference/loss=0.1836 | preference/accuracy=0.9369 | lr_backbone=8.54e-06 | lr_heads=4.27e-04
52
+ 2026-01-22 12:17:55 | INFO | [Step 1500] musicality/loss=0.4396 | musicality/mse=0.3439 | alignment/loss=0.4267 | alignment/mse=0.3357 | preference/loss=0.1711 | preference/accuracy=0.9428 | lr_backbone=8.30e-06 | lr_heads=4.15e-04
53
+ 2026-01-22 12:17:55 | INFO | [Step 1500] Running validation...
54
+ 2026-01-22 12:18:00 | INFO | [Val] musicality: loss=1.3053 | mse=2.2794
55
+ 2026-01-22 12:18:05 | INFO | [Val] alignment: loss=1.1733 | mse=1.9250
56
+ 2026-01-22 12:18:12 | INFO | [Val] preference: loss=0.9029 | accuracy=0.6954
57
+ 2026-01-22 12:20:16 | INFO | [Step 1600] musicality/loss=0.4350 | musicality/mse=0.3406 | alignment/loss=0.4201 | alignment/mse=0.3266 | preference/loss=0.1518 | preference/accuracy=0.9556 | lr_backbone=8.04e-06 | lr_heads=4.02e-04
58
+ 2026-01-22 12:22:29 | INFO | [Step 1700] musicality/loss=0.4266 | musicality/mse=0.3288 | alignment/loss=0.4157 | alignment/mse=0.3292 | preference/loss=0.1400 | preference/accuracy=0.9616 | lr_backbone=7.78e-06 | lr_heads=3.89e-04
59
+ 2026-01-22 12:24:36 | INFO | [Step 1800] musicality/loss=0.4175 | musicality/mse=0.3159 | alignment/loss=0.4053 | alignment/mse=0.3053 | preference/loss=0.1269 | preference/accuracy=0.9672 | lr_backbone=7.50e-06 | lr_heads=3.75e-04
60
+ 2026-01-22 12:26:44 | INFO | [Step 1900] musicality/loss=0.4130 | musicality/mse=0.3172 | alignment/loss=0.3933 | alignment/mse=0.2983 | preference/loss=0.1208 | preference/accuracy=0.9647 | lr_backbone=7.21e-06 | lr_heads=3.61e-04
61
+ 2026-01-22 12:28:50 | INFO | [Step 2000] musicality/loss=0.3964 | musicality/mse=0.2923 | alignment/loss=0.3785 | alignment/mse=0.2798 | preference/loss=0.1063 | preference/accuracy=0.9744 | lr_backbone=6.91e-06 | lr_heads=3.46e-04
62
+ 2026-01-22 12:28:50 | INFO | [Step 2000] Running validation...
63
+ 2026-01-22 12:28:54 | INFO | [Val] musicality: loss=1.2472 | mse=2.1547
64
+ 2026-01-22 12:28:59 | INFO | [Val] alignment: loss=1.3002 | mse=2.3432
65
+ 2026-01-22 12:29:06 | INFO | [Val] preference: loss=1.0439 | accuracy=0.6999
66
+ 2026-01-22 12:29:06 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed_linear/20260122_1143/ckpt/mixed_step_2000.pt (81.2MB, 58 params)
67
+ 2026-01-22 12:29:07 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed_linear/20260122_1143/ckpt/mixed_step_2000_full.pt (141.3MB, 3 heads)
68
+ 2026-01-22 12:31:17 | INFO | [Step 2100] musicality/loss=0.3994 | musicality/mse=0.2960 | alignment/loss=0.3650 | alignment/mse=0.2675 | preference/loss=0.1017 | preference/accuracy=0.9750 | lr_backbone=6.61e-06 | lr_heads=3.30e-04
69
+ 2026-01-22 12:33:21 | INFO | [Step 2200] musicality/loss=0.3913 | musicality/mse=0.2835 | alignment/loss=0.3655 | alignment/mse=0.2679 | preference/loss=0.0847 | preference/accuracy=0.9816 | lr_backbone=6.29e-06 | lr_heads=3.15e-04
70
+ 2026-01-22 12:35:26 | INFO | [Step 2300] musicality/loss=0.3898 | musicality/mse=0.2861 | alignment/loss=0.3502 | alignment/mse=0.2519 | preference/loss=0.0748 | preference/accuracy=0.9838 | lr_backbone=5.98e-06 | lr_heads=2.99e-04
71
+ 2026-01-22 12:37:23 | INFO | [Step 2400] musicality/loss=0.3874 | musicality/mse=0.2812 | alignment/loss=0.3407 | alignment/mse=0.2414 | preference/loss=0.0749 | preference/accuracy=0.9822 | lr_backbone=5.65e-06 | lr_heads=2.83e-04
72
+ 2026-01-22 12:39:29 | INFO | [Step 2500] musicality/loss=0.3718 | musicality/mse=0.2615 | alignment/loss=0.3326 | alignment/mse=0.2398 | preference/loss=0.0760 | preference/accuracy=0.9831 | lr_backbone=5.33e-06 | lr_heads=2.66e-04
73
+ 2026-01-22 12:39:29 | INFO | [Step 2500] Running validation...
74
+ 2026-01-22 12:39:34 | INFO | [Val] musicality: loss=1.3347 | mse=2.3899
75
+ 2026-01-22 12:39:39 | INFO | [Val] alignment: loss=1.2975 | mse=2.3241
76
+ 2026-01-22 12:39:47 | INFO | [Val] preference: loss=1.1918 | accuracy=0.7023
77
+ 2026-01-22 12:41:57 | INFO | [Step 2600] musicality/loss=0.3819 | musicality/mse=0.2791 | alignment/loss=0.3210 | alignment/mse=0.2263 | preference/loss=0.0686 | preference/accuracy=0.9831 | lr_backbone=5.00e-06 | lr_heads=2.50e-04
78
+ 2026-01-22 12:44:04 | INFO | [Step 2700] musicality/loss=0.3627 | musicality/mse=0.2539 | alignment/loss=0.3177 | alignment/mse=0.2255 | preference/loss=0.0612 | preference/accuracy=0.9894 | lr_backbone=4.67e-06 | lr_heads=2.34e-04
79
+ 2026-01-22 12:46:14 | INFO | [Step 2800] musicality/loss=0.3585 | musicality/mse=0.2494 | alignment/loss=0.3108 | alignment/mse=0.2167 | preference/loss=0.0606 | preference/accuracy=0.9888 | lr_backbone=4.35e-06 | lr_heads=2.17e-04
80
+ 2026-01-22 12:48:15 | INFO | [Step 2900] musicality/loss=0.3659 | musicality/mse=0.2576 | alignment/loss=0.3024 | alignment/mse=0.2116 | preference/loss=0.0593 | preference/accuracy=0.9869 | lr_backbone=4.02e-06 | lr_heads=2.01e-04
81
+ 2026-01-22 12:50:20 | INFO | [Step 3000] musicality/loss=0.3553 | musicality/mse=0.2498 | alignment/loss=0.2947 | alignment/mse=0.2077 | preference/loss=0.0585 | preference/accuracy=0.9878 | lr_backbone=3.71e-06 | lr_heads=1.85e-04
82
+ 2026-01-22 12:50:20 | INFO | [Step 3000] Running validation...
83
+ 2026-01-22 12:50:25 | INFO | [Val] musicality: loss=1.3277 | mse=2.3656
84
+ 2026-01-22 12:50:30 | INFO | [Val] alignment: loss=1.2973 | mse=2.3727
85
+ 2026-01-22 12:50:38 | INFO | [Val] preference: loss=1.3603 | accuracy=0.6919
86
+ 2026-01-22 12:50:38 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed_linear/20260122_1143/ckpt/mixed_step_3000.pt (81.2MB, 58 params)
87
+ 2026-01-22 12:50:38 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed_linear/20260122_1143/ckpt/mixed_step_3000_full.pt (141.3MB, 3 heads)
88
+ 2026-01-22 12:52:38 | INFO | [Step 3100] musicality/loss=0.3486 | musicality/mse=0.2427 | alignment/loss=0.2857 | alignment/mse=0.1987 | preference/loss=0.0523 | preference/accuracy=0.9900 | lr_backbone=3.39e-06 | lr_heads=1.70e-04
89
+ 2026-01-22 12:54:36 | INFO | [Step 3200] musicality/loss=0.3537 | musicality/mse=0.2494 | alignment/loss=0.2805 | alignment/mse=0.1955 | preference/loss=0.0583 | preference/accuracy=0.9869 | lr_backbone=3.09e-06 | lr_heads=1.54e-04
90
+ 2026-01-22 12:56:42 | INFO | [Step 3300] musicality/loss=0.3390 | musicality/mse=0.2315 | alignment/loss=0.2796 | alignment/mse=0.1946 | preference/loss=0.0515 | preference/accuracy=0.9922 | lr_backbone=2.79e-06 | lr_heads=1.39e-04
91
+ 2026-01-22 12:58:57 | INFO | [Step 3400] musicality/loss=0.3474 | musicality/mse=0.2402 | alignment/loss=0.2661 | alignment/mse=0.1839 | preference/loss=0.0536 | preference/accuracy=0.9859 | lr_backbone=2.50e-06 | lr_heads=1.25e-04
92
+ 2026-01-22 13:01:01 | INFO | [Step 3500] musicality/loss=0.3431 | musicality/mse=0.2380 | alignment/loss=0.2742 | alignment/mse=0.1948 | preference/loss=0.0469 | preference/accuracy=0.9916 | lr_backbone=2.22e-06 | lr_heads=1.11e-04
93
+ 2026-01-22 13:01:01 | INFO | [Step 3500] Running validation...
94
+ 2026-01-22 13:01:05 | INFO | [Val] musicality: loss=1.3314 | mse=2.3666
95
+ 2026-01-22 13:01:09 | INFO | [Val] alignment: loss=1.3121 | mse=2.4192
96
+ 2026-01-22 13:01:18 | INFO | [Val] preference: loss=1.3744 | accuracy=0.6954
97
+ 2026-01-22 13:03:23 | INFO | [Step 3600] musicality/loss=0.3315 | musicality/mse=0.2288 | alignment/loss=0.2630 | alignment/mse=0.1819 | preference/loss=0.0501 | preference/accuracy=0.9897 | lr_backbone=1.96e-06 | lr_heads=9.78e-05
98
+ 2026-01-22 13:05:35 | INFO | [Step 3700] musicality/loss=0.3355 | musicality/mse=0.2339 | alignment/loss=0.2564 | alignment/mse=0.1801 | preference/loss=0.0435 | preference/accuracy=0.9931 | lr_backbone=1.70e-06 | lr_heads=8.52e-05
99
+ 2026-01-22 13:07:35 | INFO | [Step 3800] musicality/loss=0.3233 | musicality/mse=0.2131 | alignment/loss=0.2572 | alignment/mse=0.1828 | preference/loss=0.0474 | preference/accuracy=0.9916 | lr_backbone=1.46e-06 | lr_heads=7.32e-05
100
+ 2026-01-22 13:09:36 | INFO | [Step 3900] musicality/loss=0.3264 | musicality/mse=0.2250 | alignment/loss=0.2501 | alignment/mse=0.1753 | preference/loss=0.0467 | preference/accuracy=0.9891 | lr_backbone=1.24e-06 | lr_heads=6.20e-05
101
+ 2026-01-22 13:11:37 | INFO | [Step 4000] musicality/loss=0.3311 | musicality/mse=0.2319 | alignment/loss=0.2497 | alignment/mse=0.1790 | preference/loss=0.0453 | preference/accuracy=0.9909 | lr_backbone=1.03e-06 | lr_heads=5.17e-05
102
+ 2026-01-22 13:11:37 | INFO | [Step 4000] Running validation...
103
+ 2026-01-22 13:11:41 | INFO | [Val] musicality: loss=1.3147 | mse=2.3406
104
+ 2026-01-22 13:11:45 | INFO | [Val] alignment: loss=1.3227 | mse=2.4580
105
+ 2026-01-22 13:11:53 | INFO | [Val] preference: loss=1.4434 | accuracy=0.6954
106
+ 2026-01-22 13:11:53 | INFO | Saved RewardModel checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed_linear/20260122_1143/ckpt/mixed_step_4000.pt (81.2MB, 58 params)
107
+ 2026-01-22 13:11:53 | INFO | Saved full training checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/downstream_mixed_linear/20260122_1143/ckpt/mixed_step_4000_full.pt (141.3MB, 3 heads)
108
+ 2026-01-22 13:13:52 | INFO | [Step 4100] musicality/loss=0.3189 | musicality/mse=0.2125 | alignment/loss=0.2453 | alignment/mse=0.1733 | preference/loss=0.0447 | preference/accuracy=0.9922 | lr_backbone=8.43e-07 | lr_heads=4.21e-05
109
+ 2026-01-22 13:15:55 | INFO | [Step 4200] musicality/loss=0.3213 | musicality/mse=0.2174 | alignment/loss=0.2428 | alignment/mse=0.1738 | preference/loss=0.0405 | preference/accuracy=0.9934 | lr_backbone=6.70e-07 | lr_heads=3.35e-05
110
+ 2026-01-22 13:17:53 | INFO | [Step 4300] musicality/loss=0.3200 | musicality/mse=0.2224 | alignment/loss=0.2357 | alignment/mse=0.1659 | preference/loss=0.0388 | preference/accuracy=0.9941 | lr_backbone=5.16e-07 | lr_heads=2.58e-05
111
+ 2026-01-22 13:19:56 | INFO | [Step 4400] musicality/loss=0.3121 | musicality/mse=0.2100 | alignment/loss=0.2416 | alignment/mse=0.1771 | preference/loss=0.0408 | preference/accuracy=0.9950 | lr_backbone=3.81e-07 | lr_heads=1.90e-05
finetune_human/20260124_2143/config.yaml ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DEVICES: '3'
2
+ accelerate:
3
+ mixed_precision: bf16
4
+ basics:
5
+ random_seed: 42
6
+ save_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model
7
+ dataset:
8
+ audio_dropout:
9
+ apply_to_eval: false
10
+ apply_to_ref: true
11
+ enabled: false
12
+ eval_only_on_training: true
13
+ max_duration: 1500
14
+ min_duration: 1500
15
+ train_mode: start
16
+ cache_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/tmp
17
+ db_path: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/backend/database.db
18
+ duration: 600.0
19
+ embedding_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/supervised_embeddings
20
+ max_samples: null
21
+ max_val_samples: null
22
+ metadata_jsonl: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/all_comparisons.jsonl
23
+ mode: raw_text_frozen_audio
24
+ preference_file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/human_annotations/train.json
25
+ sample_rate: 24000
26
+ val_preference_file: null
27
+ loss:
28
+ IF_ratio: 0.5
29
+ filter_ties: true
30
+ label_smoothing: 0.0
31
+ reduction: mean
32
+ model:
33
+ attention_mode: SA
34
+ attn_dropout: 0.0
35
+ category_embeddings: null
36
+ dim: 768
37
+ dim_head: 64
38
+ downsample:
39
+ configs:
40
+ conv2_4x:
41
+ factor: 4
42
+ kernel_size: 5
43
+ kind: conv*2
44
+ use_layernorm: true
45
+ conv_4x:
46
+ factor: 4
47
+ kernel_size: 5
48
+ kind: conv
49
+ stage: 1
50
+ use_layernorm: true
51
+ glu_4x:
52
+ factor: 4
53
+ kernel_size: 5
54
+ kind: gluconv*2+pw
55
+ use_layernorm: true
56
+ mean:
57
+ factor: 2
58
+ kind: mean
59
+ mean_4x:
60
+ dropout: 0.0
61
+ factor: 30
62
+ kind: mean+mlp
63
+ mlp_ratio: 2.0
64
+ none:
65
+ factor: 1
66
+ kind: none
67
+ eval: mean_4x
68
+ ref: null
69
+ text: none
70
+ ff_dropout: 0.0
71
+ ff_mult: 4
72
+ freeze_audio: true
73
+ freeze_text: true
74
+ gradient_checkpointing: false
75
+ heads: 8
76
+ joint_tf_depth: 1
77
+ load_config:
78
+ checkpoint_path: null
79
+ frozen_from_pretrained: true
80
+ pretrained_name: OpenMuQ/MuQ-MuLan-large
81
+ strict: false
82
+ mlp_dim: 768
83
+ mode: concat_text_late
84
+ model_name: OpenMuQ/MuQ-MuLan-large
85
+ name: reward
86
+ no_condition: false
87
+ null_embedding:
88
+ audio:
89
+ dropout: 0.5
90
+ length: 10
91
+ lyrics:
92
+ dropout: 0.3
93
+ length: 10
94
+ text:
95
+ dropout: 0
96
+ length: 10
97
+ output_dim: 2
98
+ prompt_tf_depth: 4
99
+ sr: 24000
100
+ text_encoder:
101
+ name: muq_mulan
102
+ tune: null
103
+ text_lora_config: null
104
+ train_muq_depth: 0
105
+ train_muqmulan: false
106
+ use_audio: true
107
+ use_layer_idx: -1
108
+ project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
109
+ run_name: null
110
+ train:
111
+ batch_size: 48
112
+ betas:
113
+ - 0.9
114
+ - 0.99
115
+ ema_decay: 0.9999
116
+ ema_update_every: 1
117
+ enable_gradient_checkpointing: true
118
+ force_clear_prev_results: false
119
+ grad_accum_every: 1
120
+ log_tensorboard: true
121
+ lr_schedule:
122
+ min_lr_ratio: 0.001
123
+ name: linear_cosine
124
+ total_steps: 4000
125
+ warmup_steps: 300
126
+ max_grad_norm: 1
127
+ mlp_lr: 0.0001
128
+ num_train_steps: 4000
129
+ num_valid_batches: null
130
+ num_workers: 8
131
+ other_lr: 1.0e-05
132
+ resume: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.best_29999.pt
133
+ resume_optimizer: false
134
+ save_model_every: 2000
135
+ use_checkpoint_config: true
136
+ use_ema: true
137
+ use_lion: false
138
+ valid_batch_size: 20
139
+ valid_every: 100
140
+ valid_frac: 0.1
141
+ verify_weights_on_load: true
142
+ validate_only: false
finetune_human/20260124_2143/reward_model/1769262210.5061178/events.out.tfevents.1769262210.MACLAB-S004.2626926.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82ee766b07252644d7045f50ffd3d29ed1cbc0b26a834bdb1d855c526f959108
3
+ size 503
finetune_human/20260124_2143/reward_model/1769262210.5078583/hparams.yml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ batch_size: 48
2
+ grad_accum_every: 1
3
+ learning_rate: 0.0001
4
+ num_train_steps: 4000
finetune_human/20260124_2143/reward_model/events.out.tfevents.1769262210.MACLAB-S004.2626926.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:361130a96e5393eb1f50a4f818c47547a16295e3f01976ce0e9113e0a561cf68
3
+ size 2219689
finetune_human/20260124_2143/train.20260124_2143.log ADDED
@@ -0,0 +1,803 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-24 21:43:19 | INFO | Log file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/train.20260124_2143.log
2
+ 2026-01-24 21:43:19 | INFO | Random seed set to 42
3
+ 2026-01-24 21:43:21 | INFO | Created RawTextFrozenAudioDataset with 3463 samples
4
+ 2026-01-24 21:43:21 | INFO | Split dataset into train (3117) and validation (346) sets (ratio: 10.00%)
5
+ 2026-01-24 21:43:21 | INFO | Will resume from checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.best_29999.pt
6
+ 2026-01-24 21:43:21 | INFO | Using checkpoint config for model initialization (continue training mode)
7
+ 2026-01-24 21:43:29 | INFO | Created RewardAttentionModel with attention_mode=SA
8
+ 2026-01-24 21:43:29 | INFO | Created PreferenceLoss with filter_ties=True
9
+ 2026-01-24 21:43:29 | INFO | ✓ Gradient checkpointing enabled
10
+ 2026-01-24 21:43:29 | INFO | ✓ EMA enabled with decay=0.9999, update_every=1 (CPU offload)
11
+ 2026-01-24 21:43:29 | INFO | MLP head parameters: 1,186,563 params, lr=0.0001
12
+ 2026-01-24 21:43:29 | INFO | Other parameters: 37,397,634 params, lr=1e-05
13
+ 2026-01-24 21:43:29 | INFO | Using lr_schedule=linear_cosine warmup_steps=300 total_steps=4000
14
+ 2026-01-24 21:43:29 | INFO | Training with fixed validation set
15
+ 2026-01-24 21:43:29 | INFO | Train batch_size: 48, Valid batch_size: 20
16
+ 2026-01-24 21:43:29 | INFO | Missing keys (782): ['text_module.model.embeddings.word_embeddings.weight', 'text_module.model.embeddings.position_embeddings.weight', 'text_module.model.embeddings.token_type_embeddings.weight', 'text_module.model.embeddings.LayerNorm.weight', 'text_module.model.embeddings.LayerNorm.bias']...
17
+ 2026-01-24 21:43:29 | INFO | ✓ EMA state loaded
18
+ 2026-01-24 21:43:29 | INFO | ✓ Starting from step 0 (transfer learning mode, ignoring checkpoint steps=29999)
19
+ 2026-01-24 21:43:29 | INFO | Resumed from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.best_29999.pt
20
+ 2026-01-24 21:43:29 | INFO | Parameters: 701.162M total, 38.584M trainable
21
+ 2026-01-24 21:43:29 | INFO | Text encoder (frozen): 328.389M
22
+ 2026-01-24 21:43:29 | INFO | Audio encoder (frozen): 334.189M
23
+ 2026-01-24 21:43:29 | INFO | Other trainable: 38.584M
24
+ 2026-01-24 21:43:29 | INFO | ℹ No LoRA configuration detected
25
+ 2026-01-24 21:43:30 | INFO | ============================================================
26
+ 2026-01-24 21:43:30 | INFO | Ready to start training
27
+ 2026-01-24 21:43:30 | INFO | ============================================================
28
+ 2026-01-24 21:43:30 | INFO | Starting training from step 0
29
+ 2026-01-24 21:43:30 | INFO | ===== Accelerator / CUDA Debug Info =====
30
+ 2026-01-24 21:43:30 | INFO | accelerator.device = cuda
31
+ 2026-01-24 21:43:30 | INFO | mixed_precision = bf16
32
+ 2026-01-24 21:43:30 | INFO | distributed_type = NO
33
+ 2026-01-24 21:43:30 | INFO | num_processes = 1
34
+ 2026-01-24 21:43:30 | INFO | process_index = 0
35
+ 2026-01-24 21:43:30 | INFO | is_main_process = True
36
+ 2026-01-24 21:43:30 | INFO | torch.cuda.is_available() = True
37
+ 2026-01-24 21:43:30 | INFO | torch.cuda.device_count() = 1
38
+ 2026-01-24 21:43:30 | INFO | current_device = 0
39
+ 2026-01-24 21:43:30 | INFO | device_name = NVIDIA GeForce RTX 4090
40
+ 2026-01-24 21:43:30 | INFO | model parameter device = cuda:0
41
+ 2026-01-24 21:43:30 | INFO | Training for 4000.0 steps (~63 epochs, 64 steps/epoch)
42
+ 2026-01-24 21:43:38 | INFO | Step 0: loss=1.6133 | IF_loss=2.2461, MQ_loss=0.9805 | acc=0.740 (IF=0.708, MQ=0.771) | lr=0.000001
43
+ 2026-01-24 21:43:38 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.0.pt (filtered to 38.584M trainable parameters)
44
+ 2026-01-24 21:43:39 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.0.pt (575.2MB)
45
+ 2026-01-24 21:43:39 | INFO | Step 0: Saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.0.pt
46
+ 2026-01-24 21:45:32 | INFO |
47
+ ============================================================
48
+ Validation Results (took 9.56s):
49
+ Samples: 346 instruction, 346 quality
50
+ Instruction Acc: 0.6821
51
+ Quality Acc: 0.6387
52
+ Average Acc: 0.6604
53
+ Total Loss: 1.8726
54
+ Instruction Loss: 1.6586
55
+ Quality Loss: 2.0866
56
+ ============================================================
57
+ 2026-01-24 21:45:32 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_99.pt (filtered to 38.584M trainable parameters)
58
+ 2026-01-24 21:45:33 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_99.pt (575.2MB)
59
+ 2026-01-24 21:45:33 | INFO | Best 1 checkpoints:
60
+ 2026-01-24 21:45:33 | INFO | 1. Step 99: acc=0.6604 (reward_model.best_99.pt)
61
+ 2026-01-24 21:45:34 | INFO | Step 100: loss=1.5309 | IF_loss=1.2373, MQ_loss=1.8246 | acc=0.646 (IF=0.688, MQ=0.604) | lr=0.000034
62
+ 2026-01-24 21:47:29 | INFO |
63
+ ============================================================
64
+ Validation Results (took 8.11s):
65
+ Samples: 346 instruction, 346 quality
66
+ Instruction Acc: 0.6850
67
+ Quality Acc: 0.6387
68
+ Average Acc: 0.6618
69
+ Total Loss: 1.8631
70
+ Instruction Loss: 1.6525
71
+ Quality Loss: 2.0736
72
+ ============================================================
73
+ 2026-01-24 21:47:29 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_199.pt (filtered to 38.584M trainable parameters)
74
+ 2026-01-24 21:47:30 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_199.pt (575.2MB)
75
+ 2026-01-24 21:47:30 | INFO | Best 2 checkpoints:
76
+ 2026-01-24 21:47:30 | INFO | 1. Step 199: acc=0.6618 (reward_model.best_199.pt)
77
+ 2026-01-24 21:47:30 | INFO | 2. Step 99: acc=0.6604 (reward_model.best_99.pt)
78
+ 2026-01-24 21:47:31 | INFO | Step 200: loss=0.4360 | IF_loss=0.4299, MQ_loss=0.4421 | acc=0.833 (IF=0.812, MQ=0.854) | lr=0.000067
79
+ 2026-01-24 21:49:25 | INFO |
80
+ ============================================================
81
+ Validation Results (took 9.42s):
82
+ Samples: 346 instruction, 346 quality
83
+ Instruction Acc: 0.6850
84
+ Quality Acc: 0.6387
85
+ Average Acc: 0.6618
86
+ Total Loss: 1.8438
87
+ Instruction Loss: 1.6364
88
+ Quality Loss: 2.0512
89
+ ============================================================
90
+ 2026-01-24 21:49:25 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_299.pt (filtered to 38.584M trainable parameters)
91
+ 2026-01-24 21:49:25 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_299.pt (575.2MB)
92
+ 2026-01-24 21:49:25 | INFO | Best 3 checkpoints:
93
+ 2026-01-24 21:49:25 | INFO | 1. Step 199: acc=0.6618 (reward_model.best_199.pt)
94
+ 2026-01-24 21:49:25 | INFO | 2. Step 299: acc=0.6618 (reward_model.best_299.pt)
95
+ 2026-01-24 21:49:25 | INFO | 3. Step 99: acc=0.6604 (reward_model.best_99.pt)
96
+ 2026-01-24 21:49:26 | INFO | Step 300: loss=0.4121 | IF_loss=0.5007, MQ_loss=0.3235 | acc=0.844 (IF=0.792, MQ=0.896) | lr=0.000100
97
+ 2026-01-24 21:51:23 | INFO |
98
+ ============================================================
99
+ Validation Results (took 7.32s):
100
+ Samples: 346 instruction, 346 quality
101
+ Instruction Acc: 0.6850
102
+ Quality Acc: 0.6387
103
+ Average Acc: 0.6618
104
+ Total Loss: 1.8266
105
+ Instruction Loss: 1.6230
106
+ Quality Loss: 2.0303
107
+ ============================================================
108
+ 2026-01-24 21:51:23 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_399.pt (filtered to 38.584M trainable parameters)
109
+ 2026-01-24 21:51:24 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_399.pt (575.2MB)
110
+ 2026-01-24 21:51:24 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_99.pt
111
+ 2026-01-24 21:51:24 | INFO | Best 3 checkpoints:
112
+ 2026-01-24 21:51:24 | INFO | 1. Step 199: acc=0.6618 (reward_model.best_199.pt)
113
+ 2026-01-24 21:51:24 | INFO | 2. Step 299: acc=0.6618 (reward_model.best_299.pt)
114
+ 2026-01-24 21:51:24 | INFO | 3. Step 399: acc=0.6618 (reward_model.best_399.pt)
115
+ 2026-01-24 21:51:25 | INFO | Step 400: loss=0.4819 | IF_loss=0.4988, MQ_loss=0.4650 | acc=0.760 (IF=0.708, MQ=0.812) | lr=0.000100
116
+ 2026-01-24 21:53:18 | INFO |
117
+ ============================================================
118
+ Validation Results (took 8.30s):
119
+ Samples: 346 instruction, 346 quality
120
+ Instruction Acc: 0.6821
121
+ Quality Acc: 0.6416
122
+ Average Acc: 0.6618
123
+ Total Loss: 1.8103
124
+ Instruction Loss: 1.6100
125
+ Quality Loss: 2.0107
126
+ ============================================================
127
+ 2026-01-24 21:53:18 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_499.pt (filtered to 38.584M trainable parameters)
128
+ 2026-01-24 21:53:19 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_499.pt (575.2MB)
129
+ 2026-01-24 21:53:19 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_499.pt
130
+ 2026-01-24 21:53:19 | INFO | Best 3 checkpoints:
131
+ 2026-01-24 21:53:19 | INFO | 1. Step 199: acc=0.6618 (reward_model.best_199.pt)
132
+ 2026-01-24 21:53:19 | INFO | 2. Step 299: acc=0.6618 (reward_model.best_299.pt)
133
+ 2026-01-24 21:53:19 | INFO | 3. Step 399: acc=0.6618 (reward_model.best_399.pt)
134
+ 2026-01-24 21:53:20 | INFO | Step 500: loss=0.4074 | IF_loss=0.4939, MQ_loss=0.3209 | acc=0.854 (IF=0.792, MQ=0.917) | lr=0.000099
135
+ 2026-01-24 21:55:17 | INFO |
136
+ ============================================================
137
+ Validation Results (took 7.55s):
138
+ Samples: 346 instruction, 346 quality
139
+ Instruction Acc: 0.6821
140
+ Quality Acc: 0.6416
141
+ Average Acc: 0.6618
142
+ Total Loss: 1.7951
143
+ Instruction Loss: 1.5986
144
+ Quality Loss: 1.9916
145
+ ============================================================
146
+ 2026-01-24 21:55:17 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_599.pt (filtered to 38.584M trainable parameters)
147
+ 2026-01-24 21:55:17 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_599.pt (575.2MB)
148
+ 2026-01-24 21:55:17 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_599.pt
149
+ 2026-01-24 21:55:17 | INFO | Best 3 checkpoints:
150
+ 2026-01-24 21:55:17 | INFO | 1. Step 199: acc=0.6618 (reward_model.best_199.pt)
151
+ 2026-01-24 21:55:17 | INFO | 2. Step 299: acc=0.6618 (reward_model.best_299.pt)
152
+ 2026-01-24 21:55:17 | INFO | 3. Step 399: acc=0.6618 (reward_model.best_399.pt)
153
+ 2026-01-24 21:55:18 | INFO | Step 600: loss=0.3505 | IF_loss=0.3784, MQ_loss=0.3226 | acc=0.844 (IF=0.812, MQ=0.875) | lr=0.000098
154
+ 2026-01-24 21:57:14 | INFO |
155
+ ============================================================
156
+ Validation Results (took 7.89s):
157
+ Samples: 346 instruction, 346 quality
158
+ Instruction Acc: 0.6821
159
+ Quality Acc: 0.6445
160
+ Average Acc: 0.6633
161
+ Total Loss: 1.7807
162
+ Instruction Loss: 1.5876
163
+ Quality Loss: 1.9739
164
+ ============================================================
165
+ 2026-01-24 21:57:14 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_699.pt (filtered to 38.584M trainable parameters)
166
+ 2026-01-24 21:57:14 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_699.pt (575.2MB)
167
+ 2026-01-24 21:57:14 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_399.pt
168
+ 2026-01-24 21:57:14 | INFO | Best 3 checkpoints:
169
+ 2026-01-24 21:57:14 | INFO | 1. Step 699: acc=0.6633 (reward_model.best_699.pt)
170
+ 2026-01-24 21:57:14 | INFO | 2. Step 199: acc=0.6618 (reward_model.best_199.pt)
171
+ 2026-01-24 21:57:14 | INFO | 3. Step 299: acc=0.6618 (reward_model.best_299.pt)
172
+ 2026-01-24 21:57:15 | INFO | Step 700: loss=0.2439 | IF_loss=0.3054, MQ_loss=0.1823 | acc=0.875 (IF=0.854, MQ=0.896) | lr=0.000097
173
+ 2026-01-24 21:59:13 | INFO |
174
+ ============================================================
175
+ Validation Results (took 7.71s):
176
+ Samples: 346 instruction, 346 quality
177
+ Instruction Acc: 0.6821
178
+ Quality Acc: 0.6474
179
+ Average Acc: 0.6647
180
+ Total Loss: 1.7686
181
+ Instruction Loss: 1.5780
182
+ Quality Loss: 1.9591
183
+ ============================================================
184
+ 2026-01-24 21:59:13 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_799.pt (filtered to 38.584M trainable parameters)
185
+ 2026-01-24 21:59:13 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_799.pt (575.2MB)
186
+ 2026-01-24 21:59:13 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_299.pt
187
+ 2026-01-24 21:59:13 | INFO | Best 3 checkpoints:
188
+ 2026-01-24 21:59:13 | INFO | 1. Step 799: acc=0.6647 (reward_model.best_799.pt)
189
+ 2026-01-24 21:59:13 | INFO | 2. Step 699: acc=0.6633 (reward_model.best_699.pt)
190
+ 2026-01-24 21:59:13 | INFO | 3. Step 199: acc=0.6618 (reward_model.best_199.pt)
191
+ 2026-01-24 21:59:14 | INFO | Step 800: loss=0.2827 | IF_loss=0.3525, MQ_loss=0.2128 | acc=0.885 (IF=0.875, MQ=0.896) | lr=0.000096
192
+ 2026-01-24 22:01:11 | INFO |
193
+ ============================================================
194
+ Validation Results (took 7.05s):
195
+ Samples: 346 instruction, 346 quality
196
+ Instruction Acc: 0.6850
197
+ Quality Acc: 0.6474
198
+ Average Acc: 0.6662
199
+ Total Loss: 1.7570
200
+ Instruction Loss: 1.5693
201
+ Quality Loss: 1.9446
202
+ ============================================================
203
+ 2026-01-24 22:01:11 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_899.pt (filtered to 38.584M trainable parameters)
204
+ 2026-01-24 22:01:12 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_899.pt (575.2MB)
205
+ 2026-01-24 22:01:12 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_199.pt
206
+ 2026-01-24 22:01:12 | INFO | Best 3 checkpoints:
207
+ 2026-01-24 22:01:12 | INFO | 1. Step 899: acc=0.6662 (reward_model.best_899.pt)
208
+ 2026-01-24 22:01:12 | INFO | 2. Step 799: acc=0.6647 (reward_model.best_799.pt)
209
+ 2026-01-24 22:01:12 | INFO | 3. Step 699: acc=0.6633 (reward_model.best_699.pt)
210
+ 2026-01-24 22:01:13 | INFO | Step 900: loss=0.1525 | IF_loss=0.1838, MQ_loss=0.1212 | acc=0.958 (IF=0.958, MQ=0.958) | lr=0.000094
211
+ 2026-01-24 22:03:07 | INFO |
212
+ ============================================================
213
+ Validation Results (took 7.74s):
214
+ Samples: 346 instruction, 346 quality
215
+ Instruction Acc: 0.6821
216
+ Quality Acc: 0.6474
217
+ Average Acc: 0.6647
218
+ Total Loss: 1.7472
219
+ Instruction Loss: 1.5625
220
+ Quality Loss: 1.9319
221
+ ============================================================
222
+ 2026-01-24 22:03:07 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_999.pt (filtered to 38.584M trainable parameters)
223
+ 2026-01-24 22:03:08 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_999.pt (575.2MB)
224
+ 2026-01-24 22:03:08 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_699.pt
225
+ 2026-01-24 22:03:08 | INFO | Best 3 checkpoints:
226
+ 2026-01-24 22:03:08 | INFO | 1. Step 899: acc=0.6662 (reward_model.best_899.pt)
227
+ 2026-01-24 22:03:08 | INFO | 2. Step 799: acc=0.6647 (reward_model.best_799.pt)
228
+ 2026-01-24 22:03:08 | INFO | 3. Step 999: acc=0.6647 (reward_model.best_999.pt)
229
+ 2026-01-24 22:03:09 | INFO | Step 1000: loss=0.1671 | IF_loss=0.1673, MQ_loss=0.1668 | acc=0.969 (IF=0.979, MQ=0.958) | lr=0.000091
230
+ 2026-01-24 22:05:04 | INFO |
231
+ ============================================================
232
+ Validation Results (took 6.94s):
233
+ Samples: 346 instruction, 346 quality
234
+ Instruction Acc: 0.6850
235
+ Quality Acc: 0.6474
236
+ Average Acc: 0.6662
237
+ Total Loss: 1.7380
238
+ Instruction Loss: 1.5555
239
+ Quality Loss: 1.9205
240
+ ============================================================
241
+ 2026-01-24 22:05:04 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1099.pt (filtered to 38.584M trainable parameters)
242
+ 2026-01-24 22:05:04 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1099.pt (575.2MB)
243
+ 2026-01-24 22:05:04 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_999.pt
244
+ 2026-01-24 22:05:04 | INFO | Best 3 checkpoints:
245
+ 2026-01-24 22:05:04 | INFO | 1. Step 899: acc=0.6662 (reward_model.best_899.pt)
246
+ 2026-01-24 22:05:04 | INFO | 2. Step 1099: acc=0.6662 (reward_model.best_1099.pt)
247
+ 2026-01-24 22:05:04 | INFO | 3. Step 799: acc=0.6647 (reward_model.best_799.pt)
248
+ 2026-01-24 22:05:05 | INFO | Step 1100: loss=0.1267 | IF_loss=0.1381, MQ_loss=0.1154 | acc=0.948 (IF=0.958, MQ=0.938) | lr=0.000089
249
+ 2026-01-24 22:07:02 | INFO |
250
+ ============================================================
251
+ Validation Results (took 7.34s):
252
+ Samples: 346 instruction, 346 quality
253
+ Instruction Acc: 0.6850
254
+ Quality Acc: 0.6416
255
+ Average Acc: 0.6633
256
+ Total Loss: 1.7320
257
+ Instruction Loss: 1.5520
258
+ Quality Loss: 1.9119
259
+ ============================================================
260
+ 2026-01-24 22:07:02 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1199.pt (filtered to 38.584M trainable parameters)
261
+ 2026-01-24 22:07:03 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1199.pt (575.2MB)
262
+ 2026-01-24 22:07:03 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1199.pt
263
+ 2026-01-24 22:07:03 | INFO | Best 3 checkpoints:
264
+ 2026-01-24 22:07:03 | INFO | 1. Step 899: acc=0.6662 (reward_model.best_899.pt)
265
+ 2026-01-24 22:07:03 | INFO | 2. Step 1099: acc=0.6662 (reward_model.best_1099.pt)
266
+ 2026-01-24 22:07:03 | INFO | 3. Step 799: acc=0.6647 (reward_model.best_799.pt)
267
+ 2026-01-24 22:07:04 | INFO | Step 1200: loss=0.1201 | IF_loss=0.1744, MQ_loss=0.0657 | acc=0.948 (IF=0.917, MQ=0.979) | lr=0.000086
268
+ 2026-01-24 22:08:59 | INFO |
269
+ ============================================================
270
+ Validation Results (took 7.61s):
271
+ Samples: 346 instruction, 346 quality
272
+ Instruction Acc: 0.6850
273
+ Quality Acc: 0.6416
274
+ Average Acc: 0.6633
275
+ Total Loss: 1.7259
276
+ Instruction Loss: 1.5481
277
+ Quality Loss: 1.9036
278
+ ============================================================
279
+ 2026-01-24 22:08:59 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1299.pt (filtered to 38.584M trainable parameters)
280
+ 2026-01-24 22:09:00 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1299.pt (575.2MB)
281
+ 2026-01-24 22:09:00 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1299.pt
282
+ 2026-01-24 22:09:00 | INFO | Best 3 checkpoints:
283
+ 2026-01-24 22:09:00 | INFO | 1. Step 899: acc=0.6662 (reward_model.best_899.pt)
284
+ 2026-01-24 22:09:00 | INFO | 2. Step 1099: acc=0.6662 (reward_model.best_1099.pt)
285
+ 2026-01-24 22:09:00 | INFO | 3. Step 799: acc=0.6647 (reward_model.best_799.pt)
286
+ 2026-01-24 22:09:01 | INFO | Step 1300: loss=0.0937 | IF_loss=0.1357, MQ_loss=0.0516 | acc=0.958 (IF=0.938, MQ=0.979) | lr=0.000083
287
+ 2026-01-24 22:10:53 | INFO |
288
+ ============================================================
289
+ Validation Results (took 7.24s):
290
+ Samples: 346 instruction, 346 quality
291
+ Instruction Acc: 0.6850
292
+ Quality Acc: 0.6416
293
+ Average Acc: 0.6633
294
+ Total Loss: 1.7217
295
+ Instruction Loss: 1.5459
296
+ Quality Loss: 1.8975
297
+ ============================================================
298
+ 2026-01-24 22:10:53 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1399.pt (filtered to 38.584M trainable parameters)
299
+ 2026-01-24 22:10:54 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1399.pt (575.2MB)
300
+ 2026-01-24 22:10:54 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1399.pt
301
+ 2026-01-24 22:10:54 | INFO | Best 3 checkpoints:
302
+ 2026-01-24 22:10:54 | INFO | 1. Step 899: acc=0.6662 (reward_model.best_899.pt)
303
+ 2026-01-24 22:10:54 | INFO | 2. Step 1099: acc=0.6662 (reward_model.best_1099.pt)
304
+ 2026-01-24 22:10:54 | INFO | 3. Step 799: acc=0.6647 (reward_model.best_799.pt)
305
+ 2026-01-24 22:10:55 | INFO | Step 1400: loss=0.0782 | IF_loss=0.1080, MQ_loss=0.0484 | acc=0.990 (IF=0.979, MQ=1.000) | lr=0.000080
306
+ 2026-01-24 22:12:49 | INFO |
307
+ ============================================================
308
+ Validation Results (took 7.28s):
309
+ Samples: 346 instruction, 346 quality
310
+ Instruction Acc: 0.6908
311
+ Quality Acc: 0.6416
312
+ Average Acc: 0.6662
313
+ Total Loss: 1.7182
314
+ Instruction Loss: 1.5441
315
+ Quality Loss: 1.8922
316
+ ============================================================
317
+ 2026-01-24 22:12:49 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1499.pt (filtered to 38.584M trainable parameters)
318
+ 2026-01-24 22:12:49 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1499.pt (575.2MB)
319
+ 2026-01-24 22:12:49 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_799.pt
320
+ 2026-01-24 22:12:49 | INFO | Best 3 checkpoints:
321
+ 2026-01-24 22:12:49 | INFO | 1. Step 899: acc=0.6662 (reward_model.best_899.pt)
322
+ 2026-01-24 22:12:49 | INFO | 2. Step 1099: acc=0.6662 (reward_model.best_1099.pt)
323
+ 2026-01-24 22:12:49 | INFO | 3. Step 1499: acc=0.6662 (reward_model.best_1499.pt)
324
+ 2026-01-24 22:12:50 | INFO | Step 1500: loss=0.0699 | IF_loss=0.0703, MQ_loss=0.0695 | acc=0.979 (IF=0.979, MQ=0.979) | lr=0.000076
325
+ 2026-01-24 22:14:41 | INFO |
326
+ ============================================================
327
+ Validation Results (took 7.76s):
328
+ Samples: 346 instruction, 346 quality
329
+ Instruction Acc: 0.6908
330
+ Quality Acc: 0.6416
331
+ Average Acc: 0.6662
332
+ Total Loss: 1.7151
333
+ Instruction Loss: 1.5435
334
+ Quality Loss: 1.8867
335
+ ============================================================
336
+ 2026-01-24 22:14:41 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1599.pt (filtered to 38.584M trainable parameters)
337
+ 2026-01-24 22:14:42 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1599.pt (575.2MB)
338
+ 2026-01-24 22:14:42 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1599.pt
339
+ 2026-01-24 22:14:42 | INFO | Best 3 checkpoints:
340
+ 2026-01-24 22:14:42 | INFO | 1. Step 899: acc=0.6662 (reward_model.best_899.pt)
341
+ 2026-01-24 22:14:42 | INFO | 2. Step 1099: acc=0.6662 (reward_model.best_1099.pt)
342
+ 2026-01-24 22:14:42 | INFO | 3. Step 1499: acc=0.6662 (reward_model.best_1499.pt)
343
+ 2026-01-24 22:14:46 | INFO | Step 1600: loss=0.0346 | IF_loss=0.0421, MQ_loss=0.0272 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000072
344
+ 2026-01-24 22:16:34 | INFO |
345
+ ============================================================
346
+ Validation Results (took 7.04s):
347
+ Samples: 346 instruction, 346 quality
348
+ Instruction Acc: 0.6908
349
+ Quality Acc: 0.6445
350
+ Average Acc: 0.6676
351
+ Total Loss: 1.7117
352
+ Instruction Loss: 1.5434
353
+ Quality Loss: 1.8800
354
+ ============================================================
355
+ 2026-01-24 22:16:34 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1699.pt (filtered to 38.584M trainable parameters)
356
+ 2026-01-24 22:16:35 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1699.pt (575.2MB)
357
+ 2026-01-24 22:16:35 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1499.pt
358
+ 2026-01-24 22:16:35 | INFO | Best 3 checkpoints:
359
+ 2026-01-24 22:16:35 | INFO | 1. Step 1699: acc=0.6676 (reward_model.best_1699.pt)
360
+ 2026-01-24 22:16:35 | INFO | 2. Step 899: acc=0.6662 (reward_model.best_899.pt)
361
+ 2026-01-24 22:16:35 | INFO | 3. Step 1099: acc=0.6662 (reward_model.best_1099.pt)
362
+ 2026-01-24 22:16:36 | INFO | Step 1700: loss=0.0480 | IF_loss=0.0609, MQ_loss=0.0350 | acc=0.990 (IF=0.979, MQ=1.000) | lr=0.000069
363
+ 2026-01-24 22:18:30 | INFO |
364
+ ============================================================
365
+ Validation Results (took 7.06s):
366
+ Samples: 346 instruction, 346 quality
367
+ Instruction Acc: 0.6936
368
+ Quality Acc: 0.6445
369
+ Average Acc: 0.6691
370
+ Total Loss: 1.7110
371
+ Instruction Loss: 1.5436
372
+ Quality Loss: 1.8783
373
+ ============================================================
374
+ 2026-01-24 22:18:30 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1799.pt (filtered to 38.584M trainable parameters)
375
+ 2026-01-24 22:18:30 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1799.pt (575.2MB)
376
+ 2026-01-24 22:18:30 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1099.pt
377
+ 2026-01-24 22:18:30 | INFO | Best 3 checkpoints:
378
+ 2026-01-24 22:18:30 | INFO | 1. Step 1799: acc=0.6691 (reward_model.best_1799.pt)
379
+ 2026-01-24 22:18:30 | INFO | 2. Step 1699: acc=0.6676 (reward_model.best_1699.pt)
380
+ 2026-01-24 22:18:30 | INFO | 3. Step 899: acc=0.6662 (reward_model.best_899.pt)
381
+ 2026-01-24 22:18:32 | INFO | Step 1800: loss=0.0316 | IF_loss=0.0473, MQ_loss=0.0159 | acc=0.990 (IF=0.979, MQ=1.000) | lr=0.000065
382
+ 2026-01-24 22:20:24 | INFO |
383
+ ============================================================
384
+ Validation Results (took 7.18s):
385
+ Samples: 346 instruction, 346 quality
386
+ Instruction Acc: 0.6908
387
+ Quality Acc: 0.6474
388
+ Average Acc: 0.6691
389
+ Total Loss: 1.7090
390
+ Instruction Loss: 1.5445
391
+ Quality Loss: 1.8734
392
+ ============================================================
393
+ 2026-01-24 22:20:24 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1899.pt (filtered to 38.584M trainable parameters)
394
+ 2026-01-24 22:20:25 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1899.pt (575.2MB)
395
+ 2026-01-24 22:20:25 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_899.pt
396
+ 2026-01-24 22:20:25 | INFO | Best 3 checkpoints:
397
+ 2026-01-24 22:20:25 | INFO | 1. Step 1799: acc=0.6691 (reward_model.best_1799.pt)
398
+ 2026-01-24 22:20:25 | INFO | 2. Step 1899: acc=0.6691 (reward_model.best_1899.pt)
399
+ 2026-01-24 22:20:25 | INFO | 3. Step 1699: acc=0.6676 (reward_model.best_1699.pt)
400
+ 2026-01-24 22:20:26 | INFO | Step 1900: loss=0.0415 | IF_loss=0.0539, MQ_loss=0.0290 | acc=0.990 (IF=0.979, MQ=1.000) | lr=0.000061
401
+ 2026-01-24 22:22:22 | INFO |
402
+ ============================================================
403
+ Validation Results (took 7.27s):
404
+ Samples: 346 instruction, 346 quality
405
+ Instruction Acc: 0.6936
406
+ Quality Acc: 0.6474
407
+ Average Acc: 0.6705
408
+ Total Loss: 1.7083
409
+ Instruction Loss: 1.5455
410
+ Quality Loss: 1.8711
411
+ ============================================================
412
+ 2026-01-24 22:22:22 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1999.pt (filtered to 38.584M trainable parameters)
413
+ 2026-01-24 22:22:22 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1999.pt (575.2MB)
414
+ 2026-01-24 22:22:22 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1699.pt
415
+ 2026-01-24 22:22:22 | INFO | Best 3 checkpoints:
416
+ 2026-01-24 22:22:22 | INFO | 1. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
417
+ 2026-01-24 22:22:22 | INFO | 2. Step 1799: acc=0.6691 (reward_model.best_1799.pt)
418
+ 2026-01-24 22:22:22 | INFO | 3. Step 1899: acc=0.6691 (reward_model.best_1899.pt)
419
+ 2026-01-24 22:22:23 | INFO | Step 2000: loss=0.0589 | IF_loss=0.0511, MQ_loss=0.0667 | acc=0.979 (IF=0.979, MQ=0.979) | lr=0.000056
420
+ 2026-01-24 22:22:23 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.2000.pt (filtered to 38.584M trainable parameters)
421
+ 2026-01-24 22:22:24 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.2000.pt (575.2MB)
422
+ 2026-01-24 22:22:24 | INFO | Step 2000: Saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.2000.pt
423
+ 2026-01-24 22:24:18 | INFO |
424
+ ============================================================
425
+ Validation Results (took 7.25s):
426
+ Samples: 346 instruction, 346 quality
427
+ Instruction Acc: 0.6879
428
+ Quality Acc: 0.6474
429
+ Average Acc: 0.6676
430
+ Total Loss: 1.7086
431
+ Instruction Loss: 1.5472
432
+ Quality Loss: 1.8700
433
+ ============================================================
434
+ 2026-01-24 22:24:18 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2099.pt (filtered to 38.584M trainable parameters)
435
+ 2026-01-24 22:24:19 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2099.pt (575.2MB)
436
+ 2026-01-24 22:24:19 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2099.pt
437
+ 2026-01-24 22:24:19 | INFO | Best 3 checkpoints:
438
+ 2026-01-24 22:24:19 | INFO | 1. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
439
+ 2026-01-24 22:24:19 | INFO | 2. Step 1799: acc=0.6691 (reward_model.best_1799.pt)
440
+ 2026-01-24 22:24:19 | INFO | 3. Step 1899: acc=0.6691 (reward_model.best_1899.pt)
441
+ 2026-01-24 22:24:20 | INFO | Step 2100: loss=0.0284 | IF_loss=0.0286, MQ_loss=0.0281 | acc=0.990 (IF=1.000, MQ=0.979) | lr=0.000052
442
+ 2026-01-24 22:26:12 | INFO |
443
+ ============================================================
444
+ Validation Results (took 7.00s):
445
+ Samples: 346 instruction, 346 quality
446
+ Instruction Acc: 0.6879
447
+ Quality Acc: 0.6503
448
+ Average Acc: 0.6691
449
+ Total Loss: 1.7083
450
+ Instruction Loss: 1.5495
451
+ Quality Loss: 1.8672
452
+ ============================================================
453
+ 2026-01-24 22:26:12 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2199.pt (filtered to 38.584M trainable parameters)
454
+ 2026-01-24 22:26:13 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2199.pt (575.2MB)
455
+ 2026-01-24 22:26:13 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1899.pt
456
+ 2026-01-24 22:26:13 | INFO | Best 3 checkpoints:
457
+ 2026-01-24 22:26:13 | INFO | 1. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
458
+ 2026-01-24 22:26:13 | INFO | 2. Step 2199: acc=0.6691 (reward_model.best_2199.pt)
459
+ 2026-01-24 22:26:13 | INFO | 3. Step 1799: acc=0.6691 (reward_model.best_1799.pt)
460
+ 2026-01-24 22:26:14 | INFO | Step 2200: loss=0.0061 | IF_loss=0.0038, MQ_loss=0.0085 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000048
461
+ 2026-01-24 22:28:05 | INFO |
462
+ ============================================================
463
+ Validation Results (took 7.64s):
464
+ Samples: 346 instruction, 346 quality
465
+ Instruction Acc: 0.6879
466
+ Quality Acc: 0.6532
467
+ Average Acc: 0.6705
468
+ Total Loss: 1.7088
469
+ Instruction Loss: 1.5525
470
+ Quality Loss: 1.8651
471
+ ============================================================
472
+ 2026-01-24 22:28:05 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2299.pt (filtered to 38.584M trainable parameters)
473
+ 2026-01-24 22:28:05 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2299.pt (575.2MB)
474
+ 2026-01-24 22:28:05 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1799.pt
475
+ 2026-01-24 22:28:05 | INFO | Best 3 checkpoints:
476
+ 2026-01-24 22:28:05 | INFO | 1. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
477
+ 2026-01-24 22:28:05 | INFO | 2. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
478
+ 2026-01-24 22:28:05 | INFO | 3. Step 2199: acc=0.6691 (reward_model.best_2199.pt)
479
+ 2026-01-24 22:28:06 | INFO | Step 2300: loss=0.0451 | IF_loss=0.0768, MQ_loss=0.0134 | acc=0.990 (IF=0.979, MQ=1.000) | lr=0.000044
480
+ 2026-01-24 22:30:00 | INFO |
481
+ ============================================================
482
+ Validation Results (took 7.30s):
483
+ Samples: 346 instruction, 346 quality
484
+ Instruction Acc: 0.6908
485
+ Quality Acc: 0.6532
486
+ Average Acc: 0.6720
487
+ Total Loss: 1.7079
488
+ Instruction Loss: 1.5530
489
+ Quality Loss: 1.8628
490
+ ============================================================
491
+ 2026-01-24 22:30:00 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2399.pt (filtered to 38.584M trainable parameters)
492
+ 2026-01-24 22:30:01 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2399.pt (575.2MB)
493
+ 2026-01-24 22:30:01 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2199.pt
494
+ 2026-01-24 22:30:01 | INFO | Best 3 checkpoints:
495
+ 2026-01-24 22:30:01 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
496
+ 2026-01-24 22:30:01 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
497
+ 2026-01-24 22:30:01 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
498
+ 2026-01-24 22:30:02 | INFO | Step 2400: loss=0.0141 | IF_loss=0.0160, MQ_loss=0.0122 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000039
499
+ 2026-01-24 22:31:52 | INFO |
500
+ ============================================================
501
+ Validation Results (took 6.60s):
502
+ Samples: 346 instruction, 346 quality
503
+ Instruction Acc: 0.6879
504
+ Quality Acc: 0.6503
505
+ Average Acc: 0.6691
506
+ Total Loss: 1.7095
507
+ Instruction Loss: 1.5571
508
+ Quality Loss: 1.8619
509
+ ============================================================
510
+ 2026-01-24 22:31:53 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2499.pt (filtered to 38.584M trainable parameters)
511
+ 2026-01-24 22:31:53 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2499.pt (575.2MB)
512
+ 2026-01-24 22:31:53 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2499.pt
513
+ 2026-01-24 22:31:53 | INFO | Best 3 checkpoints:
514
+ 2026-01-24 22:31:53 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
515
+ 2026-01-24 22:31:53 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
516
+ 2026-01-24 22:31:53 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
517
+ 2026-01-24 22:31:54 | INFO | Step 2500: loss=0.0073 | IF_loss=0.0109, MQ_loss=0.0036 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000035
518
+ 2026-01-24 22:33:43 | INFO |
519
+ ============================================================
520
+ Validation Results (took 6.93s):
521
+ Samples: 346 instruction, 346 quality
522
+ Instruction Acc: 0.6879
523
+ Quality Acc: 0.6503
524
+ Average Acc: 0.6691
525
+ Total Loss: 1.7093
526
+ Instruction Loss: 1.5586
527
+ Quality Loss: 1.8601
528
+ ============================================================
529
+ 2026-01-24 22:33:43 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2599.pt (filtered to 38.584M trainable parameters)
530
+ 2026-01-24 22:33:43 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2599.pt (575.2MB)
531
+ 2026-01-24 22:33:43 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2599.pt
532
+ 2026-01-24 22:33:43 | INFO | Best 3 checkpoints:
533
+ 2026-01-24 22:33:43 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
534
+ 2026-01-24 22:33:43 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
535
+ 2026-01-24 22:33:43 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
536
+ 2026-01-24 22:33:44 | INFO | Step 2600: loss=0.0025 | IF_loss=0.0039, MQ_loss=0.0011 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000031
537
+ 2026-01-24 22:35:39 | INFO |
538
+ ============================================================
539
+ Validation Results (took 6.71s):
540
+ Samples: 346 instruction, 346 quality
541
+ Instruction Acc: 0.6879
542
+ Quality Acc: 0.6503
543
+ Average Acc: 0.6691
544
+ Total Loss: 1.7105
545
+ Instruction Loss: 1.5632
546
+ Quality Loss: 1.8577
547
+ ============================================================
548
+ 2026-01-24 22:35:39 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2699.pt (filtered to 38.584M trainable parameters)
549
+ 2026-01-24 22:35:39 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2699.pt (575.2MB)
550
+ 2026-01-24 22:35:39 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2699.pt
551
+ 2026-01-24 22:35:39 | INFO | Best 3 checkpoints:
552
+ 2026-01-24 22:35:39 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
553
+ 2026-01-24 22:35:39 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
554
+ 2026-01-24 22:35:39 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
555
+ 2026-01-24 22:35:40 | INFO | Step 2700: loss=0.0285 | IF_loss=0.0436, MQ_loss=0.0134 | acc=0.990 (IF=0.979, MQ=1.000) | lr=0.000028
556
+ 2026-01-24 22:37:31 | INFO |
557
+ ============================================================
558
+ Validation Results (took 7.15s):
559
+ Samples: 346 instruction, 346 quality
560
+ Instruction Acc: 0.6850
561
+ Quality Acc: 0.6503
562
+ Average Acc: 0.6676
563
+ Total Loss: 1.7119
564
+ Instruction Loss: 1.5662
565
+ Quality Loss: 1.8576
566
+ ============================================================
567
+ 2026-01-24 22:37:31 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2799.pt (filtered to 38.584M trainable parameters)
568
+ 2026-01-24 22:37:32 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2799.pt (575.2MB)
569
+ 2026-01-24 22:37:32 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2799.pt
570
+ 2026-01-24 22:37:32 | INFO | Best 3 checkpoints:
571
+ 2026-01-24 22:37:32 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
572
+ 2026-01-24 22:37:32 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
573
+ 2026-01-24 22:37:32 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
574
+ 2026-01-24 22:37:33 | INFO | Step 2800: loss=0.0054 | IF_loss=0.0086, MQ_loss=0.0023 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000024
575
+ 2026-01-24 22:39:25 | INFO |
576
+ ============================================================
577
+ Validation Results (took 6.52s):
578
+ Samples: 346 instruction, 346 quality
579
+ Instruction Acc: 0.6879
580
+ Quality Acc: 0.6503
581
+ Average Acc: 0.6691
582
+ Total Loss: 1.7105
583
+ Instruction Loss: 1.5670
584
+ Quality Loss: 1.8540
585
+ ============================================================
586
+ 2026-01-24 22:39:25 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2899.pt (filtered to 38.584M trainable parameters)
587
+ 2026-01-24 22:39:26 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2899.pt (575.2MB)
588
+ 2026-01-24 22:39:26 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2899.pt
589
+ 2026-01-24 22:39:26 | INFO | Best 3 checkpoints:
590
+ 2026-01-24 22:39:26 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
591
+ 2026-01-24 22:39:26 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
592
+ 2026-01-24 22:39:26 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
593
+ 2026-01-24 22:39:27 | INFO | Step 2900: loss=0.0121 | IF_loss=0.0158, MQ_loss=0.0084 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000020
594
+ 2026-01-24 22:41:17 | INFO |
595
+ ============================================================
596
+ Validation Results (took 7.44s):
597
+ Samples: 346 instruction, 346 quality
598
+ Instruction Acc: 0.6879
599
+ Quality Acc: 0.6503
600
+ Average Acc: 0.6691
601
+ Total Loss: 1.7130
602
+ Instruction Loss: 1.5717
603
+ Quality Loss: 1.8543
604
+ ============================================================
605
+ 2026-01-24 22:41:17 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2999.pt (filtered to 38.584M trainable parameters)
606
+ 2026-01-24 22:41:17 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2999.pt (575.2MB)
607
+ 2026-01-24 22:41:18 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2999.pt
608
+ 2026-01-24 22:41:18 | INFO | Best 3 checkpoints:
609
+ 2026-01-24 22:41:18 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
610
+ 2026-01-24 22:41:18 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
611
+ 2026-01-24 22:41:18 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
612
+ 2026-01-24 22:41:19 | INFO | Step 3000: loss=0.0040 | IF_loss=0.0024, MQ_loss=0.0055 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000017
613
+ 2026-01-24 22:43:12 | INFO |
614
+ ============================================================
615
+ Validation Results (took 6.84s):
616
+ Samples: 346 instruction, 346 quality
617
+ Instruction Acc: 0.6908
618
+ Quality Acc: 0.6503
619
+ Average Acc: 0.6705
620
+ Total Loss: 1.7137
621
+ Instruction Loss: 1.5743
622
+ Quality Loss: 1.8532
623
+ ============================================================
624
+ 2026-01-24 22:43:12 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3099.pt (filtered to 38.584M trainable parameters)
625
+ 2026-01-24 22:43:12 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3099.pt (575.2MB)
626
+ 2026-01-24 22:43:12 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3099.pt
627
+ 2026-01-24 22:43:12 | INFO | Best 3 checkpoints:
628
+ 2026-01-24 22:43:12 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
629
+ 2026-01-24 22:43:12 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
630
+ 2026-01-24 22:43:12 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
631
+ 2026-01-24 22:43:13 | INFO | Step 3100: loss=0.0095 | IF_loss=0.0161, MQ_loss=0.0029 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000014
632
+ 2026-01-24 22:45:04 | INFO |
633
+ ============================================================
634
+ Validation Results (took 7.66s):
635
+ Samples: 346 instruction, 346 quality
636
+ Instruction Acc: 0.6879
637
+ Quality Acc: 0.6503
638
+ Average Acc: 0.6691
639
+ Total Loss: 1.7135
640
+ Instruction Loss: 1.5760
641
+ Quality Loss: 1.8510
642
+ ============================================================
643
+ 2026-01-24 22:45:04 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3199.pt (filtered to 38.584M trainable parameters)
644
+ 2026-01-24 22:45:04 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3199.pt (575.2MB)
645
+ 2026-01-24 22:45:04 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3199.pt
646
+ 2026-01-24 22:45:04 | INFO | Best 3 checkpoints:
647
+ 2026-01-24 22:45:04 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
648
+ 2026-01-24 22:45:04 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
649
+ 2026-01-24 22:45:04 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
650
+ 2026-01-24 22:45:08 | INFO | Step 3200: loss=0.0050 | IF_loss=0.0072, MQ_loss=0.0027 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000011
651
+ 2026-01-24 22:46:57 | INFO |
652
+ ============================================================
653
+ Validation Results (took 6.84s):
654
+ Samples: 346 instruction, 346 quality
655
+ Instruction Acc: 0.6879
656
+ Quality Acc: 0.6503
657
+ Average Acc: 0.6691
658
+ Total Loss: 1.7154
659
+ Instruction Loss: 1.5809
660
+ Quality Loss: 1.8499
661
+ ============================================================
662
+ 2026-01-24 22:46:57 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3299.pt (filtered to 38.584M trainable parameters)
663
+ 2026-01-24 22:46:57 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3299.pt (575.2MB)
664
+ 2026-01-24 22:46:57 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3299.pt
665
+ 2026-01-24 22:46:57 | INFO | Best 3 checkpoints:
666
+ 2026-01-24 22:46:57 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
667
+ 2026-01-24 22:46:57 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
668
+ 2026-01-24 22:46:57 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
669
+ 2026-01-24 22:46:58 | INFO | Step 3300: loss=0.0362 | IF_loss=0.0503, MQ_loss=0.0221 | acc=0.990 (IF=0.979, MQ=1.000) | lr=0.000009
670
+ 2026-01-24 22:48:50 | INFO |
671
+ ============================================================
672
+ Validation Results (took 6.83s):
673
+ Samples: 346 instruction, 346 quality
674
+ Instruction Acc: 0.6879
675
+ Quality Acc: 0.6532
676
+ Average Acc: 0.6705
677
+ Total Loss: 1.7154
678
+ Instruction Loss: 1.5832
679
+ Quality Loss: 1.8477
680
+ ============================================================
681
+ 2026-01-24 22:48:50 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3399.pt (filtered to 38.584M trainable parameters)
682
+ 2026-01-24 22:48:51 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3399.pt (575.2MB)
683
+ 2026-01-24 22:48:51 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3399.pt
684
+ 2026-01-24 22:48:51 | INFO | Best 3 checkpoints:
685
+ 2026-01-24 22:48:51 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
686
+ 2026-01-24 22:48:51 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
687
+ 2026-01-24 22:48:51 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
688
+ 2026-01-24 22:48:52 | INFO | Step 3400: loss=0.0082 | IF_loss=0.0113, MQ_loss=0.0051 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000006
689
+ 2026-01-24 22:50:41 | INFO |
690
+ ============================================================
691
+ Validation Results (took 7.19s):
692
+ Samples: 346 instruction, 346 quality
693
+ Instruction Acc: 0.6908
694
+ Quality Acc: 0.6590
695
+ Average Acc: 0.6749
696
+ Total Loss: 1.7151
697
+ Instruction Loss: 1.5847
698
+ Quality Loss: 1.8456
699
+ ============================================================
700
+ 2026-01-24 22:50:41 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3499.pt (filtered to 38.584M trainable parameters)
701
+ 2026-01-24 22:50:41 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3499.pt (575.2MB)
702
+ 2026-01-24 22:50:41 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2299.pt
703
+ 2026-01-24 22:50:41 | INFO | Best 3 checkpoints:
704
+ 2026-01-24 22:50:41 | INFO | 1. Step 3499: acc=0.6749 (reward_model.best_3499.pt)
705
+ 2026-01-24 22:50:41 | INFO | 2. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
706
+ 2026-01-24 22:50:41 | INFO | 3. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
707
+ 2026-01-24 22:50:42 | INFO | Step 3500: loss=0.0045 | IF_loss=0.0077, MQ_loss=0.0013 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000005
708
+ 2026-01-24 22:52:33 | INFO |
709
+ ============================================================
710
+ Validation Results (took 7.08s):
711
+ Samples: 346 instruction, 346 quality
712
+ Instruction Acc: 0.6879
713
+ Quality Acc: 0.6590
714
+ Average Acc: 0.6734
715
+ Total Loss: 1.7160
716
+ Instruction Loss: 1.5876
717
+ Quality Loss: 1.8445
718
+ ============================================================
719
+ 2026-01-24 22:52:33 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3599.pt (filtered to 38.584M trainable parameters)
720
+ 2026-01-24 22:52:34 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3599.pt (575.2MB)
721
+ 2026-01-24 22:52:34 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1999.pt
722
+ 2026-01-24 22:52:34 | INFO | Best 3 checkpoints:
723
+ 2026-01-24 22:52:34 | INFO | 1. Step 3499: acc=0.6749 (reward_model.best_3499.pt)
724
+ 2026-01-24 22:52:34 | INFO | 2. Step 3599: acc=0.6734 (reward_model.best_3599.pt)
725
+ 2026-01-24 22:52:34 | INFO | 3. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
726
+ 2026-01-24 22:52:35 | INFO | Step 3600: loss=0.0126 | IF_loss=0.0220, MQ_loss=0.0031 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000003
727
+ 2026-01-24 22:54:24 | INFO |
728
+ ============================================================
729
+ Validation Results (took 7.20s):
730
+ Samples: 346 instruction, 346 quality
731
+ Instruction Acc: 0.6879
732
+ Quality Acc: 0.6590
733
+ Average Acc: 0.6734
734
+ Total Loss: 1.7161
735
+ Instruction Loss: 1.5894
736
+ Quality Loss: 1.8428
737
+ ============================================================
738
+ 2026-01-24 22:54:24 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3699.pt (filtered to 38.584M trainable parameters)
739
+ 2026-01-24 22:54:24 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3699.pt (575.2MB)
740
+ 2026-01-24 22:54:24 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2399.pt
741
+ 2026-01-24 22:54:24 | INFO | Best 3 checkpoints:
742
+ 2026-01-24 22:54:24 | INFO | 1. Step 3499: acc=0.6749 (reward_model.best_3499.pt)
743
+ 2026-01-24 22:54:24 | INFO | 2. Step 3599: acc=0.6734 (reward_model.best_3599.pt)
744
+ 2026-01-24 22:54:24 | INFO | 3. Step 3699: acc=0.6734 (reward_model.best_3699.pt)
745
+ 2026-01-24 22:54:25 | INFO | Step 3700: loss=0.0085 | IF_loss=0.0041, MQ_loss=0.0130 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000002
746
+ 2026-01-24 22:56:18 | INFO |
747
+ ============================================================
748
+ Validation Results (took 6.85s):
749
+ Samples: 346 instruction, 346 quality
750
+ Instruction Acc: 0.6879
751
+ Quality Acc: 0.6618
752
+ Average Acc: 0.6749
753
+ Total Loss: 1.7157
754
+ Instruction Loss: 1.5912
755
+ Quality Loss: 1.8403
756
+ ============================================================
757
+ 2026-01-24 22:56:18 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3799.pt (filtered to 38.584M trainable parameters)
758
+ 2026-01-24 22:56:19 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3799.pt (575.2MB)
759
+ 2026-01-24 22:56:19 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3699.pt
760
+ 2026-01-24 22:56:19 | INFO | Best 3 checkpoints:
761
+ 2026-01-24 22:56:19 | INFO | 1. Step 3499: acc=0.6749 (reward_model.best_3499.pt)
762
+ 2026-01-24 22:56:19 | INFO | 2. Step 3799: acc=0.6749 (reward_model.best_3799.pt)
763
+ 2026-01-24 22:56:19 | INFO | 3. Step 3599: acc=0.6734 (reward_model.best_3599.pt)
764
+ 2026-01-24 22:56:20 | INFO | Step 3800: loss=0.0120 | IF_loss=0.0037, MQ_loss=0.0202 | acc=0.990 (IF=1.000, MQ=0.979) | lr=0.000001
765
+ 2026-01-24 22:58:09 | INFO |
766
+ ============================================================
767
+ Validation Results (took 7.39s):
768
+ Samples: 346 instruction, 346 quality
769
+ Instruction Acc: 0.6908
770
+ Quality Acc: 0.6590
771
+ Average Acc: 0.6749
772
+ Total Loss: 1.7163
773
+ Instruction Loss: 1.5935
774
+ Quality Loss: 1.8391
775
+ ============================================================
776
+ 2026-01-24 22:58:09 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3899.pt (filtered to 38.584M trainable parameters)
777
+ 2026-01-24 22:58:10 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3899.pt (575.2MB)
778
+ 2026-01-24 22:58:10 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3599.pt
779
+ 2026-01-24 22:58:10 | INFO | Best 3 checkpoints:
780
+ 2026-01-24 22:58:10 | INFO | 1. Step 3499: acc=0.6749 (reward_model.best_3499.pt)
781
+ 2026-01-24 22:58:10 | INFO | 2. Step 3799: acc=0.6749 (reward_model.best_3799.pt)
782
+ 2026-01-24 22:58:10 | INFO | 3. Step 3899: acc=0.6749 (reward_model.best_3899.pt)
783
+ 2026-01-24 22:58:11 | INFO | Step 3900: loss=0.0060 | IF_loss=0.0040, MQ_loss=0.0080 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000000
784
+ 2026-01-24 23:00:02 | INFO |
785
+ ============================================================
786
+ Validation Results (took 6.60s):
787
+ Samples: 346 instruction, 346 quality
788
+ Instruction Acc: 0.6908
789
+ Quality Acc: 0.6590
790
+ Average Acc: 0.6749
791
+ Total Loss: 1.7165
792
+ Instruction Loss: 1.5967
793
+ Quality Loss: 1.8363
794
+ ============================================================
795
+ 2026-01-24 23:00:02 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3999.pt (filtered to 38.584M trainable parameters)
796
+ 2026-01-24 23:00:02 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3999.pt (575.2MB)
797
+ 2026-01-24 23:00:02 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3999.pt
798
+ 2026-01-24 23:00:02 | INFO | Best 3 checkpoints:
799
+ 2026-01-24 23:00:02 | INFO | 1. Step 3499: acc=0.6749 (reward_model.best_3499.pt)
800
+ 2026-01-24 23:00:02 | INFO | 2. Step 3799: acc=0.6749 (reward_model.best_3799.pt)
801
+ 2026-01-24 23:00:02 | INFO | 3. Step 3899: acc=0.6749 (reward_model.best_3899.pt)
802
+ 2026-01-24 23:00:02 | INFO | Training complete!
803
+ 2026-01-24 23:00:02 | INFO | Training complete!
finetune_human/20260124_2354/config.yaml ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DEVICES: '3'
2
+ accelerate:
3
+ mixed_precision: bf16
4
+ basics:
5
+ random_seed: 42
6
+ save_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model
7
+ dataset:
8
+ audio_dropout:
9
+ apply_to_eval: false
10
+ apply_to_ref: true
11
+ enabled: true
12
+ eval_only_on_training: true
13
+ max_duration: 1500
14
+ min_duration: 200
15
+ train_mode: start
16
+ cache_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/tmp
17
+ db_path: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/backend/database.db
18
+ duration: 600.0
19
+ embedding_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/supervised_embeddings
20
+ max_samples: null
21
+ max_val_samples: null
22
+ metadata_jsonl: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/all_comparisons.jsonl
23
+ mode: raw_text_frozen_audio
24
+ preference_file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/human_annotations/train.json
25
+ sample_rate: 24000
26
+ val_preference_file: null
27
+ loss:
28
+ IF_ratio: 0.5
29
+ filter_ties: true
30
+ label_smoothing: 0.0
31
+ reduction: mean
32
+ model:
33
+ attention_mode: SA
34
+ attn_dropout: 0.0
35
+ category_embeddings: null
36
+ dim: 768
37
+ dim_head: 64
38
+ downsample:
39
+ configs:
40
+ conv2_4x:
41
+ factor: 4
42
+ kernel_size: 5
43
+ kind: conv*2
44
+ use_layernorm: true
45
+ conv_4x:
46
+ factor: 4
47
+ kernel_size: 5
48
+ kind: conv
49
+ stage: 1
50
+ use_layernorm: true
51
+ glu_4x:
52
+ factor: 4
53
+ kernel_size: 5
54
+ kind: gluconv*2+pw
55
+ use_layernorm: true
56
+ mean:
57
+ factor: 2
58
+ kind: mean
59
+ mean_4x:
60
+ dropout: 0.0
61
+ factor: 30
62
+ kind: mean+mlp
63
+ mlp_ratio: 2.0
64
+ none:
65
+ factor: 1
66
+ kind: none
67
+ eval: mean_4x
68
+ ref: null
69
+ text: none
70
+ ff_dropout: 0.0
71
+ ff_mult: 4
72
+ freeze_audio: true
73
+ freeze_text: true
74
+ gradient_checkpointing: false
75
+ heads: 8
76
+ joint_tf_depth: 1
77
+ load_config:
78
+ checkpoint_path: null
79
+ frozen_from_pretrained: true
80
+ pretrained_name: OpenMuQ/MuQ-MuLan-large
81
+ strict: false
82
+ mlp_dim: 768
83
+ mode: concat_text_late
84
+ model_name: OpenMuQ/MuQ-MuLan-large
85
+ name: reward
86
+ no_condition: false
87
+ null_embedding:
88
+ audio:
89
+ dropout: 0.5
90
+ length: 10
91
+ lyrics:
92
+ dropout: 0.3
93
+ length: 10
94
+ text:
95
+ dropout: 0
96
+ length: 10
97
+ output_dim: 2
98
+ prompt_tf_depth: 4
99
+ sr: 24000
100
+ text_encoder:
101
+ name: muq_mulan
102
+ tune: null
103
+ text_lora_config: null
104
+ train_muq_depth: 0
105
+ train_muqmulan: false
106
+ use_audio: true
107
+ use_layer_idx: -1
108
+ project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
109
+ run_name: null
110
+ train:
111
+ batch_size: 48
112
+ betas:
113
+ - 0.9
114
+ - 0.99
115
+ ema_decay: 0.9999
116
+ ema_update_every: 1
117
+ enable_gradient_checkpointing: true
118
+ force_clear_prev_results: false
119
+ grad_accum_every: 1
120
+ log_tensorboard: true
121
+ lr_schedule:
122
+ min_lr_ratio: 0.001
123
+ name: linear_cosine
124
+ total_steps: 4000
125
+ warmup_steps: 10
126
+ max_grad_norm: 1
127
+ mlp_lr: 1.0e-05
128
+ num_train_steps: 4000
129
+ num_valid_batches: null
130
+ num_workers: 8
131
+ other_lr: 1.0e-05
132
+ resume: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.best_29999.pt
133
+ resume_optimizer: false
134
+ save_model_every: 2000
135
+ use_checkpoint_config: true
136
+ use_ema: false
137
+ use_lion: false
138
+ valid_batch_size: 20
139
+ valid_every: 100
140
+ valid_frac: 0.1
141
+ verify_weights_on_load: true
142
+ validate_only: false