HaiwenXia committed on
Commit
5780881
·
verified ·
1 Parent(s): 4d9cb09

Upload 4 files

Browse files
1118_muqmulan/ckpt/full_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c4609e02573014b08e7a73e9a883abdd6615b9713e6c0c959904dc4d777029b
3
+ size 2735874200
1118_muqmulan/config.yaml ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DEVICES: '7'
2
+ accelerate:
3
+ mixed_precision: bf16
4
+ basics:
5
+ random_seed: 42
6
+ save_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model
7
+ dataset:
8
+ audio_dropout:
9
+ apply_to_eval: true
10
+ apply_to_ref: true
11
+ enabled: true
12
+ eval_only_on_training: true
13
+ max_duration: 1000
14
+ min_duration: 250
15
+ cache_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/tmp
16
+ db_path: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/backend/database.db
17
+ duration: 600.0
18
+ embedding_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/supervised_embeddings
19
+ max_samples: null
20
+ max_val_samples: null
21
+ preference_file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/unbiased_qwen/train.json
22
+ sample_rate: 24000
23
+ use_preextracted: true
24
+ val_preference_file: null
25
+ loss:
26
+ IF_ratio: 0.5
27
+ filter_ties: true
28
+ label_smoothing: 0.0
29
+ reduction: mean
30
+ model:
31
+ attention_mode: CA
32
+ attn_dropout: 0.0
33
+ category_embeddings: null
34
+ dim: 768
35
+ dim_head: 64
36
+ downsample:
37
+ configs:
38
+ conv2_4x:
39
+ factor: 4
40
+ kernel_size: 5
41
+ kind: conv*2
42
+ use_layernorm: true
43
+ conv_4x:
44
+ factor: 4
45
+ kernel_size: 5
46
+ kind: conv
47
+ stage: 1
48
+ use_layernorm: true
49
+ glu_4x:
50
+ factor: 4
51
+ kernel_size: 5
52
+ kind: gluconv*2+pw
53
+ use_layernorm: true
54
+ mean:
55
+ factor: 2
56
+ kind: mean
57
+ mean_4x:
58
+ dropout: 0.0
59
+ factor: 30
60
+ kind: mean+mlp
61
+ mlp_ratio: 2.0
62
+ none:
63
+ factor: 1
64
+ kind: none
65
+ eval: mean_4x
66
+ ref: null
67
+ text: none
68
+ ff_dropout: 0.0
69
+ ff_mult: 4
70
+ freeze_audio: true
71
+ freeze_text: true
72
+ gradient_checkpointing: false
73
+ heads: 8
74
+ joint_tf_depth: 1
75
+ load_config:
76
+ checkpoint_path: null
77
+ frozen_from_pretrained: true
78
+ pretrained_name: OpenMuQ/MuQ-MuLan-large
79
+ strict: false
80
+ mlp_dim: 768
81
+ mode: text_only
82
+ model_name: OpenMuQ/MuQ-MuLan-large
83
+ name: reward
84
+ null_embedding:
85
+ audio:
86
+ dropout: 0.5
87
+ length: 10
88
+ lyrics:
89
+ dropout: 0.3
90
+ length: 10
91
+ text:
92
+ dropout: 0.2
93
+ length: 10
94
+ output_dim: 2
95
+ prompt_tf_depth: 1
96
+ sr: 24000
97
+ text_encoder:
98
+ name: muq_mulan
99
+ tune: null
100
+ text_lora_config: null
101
+ train_muq_depth: 0
102
+ train_muqmulan: false
103
+ use_layer_idx: -1
104
+ project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
105
+ run_name: null
106
+ train:
107
+ batch_size: 24
108
+ betas:
109
+ - 0.9
110
+ - 0.99
111
+ ema_decay: 0.9999
112
+ ema_update_every: 1
113
+ enable_gradient_checkpointing: true
114
+ force_clear_prev_results: false
115
+ grad_accum_every: 2
116
+ log_tensorboard: true
117
+ lr_schedule:
118
+ min_lr_ratio: 0.001
119
+ name: linear_cosine
120
+ total_steps: 30000
121
+ warmup_steps: 300
122
+ max_grad_norm: 100
123
+ mlp_lr: 0.0002
124
+ num_train_steps: 30000
125
+ num_valid_batches: 10
126
+ num_workers: 8
127
+ other_lr: null
128
+ resume: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/contrastive/20260118_0146/ckpt/reward_model.best_15760.pt
129
+ resume_optimizer: false
130
+ save_model_every: 2000
131
+ use_checkpoint_config: false
132
+ use_ema: true
133
+ use_lion: false
134
+ valid_batch_size: 20
135
+ valid_every: 2000
136
+ valid_frac: 0.1
137
+ verify_weights_on_load: true
138
+ validate:
139
+ checkpoint: null
140
+ dir: null
141
+ max_failure_cases: 30
142
+ num_batches: null
143
+ only: false
144
+ trust_checkpoint: true
1118_muqmulan/eval_results_0122_1121.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
1118_muqmulan/eval_results_0122_1127.jsonl ADDED
The diff for this file is too large to render. See raw diff