"""Constants for the A1-Max MuQ LoRA inference handler."""


# Names of the six PercePiano dimensions. The same names are used as the keys
# of MAESTRO_CALIBRATION.
# NOTE(review): the order presumably matches the index order of the model's
# output vector -- confirm against the handler before reordering.
PERCEPIANO_DIMENSIONS = [
    "dynamics",
    "timing",
    "pedaling",
    "articulation",
    "phrasing",
    "interpretation",
]


# Hyperparameters describing the model used for inference.
# NOTE(review): presumably these must match the training configuration of the
# checkpoints being loaded -- confirm before changing any value.
MODEL_CONFIG = {
    # MuQ backbone: layer range and feature width.
    # NOTE(review): "muq_layer_end" looks exclusive (MODEL_INFO describes
    # layers 9-12) -- confirm against the model code.
    "muq_layer_start": 9,
    "muq_layer_end": 13,
    "muq_dim": 1024,
    # Head dimensions; "num_labels" equals the number of entries in
    # PERCEPIANO_DIMENSIONS.
    "input_dim": 1024,
    "hidden_dim": 512,
    "num_labels": 6,
    "dropout": 0.2,
    # Audio preprocessing.
    # NOTE(review): presumably a sample rate in Hz and a cap on the number of
    # frames per clip -- confirm units against the handler.
    "target_sr": 24000,
    "max_frames": 1000,
}


# Human-readable metadata describing the deployed model.
# NOTE(review): "best_config" mentions L7-12 while "architecture" and
# MODEL_CONFIG indicate layers 9-12 -- confirm which one is correct.
# NOTE(review): "pairwise" is presumably a pairwise ranking accuracy from
# evaluation -- confirm.
MODEL_INFO = {
    "name": "A1-Max MuQ LoRA",
    "type": "audio-muq-lora",
    "pairwise": 0.7872,
    "description": "A1-Max: MuQ + LoRA with ListMLE, CCC, mixup, hard negative mining",
    "architecture": "MuQLoRAMaxModel (MuQ L9-12 avg -> attn pool -> encoder -> 6-dim regression)",
    "best_config": "A1max_r32_L7-12_ls0.1",
}


# Number of folds.
# NOTE(review): presumably one checkpoint per cross-validation fold is loaded
# at inference time -- confirm against the handler.
N_FOLDS = 4


# Per-dimension summary statistics: mean, standard deviation and the
# 5th/25th/50th/75th/95th percentiles, keyed by the dimension names listed in
# PERCEPIANO_DIMENSIONS.
# NOTE(review): presumably computed from model scores on MAESTRO recordings
# and used to calibrate raw outputs -- confirm how the handler consumes this.
MAESTRO_CALIBRATION = {
    "dynamics": {
        "mean": 0.560947, "std": 0.021063,
        "p5": 0.526612, "p25": 0.546136, "p50": 0.560859, "p75": 0.575372, "p95": 0.59573,
    },
    "timing": {
        "mean": 0.531883, "std": 0.028791,
        "p5": 0.480467, "p25": 0.512976, "p50": 0.534302, "p75": 0.552652, "p95": 0.575376,
    },
    "pedaling": {
        "mean": 0.590465, "std": 0.030438,
        "p5": 0.534399, "p25": 0.572243, "p50": 0.593731, "p75": 0.611854, "p95": 0.635053,
    },
    "articulation": {
        "mean": 0.553624, "std": 0.014287,
        "p5": 0.53023, "p25": 0.543792, "p50": 0.553554, "p75": 0.563275, "p95": 0.577426,
    },
    "phrasing": {
        "mean": 0.550866, "std": 0.013717,
        "p5": 0.528466, "p25": 0.541541, "p50": 0.550801, "p75": 0.560116, "p95": 0.573567,
    },
    "interpretation": {
        "mean": 0.564377, "std": 0.023457,
        "p5": 0.522434, "p25": 0.549302, "p50": 0.566195, "p75": 0.580981, "p95": 0.599733,
    },
}