{ "attn_impl": "sdpa", "audio_feature_cache_manifest": "path_to_your_audio_cache/manifest.json", "audio_feature_cache_max_entries": 256, "batch_size": 2, "beats_checkpoint": "path_to_your_SO_encoder_checkpoint", "beats_lr": 1e-06, "beats_repo": "", "device": "cuda:0", "distributed": true, "dtype": "bfloat16", "encoder_token_rate": 10.0, "epochs": 3, "grad_accum_steps": 3, "gradient_checkpointing": false, "local_rank": 0, "log_every": 10, "lora_alpha": 32, "lora_dropout": 0.05, "lora_lr": 3e-05, "lora_r": 16, "lora_target_modules": [ "q_proj", "k_proj", "v_proj", "o_proj" ], "lora_target_prefixes": [ "thinker.model" ], "lr": 3e-05, "max_grad_norm": 1.0, "max_train_samples": null, "max_valid_samples": null, "model_id": "path_to_original_Qwen2.5-Omni-7B", "num_workers": 8, "optimizer_step_per_batch": false, "output_dir": "output", "persistent_workers": true, "prefetch_factor": 4, "projector_fp32": false, "projector_lr": 1e-06, "projector_shuffle_factor": 4, "projector_type": "pixel_shuffle", "projector_weight_decay": null, "qa_root": "path_to_SO_Dataset/qa", "qa_roots": [ "path_to_SO_Dataset/qa" ], "rank": 0, "resume_checkpoint_path": "path_to_SO_7b_pretrain_checkpoint", "resume_model_only": false, "resume_tag": null, "save_every_epoch": true, "save_every_n_optimizer_steps": 1000, "save_full_model": false, "seed": 1234, "so_repo": "", "step_valid_subset_ratio": 0.05, "train_mode": "beats_lora", "train_split": "train", "valid_do_sample": false, "valid_every_n_optimizer_steps": 1000, "valid_generate_batch_size": 1, "valid_generate_full": true, "valid_generate_max_samples": 32, "valid_max_new_tokens": 96, "valid_num_beams": 1, "valid_split": "valid", "valid_subset_ratio": 0.1, "warmup_ratio": 0.03, "weight_decay": 0.01, "world_size": 8 }