{ "adamw_betas_for_muon_others": [ 0.9, 0.95 ], "adamw_eps_for_muon_others": 1e-10, "adamw_lr": 0.00064, "adamw_lr_for_muon_others": 0.000267, "adamw_max_grad_norm": 0.5, "adamw_weight_decay": 0.01, "adamw_weight_decay_for_muon_others": 0.05, "adjust_learning_rate_for_accumulation": true, "architectures": [ "ViralBERTForSequenceClassification" ], "attention_head_size": 64, "attention_probs_dropout_prob": 0.0, "batch_size": 256, "class_weights": null, "classifier_dropout_prob": 0.1, "cls_token_id": 1, "compile_backend": "inductor", "compile_fullgraph": true, "compile_mode": "default", "data_dir": "", "dtype": "float32", "fasta_file": "", "feed_forward_activation": "swiglu", "filter_n": false, "fp16": true, "freeze_bert_layers": 8, "global_attn_every_n_layers": 0, "global_max_grad_norm": 1.0, "gradient_accumulation_steps": 8, "hidden_dropout_prob": 0.0, "hidden_size": 768, "high_lr_multiplier": 1.0, "high_lr_steps_ratio": 0.0, "id2label": { "0": "bac", "1": "virus" }, "initializer_range": 0.02, "intermediate_size": 2048, "label2id": { "bac": 0, "virus": 1 }, "label_smoothing_factor": 0.1, "layer_norm_eps": 1e-12, "logging_steps": 1000, "loss_type": "ce", "lr_scheduler_type": "cosine", "mask_token_id": 3, "masking_strategy": "structural", "max_eval_samples": 2048, "max_steps_for_sweep": null, "min_lr_ratio": 0.05, "mlm_probability": 0.15, "model_type": "viralbert_for_sequence_classification", "muon_lr": 0.0015, "muon_max_grad_norm": 1.0, "muon_momentum": 0.95, "muon_weight_decay": 0.05, "n_token_id": 9, "norm_layer_type": "rmsnorm", "num_attention_heads": 12, "num_hidden_layers": 12, "num_train_epochs": 3, "num_workers": 4, "optimizer_type": "muon_adamw", "p_codon": 0.5, "pad_token_id": 0, "pos_weight": null, "position_embedding_type": "rope", "resume_from_checkpoint": null, "resume_mode": null, "reverse_complement_prob": 0.5, "rope_interpolation_factor": 1.0, "run_name": "", "save_steps": 10000, "save_total_limit": 5, "scale_loss_for_accumulation": true, "seed": 42, "sep_token_id": 2, "seq_length": 512, "seq_mask_prob": 0.5, "seq_mask_ratio": 0.15, "sliding_window_size": 0, "stride": 256, "sweep_early_stopping_patience_steps": 1000, "sweep_early_stopping_threshold": 50.0, "tie_word_embeddings": false, "transformers_version": "4.56.1", "use_compile": true, "use_per_group_clipping": false, "use_qk_norm": true, "use_seq_augment": true, "use_xpos": false, "vocab_size": 14, "wandb_enabled": true, "wandb_group": "", "wandb_name": "", "wandb_notes": "", "wandb_project": "", "wandb_tags": [], "wandb_watch_freq": null, "wandb_watch_model": false, "warmup_steps": 4000, "warmup_steps_ratio": 0.1 }