{
"adamw_betas_for_muon_others": [
0.9,
0.95
],
"adamw_eps_for_muon_others": 1e-10,
"adamw_lr": 0.00064,
"adamw_lr_for_muon_others": 0.000267,
"adamw_max_grad_norm": 0.5,
"adamw_weight_decay": 0.01,
"adamw_weight_decay_for_muon_others": 0.05,
"adjust_learning_rate_for_accumulation": true,
"architectures": [
"ViralBERTForContrastiveBinning"
],
"attention_head_size": 64,
"attention_probs_dropout_prob": 0.0,
"batch_size": 256,
"cls_token_id": 1,
"compile_backend": "inductor",
"compile_fullgraph": true,
"compile_mode": "default",
"data_dir": "data/raw",
"dtype": "float32",
"fasta_file": "meta-1-vg.fna",
"feed_forward_activation": "swiglu",
"filter_n": false,
"fp16": true,
"global_attn_every_n_layers": 0,
"global_max_grad_norm": 1.0,
"gradient_accumulation_steps": 1,
"hidden_dropout_prob": 0.0,
"hidden_size": 768,
"high_lr_multiplier": 1.0,
"high_lr_steps_ratio": 0.0,
"initializer_range": 0.02,
"intermediate_size": 2048,
"layer_norm_eps": 1e-12,
"logging_steps": 1000,
"lr_scheduler_type": "warmup_cosine",
"mask_token_id": 3,
"masking_strategy": "structural",
"max_eval_samples": 2048,
"max_steps_for_sweep": null,
"min_lr_ratio": 0.05,
"mlm_probability": 0.15,
"moco_dim": 128,
"moco_k": 65536,
"moco_m": 0.999,
"moco_t": 0.07,
"model_type": "viralbert_for_contrastive_binning",
"muon_lr": 0.0015,
"muon_max_grad_norm": 1.0,
"muon_momentum": 0.95,
"muon_weight_decay": 0.05,
"norm_layer_type": "rmsnorm",
"num_attention_heads": 12,
"num_hidden_layers": 12,
"num_train_epochs": 3,
"num_workers": 4,
"optimizer_type": "muon_adamw",
"p_codon": 0.5,
"pad_token_id": 0,
"position_embedding_type": "rope",
"resume_from_checkpoint": null,
"resume_mode": null,
"reverse_complement_prob": 0.5,
"rope_interpolation_factor": 1.0,
"run_name": "",
"save_steps": 10000,
"save_total_limit": 5,
"scale_loss_for_accumulation": true,
"seed": 42,
"sep_token_id": 2,
"seq_length": 512,
"sliding_window_size": 0,
"stride": 256,
"sweep_early_stopping_patience_steps": 1000,
"sweep_early_stopping_threshold": 50.0,
"tie_word_embeddings": false,
"transformers_version": "4.56.1",
"unfreeze_last_n_layers": 6,
"use_compile": true,
"use_per_group_clipping": false,
"use_qk_norm": true,
"use_xpos": false,
"vocab_size": 14,
"wandb_enabled": true,
"wandb_group": "",
"wandb_name": "",
"wandb_notes": "",
"wandb_project": "",
"wandb_tags": [],
"wandb_watch_freq": null,
"wandb_watch_model": false,
"warmup_steps": 4000,
"warmup_steps_ratio": 0.1
}