inspirewind's picture
Upload folder using huggingface_hub
87dcdb4 verified
{
"adamw_betas_for_muon_others": [
0.9,
0.95
],
"adamw_eps_for_muon_others": 1e-10,
"adamw_lr": 0.00064,
"adamw_lr_for_muon_others": 0.000267,
"adamw_max_grad_norm": 0.5,
"adamw_weight_decay": 0.01,
"adamw_weight_decay_for_muon_others": 0.05,
"adjust_learning_rate_for_accumulation": true,
"architectures": [
"ViralBERTForSequenceClassification"
],
"attention_head_size": 64,
"attention_probs_dropout_prob": 0.0,
"batch_size": 256,
"class_weights": null,
"classifier_dropout_prob": 0.1,
"cls_token_id": 1,
"compile_backend": "inductor",
"compile_fullgraph": true,
"compile_mode": "default",
"data_dir": "",
"dtype": "float32",
"fasta_file": "",
"feed_forward_activation": "swiglu",
"filter_n": false,
"fp16": true,
"freeze_bert_layers": 8,
"global_attn_every_n_layers": 0,
"global_max_grad_norm": 1.0,
"gradient_accumulation_steps": 8,
"hidden_dropout_prob": 0.0,
"hidden_size": 768,
"high_lr_multiplier": 1.0,
"high_lr_steps_ratio": 0.0,
"id2label": {
"0": "bac",
"1": "virus"
},
"initializer_range": 0.02,
"intermediate_size": 2048,
"label2id": {
"bac": 0,
"virus": 1
},
"label_smoothing_factor": 0.1,
"layer_norm_eps": 1e-12,
"logging_steps": 1000,
"loss_type": "ce",
"lr_scheduler_type": "cosine",
"mask_token_id": 3,
"masking_strategy": "structural",
"max_eval_samples": 2048,
"max_steps_for_sweep": null,
"min_lr_ratio": 0.05,
"mlm_probability": 0.15,
"model_type": "viralbert_for_sequence_classification",
"muon_lr": 0.0015,
"muon_max_grad_norm": 1.0,
"muon_momentum": 0.95,
"muon_weight_decay": 0.05,
"n_token_id": 9,
"norm_layer_type": "rmsnorm",
"num_attention_heads": 12,
"num_hidden_layers": 12,
"num_train_epochs": 3,
"num_workers": 4,
"optimizer_type": "muon_adamw",
"p_codon": 0.5,
"pad_token_id": 0,
"pos_weight": null,
"position_embedding_type": "rope",
"resume_from_checkpoint": null,
"resume_mode": null,
"reverse_complement_prob": 0.5,
"rope_interpolation_factor": 1.0,
"run_name": "",
"save_steps": 10000,
"save_total_limit": 5,
"scale_loss_for_accumulation": true,
"seed": 42,
"sep_token_id": 2,
"seq_length": 512,
"seq_mask_prob": 0.5,
"seq_mask_ratio": 0.15,
"sliding_window_size": 0,
"stride": 256,
"sweep_early_stopping_patience_steps": 1000,
"sweep_early_stopping_threshold": 50.0,
"tie_word_embeddings": false,
"transformers_version": "4.56.1",
"use_compile": true,
"use_per_group_clipping": false,
"use_qk_norm": true,
"use_seq_augment": true,
"use_xpos": false,
"vocab_size": 14,
"wandb_enabled": true,
"wandb_group": "",
"wandb_name": "",
"wandb_notes": "",
"wandb_project": "",
"wandb_tags": [],
"wandb_watch_freq": null,
"wandb_watch_model": false,
"warmup_steps": 4000,
"warmup_steps_ratio": 0.1
}