RC_augmentation: false
_dataset_cfg_lookup:
  gencode128k_basic:
    hf_path: jzshared/gencode128k_basic
    path: data/gencode128k_basic
    type: refseq
  gencode128k_debug:
    hf_path: jzshared/gencode128k_debug
    path: data/gencode128k_debug
    type: refseq
  gencode_human_12.8k:
    hf_path: jzshared/gencode_human_12.8k
    path: data/gencode_human_12.8k
    type: refseq
  gencode_human_128k:
    hf_path: jzshared/gencode_human_128k
    path: data/gencode_human_128k
    type: refseq
  hg38_128k:
    hf_path: jzshared/hg38_cds_anchored_128000
    path: data/hg38_cds_anchored_128000
    type: refseq
  hg38_12k:
    hf_path: jzshared/hg38_12800
    path: data/hg38_cds_anchored_len12800_mincds150_1000000samples
    type: refseq
  hg38_cds_4m:
    hf_path: null
    path: data/hg38_cds_dataset_4m_filtered
    type: refseq
alias: CKPT_DEBUG
alpha_exp: 1.0
alpha_max: 0.03
arch: hnet
batch_size: 32
bp_per_token: 3
cluster: mila
cmd: python src/scripts/train_genezip_v1.py exp=glm/stage1 data=gencode_human_12.8k model=hnet/mamba_64m_2dc max_len=12800 batch_size=32 grad_acc_steps=1 max_train_steps=20 eval_steps=10 save_steps=10 alpha_max=0.03 use_routing_floor=false strictness_max=0 region_info=promoter1_cds1_utr1_exon1_intron1_nig1_dig1 alias=CKPT_DEBUG bp_per_token=3 use_wandb=true upload_to_hf=true hf_repo=jzshared/ckpt_debug
config_path: null
data: gencode_human_12.8k
data_alias: ${.data}_${max_len}
dataset: ${_dataset_cfg_lookup[${data}]}
device: cuda
device_type: GPU
dirs:
  data_cache: ${project_root}/data_cache/
  data_storage: ${project_root}/data/
  hydra: ${project_root}/temp/hydra/
  output: ${project_root}/output/${data_alias}/${alias}/
  temp: ${project_root}/temp/working_dir/${uid}/
  wandb_cache: ${oc.env:WANDB_CACHE_DIR,${project_root}/temp/wandb_cache/}
epochs: 200
eval_batch_size: ${batch_size}
eval_steps: 10
grad_acc_steps: 1
hf_repo: jzshared/ckpt_debug
hf_repo_owner: jzshared
is_distributed: true
local_rank: 0
logging:
  level: info
  log_wandb_metric_to_stdout: true
lr: 0.001
master_port: '46235'
max_data_samples: null
max_eval_samples: 1000
max_len: 12800
max_length: ${max_len}
max_train_steps: 20
min_routing_tokens: 8
mode: Stage1
model:
  arch: hnet
  name: hnet_mamba_64m_2dc
model_alias: ${oc.select:model.name,UnknownModel}
model_cfg:
  arch_layout:
  - m2
  - - m2
    - - m15
    - m2
  - m2
  attn_cfg:
    num_heads:
    - 8
    - 8
    - 12
    rotary_emb_dim:
    - 16
    - 16
    - 24
    window_size:
    - 511
    - 511
    - -1
  d_intermediate:
  - 0
  - 0
  - 2048
  d_model:
  - 512
  - 512
  - 768
  min_routing_tokens: ${min_routing_tokens}
  n_gpt: 1.0
  r_hi: ${r_hi}
  r_low: ${r_low}
  r_warm_up_end: ${r_warm_up_end}
  r_warm_up_start: ${r_warm_up_start}
  ssm_cfg:
    chunk_size: 256
    d_conv: 4
    d_state: 64
    expand: 2
    head_dim: 64
  tie_embeddings: true
  vocab_size: 12
name: hnet_base
private: false
project_root: ${hydra:runtime.cwd}
r_hi: 0.3
r_low: 0.0
r_warm_up_end: 750
r_warm_up_start: 200
rank: 0
reference_loss: null
region_info: promoter1_cds1_utr1_exon1_intron1_nig1_dig1
save_steps: 10
seed: 0
source: ${dataset.type}
strictness_exp: 1.0
strictness_max: 0
tokenizer: fast
training:
  adam_beta1: 0.9
  adam_beta2: 0.95
  bf16: true
  dataloader_drop_last: true
  dataloader_num_workers: 1
  disable_tqdm: false
  do_train: true
  eval_steps: ${eval_steps}
  eval_strategy: steps
  gradient_accumulation_steps: ${grad_acc_steps}
  gradient_checkpointing: false
  group_by_length: false
  label_names:
  - input_ids
  learning_rate: ${lr}
  logging_steps: 10
  lr_scheduler_type: linear
  max_grad_norm: 2.0
  max_train_steps: ${max_train_steps}
  num_train_epochs: ${epochs}
  output_dir: ${dirs.output}
  overrides: {}
  per_device_eval_batch_size: ${eval_batch_size}
  per_device_train_batch_size: ${batch_size}
  remove_unused_columns: false
  report_to: null
  save_steps: ${save_steps}
  save_strategy: steps
  use_lr_multiplier: true
  warmup_steps: 500
  weight_decay: 0.1
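# A minimal sketch (an assumption, not part of this dump) of how the ${...}
# interpolations above resolve. The ${oc.env:...}/${oc.select:...} and
# ${hydra:runtime.cwd} resolvers indicate an OmegaConf/Hydra setup, so loading
# the file with OmegaConf makes the cross-references concrete. The filename and
# the "/workspace" path are hypothetical; ${hydra:runtime.cwd} resolves only
# inside a running Hydra app, hence the manual project_root override:
#
#   from omegaconf import OmegaConf
#
#   cfg = OmegaConf.load("config.yaml")  # hypothetical filename for this dump
#   cfg.project_root = "/workspace"      # stand-in for ${hydra:runtime.cwd}
#   print(cfg.training.learning_rate)    # -> 0.001, via ${lr}
#   print(cfg.eval_batch_size)           # -> 32, via ${batch_size}
#   print(cfg.dirs.temp)                 # -> /workspace/temp/working_dir/98p9y5w8/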
training_alias: ${mode}_glm_s1_${region_info}_bp${bp_per_token}_aw${warmup_steps}_amax${alpha_max}_smax${strictness_max}_lr${lr}_e${epochs}_ms${max_train_steps}_maxlen${max_len}
uid: 98p9y5w8
upload_to_hf: true
use_routing_floor: false
use_wandb: true
valid_test_downsample: null
version: NA
wandb:
  dir: ${dirs.wandb_cache}
  entity: ${oc.select:env.vars.wandb_entity,${oc.env:WANDB_ENTITY,null}}
  id: 98p9y5w8
  mode: online
  name: CKPT_DEBUG
  project: ${oc.select:env.vars.wandb_proj,DNAFM}
  step_metric: null
  tags:
  - ${mode}
  url: https://wandb.ai/jzshared/DNAFM/runs/98p9y5w8
warmup_steps: 0
world_size: 4
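# A hedged illustration (not code from the run) of how the wandb block above
# plausibly maps onto a wandb.init call in the training script; the entity is
# taken from wandb.url, and the resolved values are filled in by hand:
#
#   import wandb
#
#   run = wandb.init(
#       project="DNAFM",       # wandb.project (oc.select default)
#       entity="jzshared",     # matches wandb.url
#       name="CKPT_DEBUG",     # wandb.name / alias
#       id="98p9y5w8",         # wandb.id / uid, a fixed run id
#       mode="online",         # wandb.mode
#       tags=["Stage1"],       # wandb.tags with ${mode} resolved
#   )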