| dataset: |
| N: 512 |
| P: 512 |
| S: 512 |
| cellxgene: |
| ds_type: h5ad |
| filter: false |
| num_datasets: 1139 |
| train: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_cellxgene_train.csv |
| val: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_cellxgene_val.csv |
| cellxgene-tahoe: |
| ds_type: filtered_h5ad |
| filter: true |
| filter_by_species: null |
| num_datasets: 1139 |
| train: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_tahoe_cellxgene_train_filtered.csv |
| val: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_tahoe_cellxgene_val_filtered.csv |
| chrom_token_right_idx: 2 |
| cls_token_idx: 3 |
| current: scbasecamp-cellxgene-tahoe-filtered |
| name: vci |
| num_cells: 36238464 |
| num_train_workers: 32 |
| num_val_workers: 8 |
| overrides: |
| rpe1_top5000_variable: /large_storage/ctc/datasets/vci/validation/rpe1_top5000_variable.h5ad |
| pad_length: 2048 |
| pad_token_idx: 0 |
| scbasecamp-cellxgene-tahoe: |
| ds_type: filtered_h5ad |
| filter: true |
| filter_by_species: null |
| num_datasets: 15700 |
| train: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_basecount_tahoe_cellxgene_train.csv |
| val: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_basecount_tahoe_cellxgene_val.csv |
| scbasecamp-cellxgene-tahoe-filtered: |
| ds_type: filtered_h5ad |
| filter: true |
| filter_by_species: null |
| num_datasets: 14420 |
| train: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_basecount_tahoe_cellxgene_train_filtered.csv |
| val: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_basecount_tahoe_cellxgene_val_filtered.csv |
| seed: 42 |
| embeddings: |
| current: esm2-cellxgene-basecamp-tahoe |
| esm2-cellxgene: |
| all_embeddings: /large_storage/ctc/userspace/aadduri/data/auxillary/Homo_sapiens.GRCh38.gene_symbol_to_embedding_ESM2.pt |
| ds_emb_mapping: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_cellxgene_ds_mapping.torch |
| num: 19790 |
| size: 5120 |
| valid_genes_masks: null |
| esm2-cellxgene-basecamp-tahoe: |
| all_embeddings: /large_storage/ctc/userspace/aadduri/data/auxillary/Homo_sapiens.GRCh38.gene_symbol_to_embedding_ESM2.pt |
| |
| ds_emb_mapping: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_basecount_tahoe_cellxgene_ds_mapping.torch |
| num: 19790 |
| size: 5120 |
| valid_genes_masks: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_basecount_tahoe_cellxgene_valid_masks.torch |
| esm2-cellxgene-tahoe: |
| all_embeddings: /large_storage/ctc/userspace/aadduri/data/auxillary/Homo_sapiens.GRCh38.gene_symbol_to_embedding_ESM2.pt |
| ds_emb_mapping: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_basecount_tahoe_cellxgene_ds_mapping.torch |
| num: 19790 |
| size: 5120 |
| valid_genes_masks: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_basecount_tahoe_cellxgene_valid_masks.torch |
| experiment: |
| checkpoint: |
| every_n_train_steps: 1000 |
| monitor: trainer/train_loss |
| path: /data/checkpoints |
| save_top_k: 4 |
| compiled: false |
| ddp_timeout: 3600 |
| deaware: false |
| limit_val_batches: 100 |
| local: local |
| name: vci_1.5.0_600M_basecount_tahoe_cxg |
| num_epochs: 16 |
| num_gpus_per_node: 8 |
| num_nodes: 2 |
| port: 12400 |
| profile: |
| enable_profiler: false |
| max_steps: 110 |
| profile_steps: |
| - 10 |
| - 100 |
| val_check_interval: 1000 |
| loss: |
| apply_normalization: false |
| kernel: energy |
| name: tabular |
| uniformity: false |
| model: |
| batch_size: 48 |
| batch_tabular_loss: false |
| counts: true |
| d_hid: 2048 |
| dataset_correction: true |
| dropout: 0.1 |
| ema: false |
| ema_decay: 0.999 |
| ema_update_interval: 1000 |
| emsize: 2048 |
| name: vci |
| nhead: 16 |
| nlayers: 16 |
| num_downsample: 1 |
| output_dim: 2048 |
| rda: true |
| sample_rda: false |
| use_flash_attention: true |
| variable_masking: true |
| optimizer: |
| end: 1.0 |
| gradient_accumulation_steps: 8 |
| max_grad_norm: 0.8 |
| max_lr: 1.0e-05 |
| reset_lr_on_restart: false |
| start: 0.01 |
| weight_decay: 0.01 |
| zclip: true |
| task: |
| mask: 0.2 |
| tokenizer: |
| token_dim: 5120 |
| validations: |
| diff_exp: |
| dataset: /large_storage/ctc/datasets/cellxgene/processed/rpe1_top5000_variable.h5ad |
| dataset_name: rpe1_top5000_variable |
| enable: false |
| eval_interval_multiple: 10 |
| method: null |
| obs_filter_label: non-targeting |
| obs_pert_col: gene |
| top_k_rank: 200 |
| perturbation: |
| ctrl_label: non-targeting |
| dataset: /large_storage/ctc/datasets/vci/validation/replogle_perturbation.h5ad |
| dataset_name: replogle_perturbation |
| enable: false |
| eval_interval_multiple: 10 |
| pert_col: gene |
| wandb: |
| enable: true |
| project: vci |
|
|