krkawzq committed on
Commit
8a8a230
·
verified ·
1 Parent(s): 9998e32

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +22 -3
  2. args.json +56 -0
  3. best_model.pt +3 -0
  4. vocab.json +0 -0
README.md CHANGED
@@ -1,3 +1,22 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: perturblab
3
+ tags:
4
+ - biology
5
+ - genomics
6
+ - scgpt
7
+ license: mit
8
+ base_model: bowang-lab/scGPT
9
+ ---
10
+
11
+ # scgpt-continual-pretrained
12
+
13
+ ## Model Description
14
+ These model weights were originally downloaded from the [bowang-lab/scGPT](https://github.com/bowang-lab/scGPT) repository.
15
+ They have been re-uploaded here for ease of use with the `perturblab` library.
16
+
17
+ ## Source
18
+ - **Original Repository**: [https://github.com/bowang-lab/scGPT](https://github.com/bowang-lab/scGPT)
19
+ - **Paper**: [scGPT: Foundation Model for Single Cell Biology](https://www.nature.com/articles/s41592-024-02201-0)
20
+
21
+ ## Usage
22
+ Intended for internal use with the PerturbLab framework.
args.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_source": "/scratch/ssd004/datasets/cellxgene/scb_strict/human",
3
+ "save_dir": "/scratch/ssd004/datasets/cellxgene/save/cellxgene_census_human-Dec18-13-52-2023",
4
+ "load_model": "/scratch/ssd004/datasets/cellxgene/save/scGPT_human",
5
+ "n_hvg": null,
6
+ "valid_size_or_ratio": 0.003,
7
+ "dist_backend": "nccl",
8
+ "grad_accu_steps": 1,
9
+ "pad_token": "<pad>",
10
+ "input_style": "binned",
11
+ "input_emb_style": "continuous",
12
+ "n_bins": 51,
13
+ "max_seq_len": 1200,
14
+ "training_tasks": "both",
15
+ "dist_url": "tcp://gpu183.cluster.local:54165",
16
+ "mask_ratio": [
17
+ 0.25,
18
+ 0.5,
19
+ 0.75
20
+ ],
21
+ "trunc_by_sample": true,
22
+ "vocab_path": "/scratch/ssd004/datasets/cellxgene/scFormer/scformer/tokenizer/default_census_vocab.json",
23
+ "rank": 0,
24
+ "batch_size": 24,
25
+ "eval_batch_size": 48,
26
+ "epochs": 10,
27
+ "lr": 0.0001,
28
+ "scheduler_interval": 100,
29
+ "scheduler_factor": 0.99,
30
+ "warmup_ratio_or_step": 10000.0,
31
+ "no_cls": false,
32
+ "no_cce": true,
33
+ "fp16": true,
34
+ "fast_transformer": true,
35
+ "annotation_source": "/scratch/ssd004/datasets/cellxgene/tabula_sapiens/parquet/",
36
+ "annotation_valid_size_or_ratio": 0.1,
37
+ "nlayers": 12,
38
+ "nheads": 8,
39
+ "embsize": 512,
40
+ "d_hid": 512,
41
+ "dropout": 0.2,
42
+ "n_layers_cls": 3,
43
+ "annote_max_seq_len": 5000,
44
+ "log_interval": 500,
45
+ "save_interval": 1000,
46
+ "mask_value": -1,
47
+ "pad_value": -2,
48
+ "USE_CLS": true,
49
+ "USE_CCE": false,
50
+ "MVC": true,
51
+ "USE_GENERATIVE_TRAINING": true,
52
+ "world_size": 8,
53
+ "distributed": true,
54
+ "local_rank": 0,
55
+ "gpu": 0
56
+ }
best_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad0252a1971e0cd619b7116dbab3177432236c4537225d54280a2aa7e5fe402a
3
+ size 207861754
vocab.json ADDED
The diff for this file is too large to render. See raw diff