mineself2016 committed on
Commit
0941428
·
verified ·
1 Parent(s): 0231a38

Sync README model card from project root

Browse files
Files changed (1) hide show
  1. README.md +44 -14
README.md CHANGED
@@ -101,11 +101,11 @@ from transformers import AutoTokenizer, AutoModel
101
 
102
  # Load pretrained model and tokenizer
103
  tokenizer = AutoTokenizer.from_pretrained(
104
- "your-username/GeneMamba-24l-512d",
105
  trust_remote_code=True
106
  )
107
  model = AutoModel.from_pretrained(
108
- "your-username/GeneMamba-24l-512d",
109
  trust_remote_code=True
110
  )
111
 
@@ -151,7 +151,7 @@ from torch.utils.data import Dataset
151
 
152
  # Load model with classification head
153
  model = AutoModelForSequenceClassification.from_pretrained(
154
- "your-username/GeneMamba-24l-512d",
155
  num_labels=10, # number of cell types
156
  trust_remote_code=True
157
  )
@@ -215,7 +215,7 @@ from torch.utils.data import Dataset
215
 
216
  # Load model for masked LM
217
  model = AutoModelForMaskedLM.from_pretrained(
218
- "your-username/GeneMamba-24l-512d",
219
  trust_remote_code=True
220
  )
221
 
@@ -266,7 +266,7 @@ from transformers.utils.hub import register_and_push_to_hub_with_git_history
266
 
267
  # Create config
268
  config = AutoConfig.from_pretrained(
269
- "your-username/GeneMamba-24l-512d",
270
  trust_remote_code=True
271
  )
272
 
@@ -340,17 +340,27 @@ Task-Specific Heads:
340
  ```python
341
  # Standard loading (backbone only)
342
  from transformers import AutoModel
343
- model = AutoModel.from_pretrained("user/GeneMamba", trust_remote_code=True)
344
 
345
  # Classification
346
  from transformers import AutoModelForSequenceClassification
347
  model = AutoModelForSequenceClassification.from_pretrained(
348
- "user/GeneMamba", num_labels=10, trust_remote_code=True
349
  )
350
 
351
  # Masked LM
352
  from transformers import AutoModelForMaskedLM
353
- model = AutoModelForMaskedLM.from_pretrained("user/GeneMamba", trust_remote_code=True)
 
 
 
 
 
 
 
 
 
 
354
  ```
355
 
356
  ### Saving Models
@@ -375,7 +385,7 @@ All hyperparameters are stored in `config.json`:
375
  "hidden_size": 512,
376
  "num_hidden_layers": 24,
377
  "vocab_size": 25426,
378
- "mamba_mode": "gate",
379
  "embedding_pooling": "mean"
380
  }
381
  ```
@@ -434,15 +444,16 @@ input_ids = tokenizer(gene_ids, return_tensors="pt", padding=True)["input_ids"]
434
 
435
  See the `examples/` directory for complete scripts:
436
 
437
- - `1_extract_embeddings.py` - Extract cell embeddings
438
- - `2_finetune_classification.py` - Cell type annotation
439
- - `3_continue_pretraining.py` - Domain adaptation
440
- - `4_pretrain_from_scratch.py` - Training from scratch
 
441
 
442
  Run any example:
443
 
444
  ```bash
445
- python examples/1_extract_embeddings.py
446
  ```
447
 
448
  ---
@@ -471,6 +482,25 @@ This is expected for custom models. Either:
471
  1. Set `trust_remote_code=True` (safe if loading from official repo)
472
  2. Or use `sys.path.insert(0, '.')` if loading local code
473
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
474
  ### Out of Memory (OOM)
475
 
476
  Reduce batch size:
 
101
 
102
  # Load pretrained model and tokenizer
103
  tokenizer = AutoTokenizer.from_pretrained(
104
+ "mineself2016/GeneMamba",
105
  trust_remote_code=True
106
  )
107
  model = AutoModel.from_pretrained(
108
+ "mineself2016/GeneMamba",
109
  trust_remote_code=True
110
  )
111
 
 
151
 
152
  # Load model with classification head
153
  model = AutoModelForSequenceClassification.from_pretrained(
154
+ "mineself2016/GeneMamba",
155
  num_labels=10, # number of cell types
156
  trust_remote_code=True
157
  )
 
215
 
216
  # Load model for masked LM
217
  model = AutoModelForMaskedLM.from_pretrained(
218
+ "mineself2016/GeneMamba",
219
  trust_remote_code=True
220
  )
221
 
 
266
 
267
  # Create config
268
  config = AutoConfig.from_pretrained(
269
+ "mineself2016/GeneMamba",
270
  trust_remote_code=True
271
  )
272
 
 
340
  ```python
341
  # Standard loading (backbone only)
342
  from transformers import AutoModel
343
+ model = AutoModel.from_pretrained("mineself2016/GeneMamba", trust_remote_code=True)
344
 
345
  # Classification
346
  from transformers import AutoModelForSequenceClassification
347
  model = AutoModelForSequenceClassification.from_pretrained(
348
+ "mineself2016/GeneMamba", num_labels=10, trust_remote_code=True
349
  )
350
 
351
  # Masked LM
352
  from transformers import AutoModelForMaskedLM
353
+ model = AutoModelForMaskedLM.from_pretrained("mineself2016/GeneMamba", trust_remote_code=True)
354
+ ```
355
+
356
+ Load other model sizes from subfolders:
357
+
358
+ ```python
359
+ model_24l_768d = AutoModel.from_pretrained(
360
+ "mineself2016/GeneMamba",
361
+ subfolder="24l-768d",
362
+ trust_remote_code=True,
363
+ )
364
  ```
365
 
366
  ### Saving Models
 
385
  "hidden_size": 512,
386
  "num_hidden_layers": 24,
387
  "vocab_size": 25426,
388
+ "mamba_mode": "mean",
389
  "embedding_pooling": "mean"
390
  }
391
  ```
 
444
 
445
  See the `examples/` directory for complete scripts:
446
 
447
+ - `00_preprocess_to_input_ids.py` - h5ad to ranked gene token IDs
448
+ - `01_extract_embeddings.py` - Extract cell embeddings
449
+ - `10_finetune_classification.py` - Cell type annotation
450
+ - `20_continue_pretraining_reference.py` - Domain adaptation
451
+ - `21_pretrain_from_scratch_reference.py` - Training from scratch
452
 
453
  Run any example:
454
 
455
  ```bash
456
+ python examples/01_extract_embeddings.py
457
  ```
458
 
459
  ---
 
482
  1. Set `trust_remote_code=True` (safe if loading from official repo)
483
  2. Or use `sys.path.insert(0, '.')` if loading local code
484
 
485
+ ### Old Cached Code / Shape Mismatch
486
+
487
+ If you still see old loading errors after an update, force refresh files from Hub:
488
+
489
+ ```python
490
+ from transformers import AutoModel
491
+ model = AutoModel.from_pretrained(
492
+ "mineself2016/GeneMamba",
493
+ trust_remote_code=True,
494
+ force_download=True,
495
+ )
496
+ ```
497
+
498
+ You can also clear local cache if needed:
499
+
500
+ ```bash
501
+ rm -rf ~/.cache/huggingface/hub/models--mineself2016--GeneMamba
502
+ ```
503
+
504
  ### Out of Memory (OOM)
505
 
506
  Reduce batch size: