mineself2016
/

GeneMamba

@@ -101,11 +101,11 @@ from transformers import AutoTokenizer, AutoModel
 # Load pretrained model and tokenizer
 tokenizer = AutoTokenizer.from_pretrained(
-    "your-username/GeneMamba-24l-512d",
     trust_remote_code=True
 )
 model = AutoModel.from_pretrained(
-    "your-username/GeneMamba-24l-512d",
     trust_remote_code=True
 )
@@ -151,7 +151,7 @@ from torch.utils.data import Dataset
 # Load model with classification head
 model = AutoModelForSequenceClassification.from_pretrained(
-    "your-username/GeneMamba-24l-512d",
     num_labels=10,  # number of cell types
     trust_remote_code=True
 )
@@ -215,7 +215,7 @@ from torch.utils.data import Dataset
 # Load model for masked LM
 model = AutoModelForMaskedLM.from_pretrained(
-    "your-username/GeneMamba-24l-512d",
     trust_remote_code=True
 )
@@ -266,7 +266,7 @@ from transformers.utils.hub import register_and_push_to_hub_with_git_history
 # Create config
 config = AutoConfig.from_pretrained(
-    "your-username/GeneMamba-24l-512d",
     trust_remote_code=True
 )
@@ -340,17 +340,27 @@ Task-Specific Heads:
 ```python
 # Standard loading (backbone only)
 from transformers import AutoModel
-model = AutoModel.from_pretrained("user/GeneMamba", trust_remote_code=True)
 # Classification
 from transformers import AutoModelForSequenceClassification
 model = AutoModelForSequenceClassification.from_pretrained(
-    "user/GeneMamba", num_labels=10, trust_remote_code=True
 )
 # Masked LM
 from transformers import AutoModelForMaskedLM
-model = AutoModelForMaskedLM.from_pretrained("user/GeneMamba", trust_remote_code=True)
 ```
 ### Saving Models
@@ -375,7 +385,7 @@ All hyperparameters are stored in `config.json`:
   "hidden_size": 512,
   "num_hidden_layers": 24,
   "vocab_size": 25426,
-  "mamba_mode": "gate",
   "embedding_pooling": "mean"
 }
 ```
@@ -434,15 +444,16 @@ input_ids = tokenizer(gene_ids, return_tensors="pt", padding=True)["input_ids"]
 See the `examples/` directory for complete scripts:
-- `1_extract_embeddings.py` - Extract cell embeddings
-- `2_finetune_classification.py` - Cell type annotation
-- `3_continue_pretraining.py` - Domain adaptation
-- `4_pretrain_from_scratch.py` - Training from scratch
 Run any example:
 ```bash
-python examples/1_extract_embeddings.py
 ```
 ---
@@ -471,6 +482,25 @@ This is expected for custom models. Either:
 1. Set `trust_remote_code=True` (safe if loading from official repo)
 2. Or use `sys.path.insert(0, '.')` if loading local code
 ### Out of Memory (OOM)
 Reduce batch size:

 # Load pretrained model and tokenizer
 tokenizer = AutoTokenizer.from_pretrained(
+    "mineself2016/GeneMamba",
     trust_remote_code=True
 )
 model = AutoModel.from_pretrained(
+    "mineself2016/GeneMamba",
     trust_remote_code=True
 )
 # Load model with classification head
 model = AutoModelForSequenceClassification.from_pretrained(
+    "mineself2016/GeneMamba",
     num_labels=10,  # number of cell types
     trust_remote_code=True
 )
 # Load model for masked LM
 model = AutoModelForMaskedLM.from_pretrained(
+    "mineself2016/GeneMamba",
     trust_remote_code=True
 )
 # Create config
 config = AutoConfig.from_pretrained(
+    "mineself2016/GeneMamba",
     trust_remote_code=True
 )
 ```python
 # Standard loading (backbone only)
 from transformers import AutoModel
+model = AutoModel.from_pretrained("mineself2016/GeneMamba", trust_remote_code=True)
 # Classification
 from transformers import AutoModelForSequenceClassification
 model = AutoModelForSequenceClassification.from_pretrained(
+    "mineself2016/GeneMamba", num_labels=10, trust_remote_code=True
 )
 # Masked LM
 from transformers import AutoModelForMaskedLM
+model = AutoModelForMaskedLM.from_pretrained("mineself2016/GeneMamba", trust_remote_code=True)
+```
+Load other model sizes from subfolders:
+```python
+model_24l_768d = AutoModel.from_pretrained(
+    "mineself2016/GeneMamba",
+    subfolder="24l-768d",
+    trust_remote_code=True,
+)
 ```
 ### Saving Models
   "hidden_size": 512,
   "num_hidden_layers": 24,
   "vocab_size": 25426,
+  "mamba_mode": "mean",
   "embedding_pooling": "mean"
 }
 ```
 See the `examples/` directory for complete scripts:
+- `00_preprocess_to_input_ids.py` - Convert h5ad to ranked gene token IDs
+- `01_extract_embeddings.py` - Extract cell embeddings
+- `10_finetune_classification.py` - Cell type annotation
+- `20_continue_pretraining_reference.py` - Domain adaptation
+- `21_pretrain_from_scratch_reference.py` - Training from scratch
 Run any example:
 ```bash
+python examples/01_extract_embeddings.py
 ```
 ---
 1. Set `trust_remote_code=True` (safe if loading from official repo)
 2. Or use `sys.path.insert(0, '.')` if loading local code
+### Old Cached Code / Shape Mismatch
+If you still see old loading errors after an update, force a fresh download of the files from the Hub:
+```python
+from transformers import AutoModel
+model = AutoModel.from_pretrained(
+    "mineself2016/GeneMamba",
+    trust_remote_code=True,
+    force_download=True,
+)
+```
+You can also clear the local cache if needed:
+```bash
+rm -rf ~/.cache/huggingface/hub/models--mineself2016--GeneMamba
+```
 ### Out of Memory (OOM)
 Reduce batch size: