Spaces:
Running
Running
Commit
·
df00fce
1
Parent(s):
a3047bf
fix model links
Browse files- README.md +4 -4
- index.html +5 -4
- notebooks/00_quickstart_inference.ipynb +11 -9
- notebooks/01_tracks_prediction.ipynb +1 -1
README.md
CHANGED
|
@@ -18,7 +18,7 @@ Notebooks live in `./notebooks/`:
|
|
| 18 |
- `00_quickstart_inference.ipynb` — load a checkpoint + run inference
|
| 19 |
- `01_tracks_prediction.ipynb` — sequence → functional tracks (+ plotting)
|
| 20 |
- `02_genome_annotation_segmentation.ipynb` — sequence → annotation
|
| 21 |
-
- `03_finetune_head.ipynb` — fine-tune on
|
| 22 |
- `04_model_interpretation.ipynb` — interpretation of post-trained model
|
| 23 |
- `05_sequence_generation.ipynb` — fine-tune NTv3 to generate enhancer sequences
|
| 24 |
|
|
@@ -43,7 +43,7 @@ import torch
|
|
| 43 |
|
| 44 |
pipe = pipeline(
|
| 45 |
task="ntv3-tracks",
|
| 46 |
-
model="InstaDeepAI/
|
| 47 |
trust_remote_code=True,
|
| 48 |
device="cuda",
|
| 49 |
torch_dtype=torch.bfloat16,
|
|
@@ -54,9 +54,9 @@ out = pipe("ACGT...")
|
|
| 54 |
|
| 55 |
## Checkpoints
|
| 56 |
|
| 57 |
-
**Pre-trained:** `InstaDeepAI/
|
| 58 |
|
| 59 |
-
**Post-trained:** `InstaDeepAI/
|
| 60 |
|
| 61 |
## Links
|
| 62 |
|
|
|
|
| 18 |
- `00_quickstart_inference.ipynb` — load a checkpoint + run inference
|
| 19 |
- `01_tracks_prediction.ipynb` — sequence → functional tracks (+ plotting)
|
| 20 |
- `02_genome_annotation_segmentation.ipynb` — sequence → annotation
|
| 21 |
+
- `03_finetune_head.ipynb` — fine-tune on bigwig tracks
|
| 22 |
- `04_model_interpretation.ipynb` — interpretation of post-trained model
|
| 23 |
- `05_sequence_generation.ipynb` — fine-tune NTv3 to generate enhancer sequences
|
| 24 |
|
|
|
|
| 43 |
|
| 44 |
pipe = pipeline(
|
| 45 |
task="ntv3-tracks",
|
| 46 |
+
model="InstaDeepAI/NTv3_650M",
|
| 47 |
trust_remote_code=True,
|
| 48 |
device="cuda",
|
| 49 |
torch_dtype=torch.bfloat16,
|
|
|
|
| 54 |
|
| 55 |
## Checkpoints
|
| 56 |
|
| 57 |
+
**Pre-trained:** `InstaDeepAI/NTv3_8M_pre`, `InstaDeepAI/NTv3_100M_pre`, `InstaDeepAI/NTv3_650M_pre`
|
| 58 |
|
| 59 |
+
**Post-trained:** `InstaDeepAI/NTv3_100M`, `InstaDeepAI/NTv3_650M`
|
| 60 |
|
| 61 |
## Links
|
| 62 |
|
index.html
CHANGED
|
@@ -134,9 +134,9 @@
|
|
| 134 |
<ul>
|
| 135 |
<li>Pretrained checkpoints (see <a href="https://huggingface.co/collections/InstaDeepAI/nucleotide-transformer-v3" target="_blank" rel="noopener">collection</a>):
|
| 136 |
<div style="margin-top: 8px; margin-left: 0;">
|
| 137 |
-
<div><a href="https://huggingface.co/InstaDeepAI/
|
| 138 |
-
<div><a href="https://huggingface.co/InstaDeepAI/
|
| 139 |
-
<div><a href="https://huggingface.co/InstaDeepAI/
|
| 140 |
</div>
|
| 141 |
</li>
|
| 142 |
<li>Post-trained checkpoints:
|
|
@@ -179,7 +179,8 @@ pipe = pipeline(
|
|
| 179 |
<h2>Links</h2>
|
| 180 |
<ul>
|
| 181 |
<li>Paper: (add link)</li>
|
| 182 |
-
<li><a href="https://github.com/instadeepai/nucleotide-transformer">JAX
|
|
|
|
| 183 |
</ul>
|
| 184 |
</div>
|
| 185 |
</div>
|
|
|
|
| 134 |
<ul>
|
| 135 |
<li>Pretrained checkpoints (see <a href="https://huggingface.co/collections/InstaDeepAI/nucleotide-transformer-v3" target="_blank" rel="noopener">collection</a>):
|
| 136 |
<div style="margin-top: 8px; margin-left: 0;">
|
| 137 |
+
<div><a href="https://huggingface.co/InstaDeepAI/NTv3_8M_pre"><code>InstaDeepAI/NTv3_8M_pre</code></a></div>
|
| 138 |
+
<div><a href="https://huggingface.co/InstaDeepAI/NTv3_100M_pre"><code>InstaDeepAI/NTv3_100M_pre</code></a></div>
|
| 139 |
+
<div><a href="https://huggingface.co/InstaDeepAI/NTv3_650M_pre"><code>InstaDeepAI/NTv3_650M_pre</code></a></div>
|
| 140 |
</div>
|
| 141 |
</li>
|
| 142 |
<li>Post-trained checkpoints:
|
|
|
|
| 179 |
<h2>Links</h2>
|
| 180 |
<ul>
|
| 181 |
<li>Paper: (add link)</li>
|
| 182 |
+
<li><a href="https://github.com/instadeepai/nucleotide-transformer">JAX model code (GitHub)</a></li>
|
| 183 |
+
<li>NTv3 benchmark leaderboard: (add link)</li>
|
| 184 |
</ul>
|
| 185 |
</div>
|
| 186 |
</div>
|
notebooks/00_quickstart_inference.ipynb
CHANGED
|
@@ -9,8 +9,8 @@
|
|
| 9 |
"\n",
|
| 10 |
"This notebook demonstrates how to run **quick inference** with both the pre- and post-trained NTv3 checkpoints:\n",
|
| 11 |
"\n",
|
| 12 |
-
"- **Pre-trained (MLM-focused):** `InstaDeepAI/
|
| 13 |
-
"- **Post-trained (task heads):** `InstaDeepAI/
|
| 14 |
"\n",
|
| 15 |
"We show how to:\n",
|
| 16 |
"\n",
|
|
@@ -105,7 +105,7 @@
|
|
| 105 |
},
|
| 106 |
{
|
| 107 |
"cell_type": "code",
|
| 108 |
-
"execution_count":
|
| 109 |
"id": "336bb40c",
|
| 110 |
"metadata": {},
|
| 111 |
"outputs": [
|
|
@@ -260,7 +260,7 @@
|
|
| 260 |
}
|
| 261 |
],
|
| 262 |
"source": [
|
| 263 |
-
"pretrained_model_name = \"InstaDeepAI/
|
| 264 |
"\n",
|
| 265 |
"# Load tokenizer/model\n",
|
| 266 |
"tok_pre = AutoTokenizer.from_pretrained(pretrained_model_name, trust_remote_code=True)\n",
|
|
@@ -318,7 +318,7 @@
|
|
| 318 |
}
|
| 319 |
],
|
| 320 |
"source": [
|
| 321 |
-
"posttrained_model_name = \"InstaDeepAI/
|
| 322 |
"\n",
|
| 323 |
"# Load config/tokenizers/model\n",
|
| 324 |
"cfg_pos = AutoConfig.from_pretrained(posttrained_model_name, trust_remote_code=True)\n",
|
|
@@ -345,10 +345,12 @@
|
|
| 345 |
" output_attentions=True,\n",
|
| 346 |
")\n",
|
| 347 |
"\n",
|
| 348 |
-
"#
|
| 349 |
-
"print(out[\"bigwig_tracks_logits\"].shape)
|
| 350 |
-
"
|
| 351 |
-
"print(out[\"
|
|
|
|
|
|
|
| 352 |
]
|
| 353 |
}
|
| 354 |
],
|
|
|
|
| 9 |
"\n",
|
| 10 |
"This notebook demonstrates how to run **quick inference** with both the pre- and post-trained NTv3 checkpoints:\n",
|
| 11 |
"\n",
|
| 12 |
+
"- **Pre-trained (MLM-focused):** `InstaDeepAI/NTv3_8M_pre`, `InstaDeepAI/NTv3_100M_pre`, `InstaDeepAI/NTv3_650M_pre`\n",
|
| 13 |
+
"- **Post-trained (task heads):** `InstaDeepAI/NTv3_100M`, `InstaDeepAI/NTv3_650M`\n",
|
| 14 |
"\n",
|
| 15 |
"We show how to:\n",
|
| 16 |
"\n",
|
|
|
|
| 105 |
},
|
| 106 |
{
|
| 107 |
"cell_type": "code",
|
| 108 |
+
"execution_count": null,
|
| 109 |
"id": "336bb40c",
|
| 110 |
"metadata": {},
|
| 111 |
"outputs": [
|
|
|
|
| 260 |
}
|
| 261 |
],
|
| 262 |
"source": [
|
| 263 |
+
"pretrained_model_name = \"InstaDeepAI/NTv3_8M_pre\"\n",
|
| 264 |
"\n",
|
| 265 |
"# Load tokenizer/model\n",
|
| 266 |
"tok_pre = AutoTokenizer.from_pretrained(pretrained_model_name, trust_remote_code=True)\n",
|
|
|
|
| 318 |
}
|
| 319 |
],
|
| 320 |
"source": [
|
| 321 |
+
"posttrained_model_name = \"InstaDeepAI/NTv3_100M\"\n",
|
| 322 |
"\n",
|
| 323 |
"# Load config/tokenizers/model\n",
|
| 324 |
"cfg_pos = AutoConfig.from_pretrained(posttrained_model_name, trust_remote_code=True)\n",
|
|
|
|
| 345 |
" output_attentions=True,\n",
|
| 346 |
")\n",
|
| 347 |
"\n",
|
| 348 |
+
"# 7k human tracks over the central 37.5% of the input sequence\n",
|
| 349 |
+
"print(\"bigwig_tracks_logits:\", out[\"bigwig_tracks_logits\"].shape)\n",
|
| 350 |
+
"# Location of 21 genomic elements over the central 37.5% of the input sequence\n",
|
| 351 |
+
"print(\"bed_tracks_logits:\", out[\"bed_tracks_logits\"].shape)\n",
|
| 352 |
+
"# Language model logits for whole sequence over vocabulary\n",
|
| 353 |
+
"print(\"language model logits:\", out[\"logits\"].shape)"
|
| 354 |
]
|
| 355 |
}
|
| 356 |
],
|
notebooks/01_tracks_prediction.ipynb
CHANGED
|
@@ -112,7 +112,7 @@
|
|
| 112 |
"# -----------------------------\n",
|
| 113 |
"# User inputs\n",
|
| 114 |
"# -----------------------------\n",
|
| 115 |
-
"model_name = \"InstaDeepAI/
|
| 116 |
"\n",
|
| 117 |
"# Example window from a given species (edit these) - needs to be a multiple of 128 due to the model downsampling\n",
|
| 118 |
"assembly = \"hg38\"\n",
|
|
|
|
| 112 |
"# -----------------------------\n",
|
| 113 |
"# User inputs\n",
|
| 114 |
"# -----------------------------\n",
|
| 115 |
+
"model_name = \"InstaDeepAI/NTv3_100M\" # options: \"InstaDeepAI/NTv3_100M\" or \"InstaDeepAI/NTv3_650M\"\n",
|
| 116 |
"\n",
|
| 117 |
"# Example window from a given species (edit these) - needs to be a multiple of 128 due to the model downsampling\n",
|
| 118 |
"assembly = \"hg38\"\n",
|