bernardo-de-almeida committed on
Commit
5340274
·
1 Parent(s): eeb19dd

new notebook structure

Browse files
index.html CHANGED
@@ -305,13 +305,22 @@
305
 
306
  <div class="card-stack">
307
  <div class="card">
308
- <h2>📓 Notebooks (browse <a href="https://huggingface.co/spaces/InstaDeepAI/ntv3/tree/main/notebooks" target="_blank" rel="noopener">folder</a>)</h2>
309
  <ul>
310
  <li><a href="https://huggingface.co/spaces/InstaDeepAI/ntv3/blob/main/notebooks/00_quickstart_inference.ipynb" target="_blank" rel="noopener">🚀 00 — Quickstart inference</a></li>
311
  <li><a href="https://huggingface.co/spaces/InstaDeepAI/ntv3/blob/main/notebooks/01_tracks_prediction.ipynb" target="_blank" rel="noopener">📊 01 — Tracks prediction</a></li>
312
- <li><a href="https://huggingface.co/spaces/InstaDeepAI/ntv3/blob/main/notebooks/02_genome_annotation.ipynb" target="_blank" rel="noopener">🏷️ 02 — Genome annotation / segmentation</a></li>
 
 
 
 
 
 
 
 
 
313
  <li>🎯 03 — Fine-tune on bigwig tracks</li>
314
- <li>🔍 04 — Model interpretation</li>
315
  <li>🧪 05 — Sequence generation</li>
316
  </ul>
317
  </div>
@@ -361,16 +370,20 @@ print(len(out.attentions)) # equals transformer layers = 12
361
  <div class="card">
362
  <h2>💻 Use a post-trained model</h2>
363
  <p>Here is a quick example of how to use the post-trained NTv3 650M model to predict tracks for a human genomic window.</p>
364
- <div class="code"><pre><code class="language-python">from transformers import AutoConfig
 
365
 
366
- model_name = "InstaDeepAI/NTv3_650M"
367
 
368
- # Load track prediction pipeline
369
- cfg = AutoConfig.from_pretrained(model_name, trust_remote_code=True, force_download=True)
370
- pipe = cfg.load_tracks_pipeline(model_name, device="auto") # or "cpu"/"cuda"/"mps"
 
 
 
371
 
372
  # Run track prediction
373
- out = pipe(
374
  {
375
  "chrom": "chr19",
376
  "start": 6_700_000,
@@ -399,7 +412,7 @@ print("language model logits:", tuple(out.mlm_logits.shape))</code></pre></div>
399
  }
400
  elements_to_plot = ["protein_coding_gene", "exon", "intron", "splice_donor", "splice_acceptor"]
401
 
402
- out = pipe(
403
  {"chrom": "chr19", "start": 6_700_000, "end": 6_831_072, "species": "human"},
404
  plot=True,
405
  tracks_to_plot=tracks_to_plot,
 
305
 
306
  <div class="card-stack">
307
  <div class="card">
308
+ <h2>📓 Tutorial notebooks (browse <a href="https://huggingface.co/spaces/InstaDeepAI/ntv3/tree/main/notebooks_tutorial" target="_blank" rel="noopener">folder</a>)</h2>
309
  <ul>
310
 <li><a href="https://huggingface.co/spaces/InstaDeepAI/ntv3/blob/main/notebooks_tutorial/00_quickstart_inference.ipynb" target="_blank" rel="noopener">🚀 00 — Quickstart inference</a></li>
311
 <li><a href="https://huggingface.co/spaces/InstaDeepAI/ntv3/blob/main/notebooks_tutorial/01_tracks_prediction.ipynb" target="_blank" rel="noopener">📊 01 — Tracks prediction</a></li>
312
+ <li>🎯 02 — Fine-tune on bigwig tracks</li>
313
+ <li>🔍 03 — Model interpretation</li>
314
+ <li>🧪 04 — Training NTv3 generative model</li>
315
+ </ul>
316
+ </div>
317
+ <div class="card">
318
+ <h2>📓 Pipelines notebooks (browse <a href="https://huggingface.co/spaces/InstaDeepAI/ntv3/tree/main/notebooks_pipelines" target="_blank" rel="noopener">folder</a>)</h2>
319
+ <ul>
320
+ <li> 🎯 01 — Generate bigwig predictions for certain tracks</li>
321
 <li><a href="https://huggingface.co/spaces/InstaDeepAI/ntv3/blob/main/notebooks_pipelines/01_genome_annotation.ipynb" target="_blank" rel="noopener">🏷️ 02 — Genome annotation / segmentation</a></li>
322
  <li>🎯 03 — Fine-tune on bigwig tracks</li>
323
+ <li>🔍 04 — Interpret a given genomic region</li>
324
  <li>🧪 05 — Sequence generation</li>
325
  </ul>
326
  </div>
 
370
  <div class="card">
371
  <h2>💻 Use a post-trained model</h2>
372
  <p>Here is a quick example of how to use the post-trained NTv3 650M model to predict tracks for a human genomic window.</p>
373
+ <div class="code"><pre><code class="language-python">from transformers import pipeline
374
+ import torch
375
 
376
+ model_name = "InstaDeepAI/NTv3_650M_pos"
377
 
378
+ ntv3_tracks = pipeline(
379
+ "ntv3-tracks",
380
+ model=model_name,
381
+ trust_remote_code=True,
382
+ device=0 if torch.cuda.is_available() else -1,
383
+ )
384
 
385
  # Run track prediction
386
+ out = ntv3_tracks(
387
  {
388
  "chrom": "chr19",
389
  "start": 6_700_000,
 
412
  }
413
  elements_to_plot = ["protein_coding_gene", "exon", "intron", "splice_donor", "splice_acceptor"]
414
 
415
+ out = ntv3_tracks(
416
  {"chrom": "chr19", "start": 6_700_000, "end": 6_831_072, "species": "human"},
417
  plot=True,
418
  tracks_to_plot=tracks_to_plot,
notebooks/02_genome_annotation.ipynb → notebooks_pipelines/01_genome_annotation.ipynb RENAMED
@@ -127,7 +127,7 @@
127
  },
128
  {
129
  "cell_type": "code",
130
- "execution_count": 6,
131
  "id": "4857d15c",
132
  "metadata": {},
133
  "outputs": [
@@ -274,7 +274,6 @@
274
  " model=model_name,\n",
275
  " trust_remote_code=True,\n",
276
  " device=0 if torch.cuda.is_available() else -1,\n",
277
- " force_download=True,\n",
278
  ")\n",
279
  "\n",
280
  "# Run pipeline: DNA -> NTv3 -> HMM -> GFF3\n",
 
127
  },
128
  {
129
  "cell_type": "code",
130
+ "execution_count": null,
131
  "id": "4857d15c",
132
  "metadata": {},
133
  "outputs": [
 
274
  " model=model_name,\n",
275
  " trust_remote_code=True,\n",
276
  " device=0 if torch.cuda.is_available() else -1,\n",
 
277
  ")\n",
278
  "\n",
279
  "# Run pipeline: DNA -> NTv3 -> HMM -> GFF3\n",
{notebooks → notebooks_tutorial}/00_quickstart_inference.ipynb RENAMED
File without changes
{notebooks → notebooks_tutorial}/01_tracks_prediction.ipynb RENAMED
File without changes