Spaces:

InstaDeepAI
/

ntv3

Running

App Files Community

bernardo-de-almeida committed on Dec 22, 2025

Commit

d297f70

1 Parent(s): 2dae583

small fixes and add interpretability pipeline notebook

Browse files

Files changed (5) hide show

notebooks_pipelines/01_functional_track_prediction.ipynb +6 -6
notebooks_pipelines/02_functional_interpretation.ipynb +0 -0
notebooks_tutorials/00_quickstart_inference.ipynb +35 -22
notebooks_tutorials/01_tracks_prediction.ipynb +3 -229
tabs/home.html +2 -3

notebooks_pipelines/01_functional_track_prediction.ipynb CHANGED Viewed

@@ -297,7 +297,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "84f013f6",
    "metadata": {},
    "outputs": [
@@ -340,14 +340,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "0e373749",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/html": [
-       "<div id=\"jb_4813434_buttons\"></div><div id=\"jb_4813434_igvcontainer\"></div>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -358,7 +358,7 @@
     },
     {
      "data": {
-      "application/javascript": "window.igv.MessageHandler.on({\"id\": \"jb_4813434\", \"command\": \"createBrowser\", \"data\": {\"genome\": \"hg38\", \"locus\": \"chr19:6740960-6790112\", \"tracks\": [{\"name\": \"K562 RNA-seq\", \"format\": \"bigwig\", \"url\": \"bigwig_outputs/K562_RNA_seq.bw\", \"height\": 70, \"autoscale\": true, \"displayMode\": \"EXPANDED\"}, {\"name\": \"K562 DNAse\", \"format\": \"bigwig\", \"url\": \"bigwig_outputs/K562_DNAse.bw\", \"height\": 70, \"autoscale\": true, \"displayMode\": \"EXPANDED\"}, {\"name\": \"K562 H3k4me3\", \"format\": \"bigwig\", \"url\": \"bigwig_outputs/K562_H3k4me3.bw\", \"height\": 70, \"autoscale\": true, \"displayMode\": \"EXPANDED\"}, {\"name\": \"K562 CTCF\", \"format\": \"bigwig\", \"url\": \"bigwig_outputs/K562_CTCF.bw\", \"height\": 70, \"autoscale\": true, \"displayMode\": \"EXPANDED\"}, {\"name\": \"HepG2 RNA-seq\", \"format\": \"bigwig\", \"url\": \"bigwig_outputs/HepG2_RNA_seq.bw\", \"height\": 70, \"autoscale\": true, \"displayMode\": \"EXPANDED\"}, {\"name\": \"HepG2 DNAse\", \"format\": \"bigwig\", \"url\": \"bigwig_outputs/HepG2_DNAse.bw\", \"height\": 70, \"autoscale\": true, \"displayMode\": \"EXPANDED\"}, {\"name\": \"HepG2 H3k4me3\", \"format\": \"bigwig\", \"url\": \"bigwig_outputs/HepG2_H3k4me3.bw\", \"height\": 70, \"autoscale\": true, \"displayMode\": \"EXPANDED\"}, {\"name\": \"HepG2 CTCF\", \"format\": \"bigwig\", \"url\": \"bigwig_outputs/HepG2_CTCF.bw\", \"height\": 70, \"autoscale\": true, \"displayMode\": \"EXPANDED\"}], \"id\": \"jb_4813434\"}})",
       "text/plain": [
        "<IPython.core.display.Javascript object>"
       ]
@@ -369,10 +369,10 @@
     {
      "data": {
       "text/plain": [
-       "<igv_notebook.browser.Browser at 0x33ddd15a0>"
       ]
      },
-     "execution_count": 39,
      "metadata": {},
      "output_type": "execute_result"
     }

   },
   {
    "cell_type": "code",
+   "execution_count": 7,
    "id": "84f013f6",
    "metadata": {},
    "outputs": [
   },
   {
    "cell_type": "code",
+   "execution_count": 8,
    "id": "0e373749",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/html": [
+       "<div id=\"jb_8720993_buttons\"></div><div id=\"jb_8720993_igvcontainer\"></div>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
     },
     {
      "data": {
+      "application/javascript": "window.igv.MessageHandler.on({\"id\": \"jb_8720993\", \"command\": \"createBrowser\", \"data\": {\"genome\": \"hg38\", \"locus\": \"chr19:6740960-6790112\", \"tracks\": [{\"name\": \"K562 RNA-seq\", \"format\": \"bigwig\", \"url\": \"bigwig_outputs/K562_RNA_seq.bw\", \"height\": 70, \"autoscale\": true, \"displayMode\": \"EXPANDED\"}, {\"name\": \"K562 DNAse\", \"format\": \"bigwig\", \"url\": \"bigwig_outputs/K562_DNAse.bw\", \"height\": 70, \"autoscale\": true, \"displayMode\": \"EXPANDED\"}, {\"name\": \"K562 H3k4me3\", \"format\": \"bigwig\", \"url\": \"bigwig_outputs/K562_H3k4me3.bw\", \"height\": 70, \"autoscale\": true, \"displayMode\": \"EXPANDED\"}, {\"name\": \"K562 CTCF\", \"format\": \"bigwig\", \"url\": \"bigwig_outputs/K562_CTCF.bw\", \"height\": 70, \"autoscale\": true, \"displayMode\": \"EXPANDED\"}, {\"name\": \"HepG2 RNA-seq\", \"format\": \"bigwig\", \"url\": \"bigwig_outputs/HepG2_RNA_seq.bw\", \"height\": 70, \"autoscale\": true, \"displayMode\": \"EXPANDED\"}, {\"name\": \"HepG2 DNAse\", \"format\": \"bigwig\", \"url\": \"bigwig_outputs/HepG2_DNAse.bw\", \"height\": 70, \"autoscale\": true, \"displayMode\": \"EXPANDED\"}, {\"name\": \"HepG2 H3k4me3\", \"format\": \"bigwig\", \"url\": \"bigwig_outputs/HepG2_H3k4me3.bw\", \"height\": 70, \"autoscale\": true, \"displayMode\": \"EXPANDED\"}, {\"name\": \"HepG2 CTCF\", \"format\": \"bigwig\", \"url\": \"bigwig_outputs/HepG2_CTCF.bw\", \"height\": 70, \"autoscale\": true, \"displayMode\": \"EXPANDED\"}], \"id\": \"jb_8720993\"}})",
       "text/plain": [
        "<IPython.core.display.Javascript object>"
       ]
     {
      "data": {
       "text/plain": [
+       "<igv_notebook.browser.Browser at 0x33bd035b0>"
       ]
      },
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }

notebooks_pipelines/02_functional_interpretation.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

notebooks_tutorials/00_quickstart_inference.ipynb CHANGED Viewed

@@ -80,6 +80,30 @@
         "print(\"torch_dtype:\", torch_dtype)"
       ]
     },
     {
       "cell_type": "markdown",
       "id": "82146876",
@@ -95,7 +119,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 3,
       "id": "336bb40c",
       "metadata": {},
       "outputs": [
@@ -103,8 +127,7 @@
           "name": "stdout",
           "output_type": "stream",
           "text": [
-            "torch.Size([2, 128, 11])\n",
-            "MLM logits shape: (2, 128, 11)\n"
           ]
         }
       ],
@@ -115,13 +138,11 @@
         "tok_pre = AutoTokenizer.from_pretrained(pretrained_model_name, trust_remote_code=True)\n",
         "model_pre = AutoModelForMaskedLM.from_pretrained(pretrained_model_name, trust_remote_code=True)\n",
         "\n",
-        "# Example: human sequence\n",
-        "seqs = [\"ATCGNATCG\", \"ACGT\"]\n",
         "batch = tok_pre(seqs, add_special_tokens=False, padding=True, pad_to_multiple_of=128, return_tensors=\"pt\")\n",
         "out = model_pre(**batch)\n",
         "\n",
-        "print(out.logits.shape)       # (B, L, V = 11)\n",
-        "\n",
         "# Access MLM logits\n",
         "mlm_logits = out[\"logits\"]\n",
         "print(\"MLM logits shape:\", tuple(mlm_logits.shape))"
@@ -144,7 +165,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 4,
       "id": "6cc5f2df",
       "metadata": {},
       "outputs": [
@@ -153,9 +174,9 @@
           "output_type": "stream",
           "text": [
             "Supported species: dict_keys(['<bos>', '<cls>', '<eos>', '<mask>', '<pad>', '<unk>', 'amphiprion_ocellaris', 'arabidopsis_thaliana', 'bison_bison_bison', 'caenorhabditis_elegans', 'canis_lupus_familiaris', 'chinchilla_lanigera', 'ciona_intestinalis', 'danio_rerio', 'drosophila_melanogaster', 'felis_catus', 'gallus_gallus', 'glycine_max', 'gorilla_gorilla', 'gossypium_hirsutum', 'human', 'macaca_nemestrina', 'mouse', 'oryza_sativa', 'rattus_norvegicus', 'salmo_trutta', 'serinus_canaria', 'tetraodon_nigroviridis', 'triticum_aestivum', 'zea_mays'])\n",
-            "bigwig_tracks_logits: (2, 48, 7362)\n",
-            "bed_tracks_logits: (2, 48, 21, 2)\n",
-            "language model logits: (2, 128, 11)\n"
           ]
         }
       ],
@@ -166,12 +187,12 @@
         "tok_post = AutoTokenizer.from_pretrained(post_trained_model_name, trust_remote_code=True)\n",
         "model_post = AutoModel.from_pretrained(post_trained_model_name, trust_remote_code=True)\n",
         "\n",
-        "# Prepare inputs\n",
-        "batch = tok_post([\"ATCGNATCG\", \"ACGT\"], add_special_tokens=False, padding=True, pad_to_multiple_of=128, return_tensors=\"pt\")\n",
         "\n",
         "# To show all supported species: \n",
         "print(\"Supported species:\", model_post.config.species_to_token_id.keys())\n",
-        "# Species tokens\n",
         "species = ['human', 'mouse']\n",
         "species_ids = model_post.encode_species(species)\n",
         "\n",
@@ -188,14 +209,6 @@
         "# Language model logits for whole sequence over vocabulary\n",
         "print(\"language model logits:\", tuple(out[\"logits\"].shape))\n"
       ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "id": "037076cd",
-      "metadata": {},
-      "outputs": [],
-      "source": []
     }
   ],
   "metadata": {

         "print(\"torch_dtype:\", torch_dtype)"
       ]
     },
+    {
+      "cell_type": "code",
+      "execution_count": 3,
+      "id": "ef0e6d69",
+      "metadata": {},
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            " Sequence lengths: [128, 512]\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Dummy DNA sequences\n",
+        "seqs = [\n",
+        "    \"ACGT\" * 32,\n",
+        "    \"ACGT\" * 128\n",
+        "]\n",
+        "\n",
+        "print(\" Sequence lengths:\", [len(s) for s in seqs])"
+      ]
+    },
     {
       "cell_type": "markdown",
       "id": "82146876",
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "id": "336bb40c",
       "metadata": {},
       "outputs": [
           "name": "stdout",
           "output_type": "stream",
           "text": [
+            "MLM logits shape: (2, 512, 11)\n"
           ]
         }
       ],
         "tok_pre = AutoTokenizer.from_pretrained(pretrained_model_name, trust_remote_code=True)\n",
         "model_pre = AutoModelForMaskedLM.from_pretrained(pretrained_model_name, trust_remote_code=True)\n",
         "\n",
+        "# Example inference\n",
+        "# Tokenization will pad all sequences to a multiple of 128\n",
         "batch = tok_pre(seqs, add_special_tokens=False, padding=True, pad_to_multiple_of=128, return_tensors=\"pt\")\n",
         "out = model_pre(**batch)\n",
         "\n",
         "# Access MLM logits\n",
         "mlm_logits = out[\"logits\"]\n",
         "print(\"MLM logits shape:\", tuple(mlm_logits.shape))"
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "id": "6cc5f2df",
       "metadata": {},
       "outputs": [
           "output_type": "stream",
           "text": [
             "Supported species: dict_keys(['<bos>', '<cls>', '<eos>', '<mask>', '<pad>', '<unk>', 'amphiprion_ocellaris', 'arabidopsis_thaliana', 'bison_bison_bison', 'caenorhabditis_elegans', 'canis_lupus_familiaris', 'chinchilla_lanigera', 'ciona_intestinalis', 'danio_rerio', 'drosophila_melanogaster', 'felis_catus', 'gallus_gallus', 'glycine_max', 'gorilla_gorilla', 'gossypium_hirsutum', 'human', 'macaca_nemestrina', 'mouse', 'oryza_sativa', 'rattus_norvegicus', 'salmo_trutta', 'serinus_canaria', 'tetraodon_nigroviridis', 'triticum_aestivum', 'zea_mays'])\n",
+            "bigwig_tracks_logits: (2, 192, 7362)\n",
+            "bed_tracks_logits: (2, 192, 21, 2)\n",
+            "language model logits: (2, 512, 11)\n"
           ]
         }
       ],
         "tok_post = AutoTokenizer.from_pretrained(post_trained_model_name, trust_remote_code=True)\n",
         "model_post = AutoModel.from_pretrained(post_trained_model_name, trust_remote_code=True)\n",
         "\n",
+        "# Prepare inputs - tokenization will pad all sequences to a multiple of 128\n",
+        "batch = tok_post(seqs, add_special_tokens=False, padding=True, pad_to_multiple_of=128, return_tensors=\"pt\")\n",
         "\n",
         "# To show all supported species: \n",
         "print(\"Supported species:\", model_post.config.species_to_token_id.keys())\n",
+        "# Species tokens (one per sequence)\n",
         "species = ['human', 'mouse']\n",
         "species_ids = model_post.encode_species(species)\n",
         "\n",
         "# Language model logits for whole sequence over vocabulary\n",
         "print(\"language model logits:\", tuple(out[\"logits\"].shape))\n"
       ]
     }
   ],
   "metadata": {

notebooks_tutorials/01_tracks_prediction.ipynb CHANGED Viewed

@@ -106,232 +106,6 @@
         "Set your NTv3 model and genomic window here"
       ]
     },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "id": "6193fd37",
-      "metadata": {},
-      "outputs": [
-        {
-          "data": {
-            "text/html": [
-              "<div>\n",
-              "<style scoped>\n",
-              "    .dataframe tbody tr th:only-of-type {\n",
-              "        vertical-align: middle;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe tbody tr th {\n",
-              "        vertical-align: top;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe thead th {\n",
-              "        text-align: right;\n",
-              "    }\n",
-              "</style>\n",
-              "<table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              "    <tr style=\"text-align: right;\">\n",
-              "      <th></th>\n",
-              "      <th>file_id</th>\n",
-              "      <th>biosample_type</th>\n",
-              "      <th>tissue</th>\n",
-              "      <th>assay</th>\n",
-              "      <th>strand</th>\n",
-              "      <th>experiment_target</th>\n",
-              "      <th>specie</th>\n",
-              "      <th>dataset</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <th>0</th>\n",
-              "      <td>SRX20249461</td>\n",
-              "      <td>tissue</td>\n",
-              "      <td>Leaf (17 days)</td>\n",
-              "      <td>TF ChIP-seq</td>\n",
-              "      <td>NaN</td>\n",
-              "      <td>JMJ20</td>\n",
-              "      <td>glycine_max</td>\n",
-              "      <td>ncbi_chrom_acc</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>1</th>\n",
-              "      <td>SRX20249462</td>\n",
-              "      <td>tissue</td>\n",
-              "      <td>Leaf (17 days)</td>\n",
-              "      <td>TF ChIP-seq</td>\n",
-              "      <td>NaN</td>\n",
-              "      <td>FLAG</td>\n",
-              "      <td>glycine_max</td>\n",
-              "      <td>ncbi_chrom_acc</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>2</th>\n",
-              "      <td>SRX21859141</td>\n",
-              "      <td>tissue</td>\n",
-              "      <td>Seed (60 days)</td>\n",
-              "      <td>Histone ChIP-seq</td>\n",
-              "      <td>NaN</td>\n",
-              "      <td>H3K27me3</td>\n",
-              "      <td>glycine_max</td>\n",
-              "      <td>ncbi_chrom_acc</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>3</th>\n",
-              "      <td>SRX21859142</td>\n",
-              "      <td>tissue</td>\n",
-              "      <td>Seed (60 days)</td>\n",
-              "      <td>Histone ChIP-seq</td>\n",
-              "      <td>NaN</td>\n",
-              "      <td>H3K27me3</td>\n",
-              "      <td>glycine_max</td>\n",
-              "      <td>ncbi_chrom_acc</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>4</th>\n",
-              "      <td>SRX21859143</td>\n",
-              "      <td>tissue</td>\n",
-              "      <td>Seed (60 days)</td>\n",
-              "      <td>Histone ChIP-seq</td>\n",
-              "      <td>NaN</td>\n",
-              "      <td>H3K4me3</td>\n",
-              "      <td>glycine_max</td>\n",
-              "      <td>ncbi_chrom_acc</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>...</th>\n",
-              "      <td>...</td>\n",
-              "      <td>...</td>\n",
-              "      <td>...</td>\n",
-              "      <td>...</td>\n",
-              "      <td>...</td>\n",
-              "      <td>...</td>\n",
-              "      <td>...</td>\n",
-              "      <td>...</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>15884</th>\n",
-              "      <td>GSM874952</td>\n",
-              "      <td>Unknown</td>\n",
-              "      <td>NaN</td>\n",
-              "      <td>TF ChIP-seq</td>\n",
-              "      <td>NaN</td>\n",
-              "      <td>RPB2</td>\n",
-              "      <td>mouse</td>\n",
-              "      <td>geo</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>15885</th>\n",
-              "      <td>GSM874953</td>\n",
-              "      <td>Unknown</td>\n",
-              "      <td>NaN</td>\n",
-              "      <td>TF ChIP-seq</td>\n",
-              "      <td>NaN</td>\n",
-              "      <td>RPB2</td>\n",
-              "      <td>mouse</td>\n",
-              "      <td>geo</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>15886</th>\n",
-              "      <td>GSM874954</td>\n",
-              "      <td>Unknown</td>\n",
-              "      <td>NaN</td>\n",
-              "      <td>TF ChIP-seq</td>\n",
-              "      <td>NaN</td>\n",
-              "      <td>RPB2</td>\n",
-              "      <td>mouse</td>\n",
-              "      <td>geo</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>15887</th>\n",
-              "      <td>GSM874955</td>\n",
-              "      <td>Unknown</td>\n",
-              "      <td>NaN</td>\n",
-              "      <td>TF ChIP-seq</td>\n",
-              "      <td>NaN</td>\n",
-              "      <td>RPB2</td>\n",
-              "      <td>mouse</td>\n",
-              "      <td>geo</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>15888</th>\n",
-              "      <td>GSM874956</td>\n",
-              "      <td>Unknown</td>\n",
-              "      <td>NaN</td>\n",
-              "      <td>TF ChIP-seq</td>\n",
-              "      <td>NaN</td>\n",
-              "      <td>RPB2</td>\n",
-              "      <td>mouse</td>\n",
-              "      <td>geo</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table>\n",
-              "<p>15889 rows × 8 columns</p>\n",
-              "</div>"
-            ],
-            "text/plain": [
-              "           file_id biosample_type          tissue             assay strand  \\\n",
-              "0      SRX20249461         tissue  Leaf (17 days)       TF ChIP-seq    NaN   \n",
-              "1      SRX20249462         tissue  Leaf (17 days)       TF ChIP-seq    NaN   \n",
-              "2      SRX21859141         tissue  Seed (60 days)  Histone ChIP-seq    NaN   \n",
-              "3      SRX21859142         tissue  Seed (60 days)  Histone ChIP-seq    NaN   \n",
-              "4      SRX21859143         tissue  Seed (60 days)  Histone ChIP-seq    NaN   \n",
-              "...            ...            ...             ...               ...    ...   \n",
-              "15884    GSM874952        Unknown             NaN       TF ChIP-seq    NaN   \n",
-              "15885    GSM874953        Unknown             NaN       TF ChIP-seq    NaN   \n",
-              "15886    GSM874954        Unknown             NaN       TF ChIP-seq    NaN   \n",
-              "15887    GSM874955        Unknown             NaN       TF ChIP-seq    NaN   \n",
-              "15888    GSM874956        Unknown             NaN       TF ChIP-seq    NaN   \n",
-              "\n",
-              "      experiment_target       specie         dataset  \n",
-              "0                 JMJ20  glycine_max  ncbi_chrom_acc  \n",
-              "1                  FLAG  glycine_max  ncbi_chrom_acc  \n",
-              "2              H3K27me3  glycine_max  ncbi_chrom_acc  \n",
-              "3              H3K27me3  glycine_max  ncbi_chrom_acc  \n",
-              "4               H3K4me3  glycine_max  ncbi_chrom_acc  \n",
-              "...                 ...          ...             ...  \n",
-              "15884              RPB2        mouse             geo  \n",
-              "15885              RPB2        mouse             geo  \n",
-              "15886              RPB2        mouse             geo  \n",
-              "15887              RPB2        mouse             geo  \n",
-              "15888              RPB2        mouse             geo  \n",
-              "\n",
-              "[15889 rows x 8 columns]"
-            ]
-          },
-          "execution_count": 15,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
-      "source": [
-        "import pandas as pd\n",
-        "\n",
-        "df = pd.read_csv(\"/Users/b.dealmeida/Downloads/Supplementary_tables - Post-training functional tracks.tsv\", sep=\"\\t\")"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 17,
-      "id": "5f686ba9",
-      "metadata": {},
-      "outputs": [
-        {
-          "data": {
-            "text/plain": [
-              "2765"
-            ]
-          },
-          "execution_count": 17,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
-      "source": [
-        "len(df.tissue.unique())"
-      ]
-    },
     {
       "cell_type": "code",
       "execution_count": null,
@@ -348,9 +122,9 @@
         "species = \"human\"  # will use for condition the model on species\n",
         "assembly = \"hg38\"  # will use for fetching the chromosome sequence\n",
         "chrom = \"chr19\"\n",
-        "start = 6_749_152\n",
-        "end   = 6_781_920\n",
-        "# Using center 32kb window (32,768 bp) for faster inference\n",
         "\n",
         "# Optional\n",
         "HF_TOKEN = os.getenv(\"HF_TOKEN\", None)"

         "Set your NTv3 model and genomic window here"
       ]
     },
     {
       "cell_type": "code",
       "execution_count": null,
         "species = \"human\"  # will use for condition the model on species\n",
         "assembly = \"hg38\"  # will use for fetching the chromosome sequence\n",
         "chrom = \"chr19\"\n",
+        "start = 6_700_000\n",
+        "end   = 6_765_536\n",
+        "# Limiting to 65kb to work on Google Colab T4 GPU -> increase up to 1 million nucleotides if you have a better GPU\n",
         "\n",
         "# Optional\n",
         "HF_TOKEN = os.getenv(\"HF_TOKEN\", None)"

tabs/home.html CHANGED Viewed

@@ -94,9 +94,8 @@
       <h2>📓 Pipeline notebooks (browse <a href="https://huggingface.co/spaces/InstaDeepAI/ntv3/tree/main/notebooks_pipelines" target="_blank" rel="noopener noreferrer">folder</a>)</h2>
       <ul>
         <li><a href="https://huggingface.co/spaces/InstaDeepAI/ntv3/blob/main/notebooks_pipelines/01_functional_track_prediction.ipynb" target="_blank" rel="noopener noreferrer">🎯 01 — Generate bigwig predictions for certain tracks</a></li>
-        <li>🎯 02 — Fine-tune on bigwig tracks</li>
-        <li>🔍 03 — Interpret a given genomic region</li>
-        <li>🧪 04 — Sequence generation <em>(coming soon)</em></li>
       </ul>
     </div>
     <div class="card">

       <h2>📓 Pipeline notebooks (browse <a href="https://huggingface.co/spaces/InstaDeepAI/ntv3/tree/main/notebooks_pipelines" target="_blank" rel="noopener noreferrer">folder</a>)</h2>
       <ul>
         <li><a href="https://huggingface.co/spaces/InstaDeepAI/ntv3/blob/main/notebooks_pipelines/01_functional_track_prediction.ipynb" target="_blank" rel="noopener noreferrer">🎯 01 — Generate bigwig predictions for certain tracks</a></li>
+        <li><a href="https://huggingface.co/spaces/InstaDeepAI/ntv3/blob/main/notebooks_pipelines/02_functional_interpretation.ipynb" target="_blank" rel="noopener noreferrer">🔍 02 — Interpret a given genomic region</a></li>
+        <li>🧪 03 — Sequence generation <em>(coming soon)</em></li>
       </ul>
     </div>
     <div class="card">