Spaces:

InstaDeepAI
/

ntv3

Running

App Files Files Community

ybornachot committed on Dec 12, 2025

Commit

65f032b

1 Parent(s): b6b1c80

feat: enhanced dataset with multiprocessing compatibility + added documentation

Browse files

Files changed (2) hide show

.gitattributes +1 -0
notebooks/03_fine_tuning.ipynb +3 -1425

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.ipynb filter=lfs diff=lfs merge=lfs -text

notebooks/03_fine_tuning.ipynb CHANGED Viewed

@@ -1,1425 +1,3 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Simple PyTorch Tracks Fine-Tuning Pipeline\n",
-    "\n",
-    "This notebook implements a simple PyTorch-based deep learning pipeline for tracks prediction fine-tuning.\n",
-    "\n",
-    "## Overview\n",
-    "- Loads a HuggingFace model (NTv3) as backbone\n",
-    "- Adds a prediction head for bigwig tracks\n",
-    "- Fine-tunes on tracks prediction with a simple training loop\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Install useful dependencies\n",
-    "# !pip install pyBigWig\n",
-    "# !pip install pyfaidx\n",
-    "# !pip install torchmetrics"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# 0. Imports\n",
-    "import random\n",
-    "import functools\n",
-    "from typing import List, Dict, Callable\n",
-    "import os\n",
-    "import subprocess\n",
-    "\n",
-    "import torch\n",
-    "import torch.nn as nn\n",
-    "import torch.nn.functional as F\n",
-    "from torch.utils.data import Dataset, DataLoader\n",
-    "from torch.optim import AdamW\n",
-    "from transformers import AutoConfig, AutoModelForMaskedLM, AutoTokenizer\n",
-    "import numpy as np\n",
-    "import pyBigWig\n",
-    "from pyfaidx import Fasta\n",
-    "from torchmetrics import PearsonCorrCoef\n",
-    "import plotly.graph_objects as go\n",
-    "from plotly.subplots import make_subplots\n",
-    "from IPython.display import display"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# 1. Configuration setup\n",
-    "\n",
-    "## Configuration Parameters\n",
-    "\n",
-    "### Model\n",
-    "- **`model_name`**: HuggingFace model name/identifier for the pretrained backbone model\n",
-    "\n",
-    "### Data\n",
-    "- **`data_cache_dir`**: Directory where downloaded data files (FASTA, bigWig) will be stored\n",
-    "- **`fasta_url`**: URL to download reference genome FASTA file\n",
-    "- **`bigwig_url_list`**: List of URLs for bigWig track files to download\n",
-    "- **`sequence_length`**: Length of input sequences in base pairs (bp)\n",
-    "- **`keep_target_center_fraction`**: Fraction of center sequence to keep for target prediction (crops edges to focus on center)\n",
-    "\n",
-    "### Training\n",
-    "- **`batch_size`**: Number of samples per batch\n",
-    "- **`learning_rate`**: Constant learning rate for optimizer\n",
-    "- **`weight_decay`**: Decoupled weight decay coefficient for the AdamW optimizer\n",
-    "- **`num_steps_training`**: Total number of training steps\n",
-    "- **`log_every_n_steps`**: Log training metrics every N steps\n",
-    "- **`validate_every_n_steps`**: Run validation every N steps\n",
-    "\n",
-    "### Validation\n",
-    "- **`num_validation_samples`**: Number of samples to use for validation set\n",
-    "\n",
-    "### General\n",
-    "- **`seed`**: Random seed for reproducibility\n",
-    "- **`device`**: Device to run training on (\"cuda\" or \"cpu\")\n",
-    "- **`num_workers`**: Number of worker processes for DataLoader (0 = data is loaded in the main process)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Using device: cpu\n"
-     ]
-    }
-   ],
-   "source": [
-    "config = {\n",
-    "    # Model\n",
-    "    \"model_name\": \"InstaDeepAI/ntv3_8M_7downsample_pretrained_le_1mb\",\n",
-    "    \n",
-    "    # Data\n",
-    "    \"data_cache_dir\": \"./data\",\n",
-    "    \"fasta_url\": \"https://hgdownload.gi.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz\",\n",
-    "    \"bigwig_url_list\": [\n",
-    "        \"https://www.encodeproject.org/files/ENCFF884LDL/@@download/ENCFF884LDL.bigWig\"\n",
-    "    ],\n",
-    "    \"sequence_length\": 1_024,\n",
-    "    \"keep_target_center_fraction\": 0.375,\n",
-    "    \n",
-    "    # Training\n",
-    "    \"batch_size\": 8,\n",
-    "    \"num_steps_training\": 1000,\n",
-    "    \"log_every_n_steps\": 10,\n",
-    "    \"learning_rate\": 1e-5,\n",
-    "    \"weight_decay\": 0.01,\n",
-    "    \n",
-    "    # Validation\n",
-    "    \"validate_every_n_steps\": 50,\n",
-    "    \"num_validation_samples\": 100,\n",
-    "    \n",
-    "    # General\n",
-    "    \"seed\": 42,\n",
-    "    \"device\": \"cuda\" if torch.cuda.is_available() else \"cpu\",\n",
-    "    \"num_workers\": 0,\n",
-    "}\n",
-    "\n",
-    "os.makedirs(config[\"data_cache_dir\"], exist_ok=True)\n",
-    "\n",
-    "# Extract filenames from URLs\n",
-    "def extract_filename_from_url(url: str) -> str:\n",
-    "    \"\"\"Extract filename from URL, handling query parameters.\"\"\"\n",
-    "    # Remove query parameters if present\n",
-    "    url_clean = url.split('?')[0]\n",
-    "    # Get the last part of the URL path\n",
-    "    return url_clean.split('/')[-1]\n",
-    "\n",
-    "# Create paths for downloaded files\n",
-    "fasta_path = os.path.join(config[\"data_cache_dir\"], extract_filename_from_url(config[\"fasta_url\"]).replace('.gz', ''))\n",
-    "bigwig_path_list = [\n",
-    "    os.path.join(config[\"data_cache_dir\"], extract_filename_from_url(url))\n",
-    "    for url in config[\"bigwig_url_list\"]\n",
-    "]\n",
-    "\n",
-    "# Create bigwig_file_ids from filenames (without extension)\n",
-    "config[\"bigwig_file_ids\"] = [\n",
-    "    os.path.splitext(extract_filename_from_url(url))[0]\n",
-    "    for url in config[\"bigwig_url_list\"]\n",
-    "]\n",
-    "\n",
-    "# Set random seed\n",
-    "torch.manual_seed(config[\"seed\"])\n",
-    "np.random.seed(config[\"seed\"])\n",
-    "\n",
-    "# Set device\n",
-    "device = torch.device(config[\"device\"])\n",
-    "print(f\"Using device: {device}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# 2. Data download"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "--2025-12-10 14:47:06--  https://hgdownload.gi.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz\n",
-      "Resolving hgdownload.gi.ucsc.edu (hgdownload.gi.ucsc.edu)... 128.114.119.163\n",
-      "Connecting to hgdownload.gi.ucsc.edu (hgdownload.gi.ucsc.edu)|128.114.119.163|:443... connected.\n",
-      "HTTP request sent, awaiting response... 200 OK\n",
-      "Length: 983659424 (938M) [application/x-gzip]\n",
-      "Saving to: './data/hg38.fa.gz'\n",
-      "\n",
-      "hg38.fa.gz          100%[===================>] 938.09M  10.4MB/s    in 1m 43s  \n",
-      "\n",
-      "2025-12-10 14:48:50 (9.09 MB/s) - './data/hg38.fa.gz' saved [983659424/983659424]\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Download fasta file\n",
-    "!wget -c {config[\"fasta_url\"]} -P {config[\"data_cache_dir\"]}/ && gunzip -f {config[\"data_cache_dir\"]}/{config[\"fasta_url\"].split(os.path.sep)[-1]}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Downloading ENCFF884LDL.bigWig...\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "--2025-12-10 14:54:41--  https://www.encodeproject.org/files/ENCFF884LDL/@@download/ENCFF884LDL.bigWig\n",
-      "Resolving www.encodeproject.org (www.encodeproject.org)... 34.211.244.144\n",
-      "Connecting to www.encodeproject.org (www.encodeproject.org)|34.211.244.144|:443... connected.\n",
-      "HTTP request sent, awaiting response... 307 Temporary Redirect\n",
-      "Location: https://encode-public.s3.amazonaws.com/2020/09/19/425880b6-b323-4ee2-95ce-56bdd088d126/ENCFF884LDL.bigWig?response-content-disposition=attachment%3B%20filename%3DENCFF884LDL.bigWig&AWSAccessKeyId=ASIATGZNGCNXU6SGJVOL&Signature=4o0Pp2RvJtnZc9z7HOuCU1k9wwI%3D&x-amz-security-token=IQoJb3JpZ2luX2VjEA0aCXVzLXdlc3QtMiJGMEQCIEdyOOxtHk6rJT06xIjzZR3nVyqbPB1twIFxCDtIQfNXAiAph1lc69CfHzPPglodVnVh9QCjlsXHFyUEU3K0%2Bx%2F%2Bziq8BQjW%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F8BEAAaDDIyMDc0ODcxNDg2MyIMYwkeEaXuk%2BE48EDAKpAFkm4uzCSB40oRz3YT4m%2FZfBSH7XIuSCuzS7nrL5tXb9Q2rfPQSD4PHOyTR0LOOfcr98%2FyF8cJw4NE%2Fwsw8BRs4xPFEEyN6yGqwHmAyxBuwdca4GLSMGRDaSPoleMJw1FcSv96ofbZFYTTSol4b6%2FZj4jJjCa887%2F6S5x9kNIjTAtgX%2Fr3Ci4wi4FXGKTijTU%2FnbuuLZ3Cz2UobD6p732apsayl7avmUdWbUvROl3sHFOWOGCKsmDv0mavyEu2EsHxniBPfECy00BNvf%2Bj2FDaz1BImMIDavVBSwcWk8uCPjbsccsgiuKAfwr3dOXQ7R6y4NwmuFluBqn1GOXw1K13T4LrF%2BrhmqdOWeIVKB%2Bo9vnfQm1Dws6EoyS%2BG0bWDnyuUnLtWGf4cZPA6kjcM14fspFxoMnLjHBfdpYKZ3VmikbgwE8mDaiHODH1WQ36lUPigKbbIeHqOnHTIEw5h6F8D0MfIdVBSV2HCXweIlxCr6%2FV8hy2RzDouzT%2FIH%2FIobhHjGPM%2FlmkLAcfEzS2fioCJwkqQ3F%2BC77alAhtDQ4Oy5OIxRnRHVLpO%2BMA9Ml0SrEegCGPIzLucuCtbj2UTEOnBRQXyMolyySopJZb4p4BpJ6MiitLyCt1C66lvJpX5oMri%2BVD7FcTgdPYxcqM%2FMLD%2B4XqTYh5wdK7EYe3CpsVjpviZSVbn7yVHAb8WqdmFO%2BXRGhjQdN6rMrwGPiMCmQq12tTQftfmEwPGN1CVHG%2BbL1KUpEF4BRE61xDwEu7ZXyycPqTJMKHVn%2BXZ%2BxFsaxpUsp25U6JIVVPiNgt1OyhfjU6oqzwzeXH7KMRIcqz2d%2B3p%2BIbjRvoHcLc8AzgY4RvgWMGlb5gIpv15HQTDvdiLLwwjd3lyQY6sgE9t%2Bhi2Jv1DPgJN0YUGblcTV3Ey95h%2BBIXo6zWGwqhyZhkH%2ByxJKXouv2S1mKS3BM0dp2maJGDp69Mze8UkGjFYvdzxHT1zrCZ4dMRRkRObY3%2F4ZP33ogelhzchd7S76et35vYwYHd9DYycWZnJ%2FIcfpSZURGMJu3gLM3YhIscykGwQKqB21Tmyjufi0AaYyLk4w2OKc31kgjFvs6lNaHhqTuFButuHEiBUMzieixOI%2BX6&Expires=1765504482 [following]\n",
-      "--2025-12-10 14:54:42--  https://encode-public.s3.amazonaws.com/2020/09/19/425880b6-b323-4ee2-95ce-56bdd088d126/ENCFF884LDL.bigWig?response-content-disposition=attachment%3B%20filename%3DENCFF884LDL.bigWig&AWSAccessKeyId=ASIATGZNGCNXU6SGJVOL&Signature=4o0Pp2RvJtnZc9z7HOuCU1k9wwI%3D&x-amz-security-token=IQoJb3JpZ2luX2VjEA0aCXVzLXdlc3QtMiJGMEQCIEdyOOxtHk6rJT06xIjzZR3nVyqbPB1twIFxCDtIQfNXAiAph1lc69CfHzPPglodVnVh9QCjlsXHFyUEU3K0%2Bx%2F%2Bziq8BQjW%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F8BEAAaDDIyMDc0ODcxNDg2MyIMYwkeEaXuk%2BE48EDAKpAFkm4uzCSB40oRz3YT4m%2FZfBSH7XIuSCuzS7nrL5tXb9Q2rfPQSD4PHOyTR0LOOfcr98%2FyF8cJw4NE%2Fwsw8BRs4xPFEEyN6yGqwHmAyxBuwdca4GLSMGRDaSPoleMJw1FcSv96ofbZFYTTSol4b6%2FZj4jJjCa887%2F6S5x9kNIjTAtgX%2Fr3Ci4wi4FXGKTijTU%2FnbuuLZ3Cz2UobD6p732apsayl7avmUdWbUvROl3sHFOWOGCKsmDv0mavyEu2EsHxniBPfECy00BNvf%2Bj2FDaz1BImMIDavVBSwcWk8uCPjbsccsgiuKAfwr3dOXQ7R6y4NwmuFluBqn1GOXw1K13T4LrF%2BrhmqdOWeIVKB%2Bo9vnfQm1Dws6EoyS%2BG0bWDnyuUnLtWGf4cZPA6kjcM14fspFxoMnLjHBfdpYKZ3VmikbgwE8mDaiHODH1WQ36lUPigKbbIeHqOnHTIEw5h6F8D0MfIdVBSV2HCXweIlxCr6%2FV8hy2RzDouzT%2FIH%2FIobhHjGPM%2FlmkLAcfEzS2fioCJwkqQ3F%2BC77alAhtDQ4Oy5OIxRnRHVLpO%2BMA9Ml0SrEegCGPIzLucuCtbj2UTEOnBRQXyMolyySopJZb4p4BpJ6MiitLyCt1C66lvJpX5oMri%2BVD7FcTgdPYxcqM%2FMLD%2B4XqTYh5wdK7EYe3CpsVjpviZSVbn7yVHAb8WqdmFO%2BXRGhjQdN6rMrwGPiMCmQq12tTQftfmEwPGN1CVHG%2BbL1KUpEF4BRE61xDwEu7ZXyycPqTJMKHVn%2BXZ%2BxFsaxpUsp25U6JIVVPiNgt1OyhfjU6oqzwzeXH7KMRIcqz2d%2B3p%2BIbjRvoHcLc8AzgY4RvgWMGlb5gIpv15HQTDvdiLLwwjd3lyQY6sgE9t%2Bhi2Jv1DPgJN0YUGblcTV3Ey95h%2BBIXo6zWGwqhyZhkH%2ByxJKXouv2S1mKS3BM0dp2maJGDp69Mze8UkGjFYvdzxHT1zrCZ4dMRRkRObY3%2F4ZP33ogelhzchd7S76et35vYwYHd9DYycWZnJ%2FIcfpSZURGMJu3gLM3YhIscykGwQKqB21Tmyjufi0AaYyLk4w2OKc31kgjFvs6lNaHhqTuFButuHEiBUMzieixOI%2BX6&Expires=1765504482\n",
-      "Resolving encode-public.s3.amazonaws.com (encode-public.s3.amazonaws.com)... 52.92.248.169, 52.92.211.49, 3.5.80.18, ...\n",
-      "Connecting to encode-public.s3.amazonaws.com (encode-public.s3.amazonaws.com)|52.92.248.169|:443... connected.\n",
-      "HTTP request sent, awaiting response... 416 Requested Range Not Satisfiable\n",
-      "\n",
-      "    The file is already fully retrieved; nothing to do.\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Download bigwig files\n",
-    "for bigwig_url in config[\"bigwig_url_list\"]:\n",
-    "    filename = extract_filename_from_url(bigwig_url)\n",
-    "    filepath = os.path.join(config[\"data_cache_dir\"], filename)\n",
-    "    print(f\"Downloading {filename}...\")\n",
-    "    subprocess.run([\"wget\", \"-c\", bigwig_url, \"-O\", filepath], check=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "chrom_splits = {\n",
-    "    \"train\": [f\"chr{i}\" for i in range(1, 21)] + ['chrX', 'chrY'],\n",
-    "    \"val\": ['chr22'],\n",
-    "    \"test\": ['chr21']\n",
-    "}"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# 3. Model and tokenizer setup"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class LinearHead(nn.Module):\n",
-    "    \"\"\"LayerNorm + linear head that predicts one non-negative value per track for every input embedding.\"\"\"\n",
-    "    def __init__(self, embed_dim: int, num_labels: int):\n",
-    "        super().__init__()\n",
-    "        self.layer_norm = nn.LayerNorm(embed_dim)\n",
-    "        self.head = nn.Linear(embed_dim, num_labels)\n",
-    "    \n",
-    "    def forward(self, x: torch.Tensor) -> torch.Tensor:\n",
-    "        x = self.layer_norm(x)\n",
-    "        x = self.head(x)\n",
-    "        x = F.softplus(x)  # Ensure positive values\n",
-    "        return x\n",
-    "\n",
-    "\n",
-    "class HFModelWithHead(nn.Module):\n",
-    "    \"\"\"Simple model wrapper: HF backbone + bigwig head.\"\"\"\n",
-    "    \n",
-    "    def __init__(\n",
-    "        self,\n",
-    "        model_name: str,\n",
-    "        bigwig_track_names: List[str],\n",
-    "        keep_target_center_fraction: float = 0.375,\n",
-    "    ):\n",
-    "        super().__init__()\n",
-    "        \n",
-    "        # Load config and model\n",
-    "        self.config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)\n",
-    "        self.backbone = AutoModelForMaskedLM.from_pretrained(\n",
-    "            model_name, \n",
-    "            trust_remote_code=True,\n",
-    "            config=self.config\n",
-    "        )\n",
-    "        \n",
-    "        self.keep_target_center_fraction = keep_target_center_fraction\n",
-    "\n",
-    "        if hasattr(self.config, \"embed_dim\"):\n",
-    "            embed_dim = self.config.embed_dim\n",
-    "        else:\n",
-    "            raise ValueError(f\"Could not determine embed_dim for {model_name}\")\n",
-    "        \n",
-    "        # Bigwig head (NTv3 outputs at single-nucleotide resolution)\n",
-    "        self.bigwig_head = LinearHead(embed_dim, len(bigwig_track_names))\n",
-    "        self.model_name = model_name\n",
-    "    \n",
-    "    def forward(self, tokens: torch.Tensor, **kwargs) -> Dict[str, torch.Tensor]:\n",
-    "        # Forward through backbone\n",
-    "        outputs = self.backbone(input_ids=tokens)\n",
-    "        embedding = outputs.hidden_states[-1]  # Last hidden state\n",
-    "        \n",
-    "        # Crop to center fraction\n",
-    "        if self.keep_target_center_fraction < 1.0:\n",
-    "            seq_len = embedding.shape[1]\n",
-    "            target_offset = int(seq_len * (1 - self.keep_target_center_fraction) // 2)\n",
-    "            target_length = seq_len - 2 * target_offset\n",
-    "            embedding = embedding[:, target_offset:target_offset + target_length, :]\n",
-    "        \n",
-    "        # Predict bigwig tracks\n",
-    "        bigwig_logits = self.bigwig_head(embedding)\n",
-    "        \n",
-    "        return {\"bigwig_tracks_logits\": bigwig_logits}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Model loaded: InstaDeepAI/ntv3_8M_7downsample_pretrained_le_1mb\n",
-      "Number of bigwig tracks: 1\n",
-      "Model parameters: 7,693,244\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Load tokenizer\n",
-    "tokenizer = AutoTokenizer.from_pretrained(config[\"model_name\"], trust_remote_code=True)\n",
-    "\n",
-    "# Create model\n",
-    "model = HFModelWithHead(\n",
-    "    model_name=config[\"model_name\"],\n",
-    "    bigwig_track_names=config[\"bigwig_file_ids\"],\n",
-    "    keep_target_center_fraction=config[\"keep_target_center_fraction\"],\n",
-    ")\n",
-    "model = model.to(device)\n",
-    "model.train()\n",
-    "\n",
-    "print(f\"Model loaded: {config['model_name']}\")\n",
-    "print(f\"Number of bigwig tracks: {len(config['bigwig_file_ids'])}\")\n",
-    "print(f\"Model parameters: {sum(p.numel() for p in model.parameters()):,}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Scaling functions for targets\n",
-    "def get_track_means(bigwig_file_ids: List[str]) -> np.ndarray:\n",
-    "    \"\"\"\n",
-    "    Get track means for normalization.\n",
-    "    For now, return dummy values. In real pipeline, this loads from metadata.\n",
-    "    \"\"\"\n",
-    "    # Dummy values - in real pipeline, this would load from actual metadata\n",
-    "    return np.ones(len(bigwig_file_ids), dtype=np.float32) * 1.0\n",
-    "\n",
-    "\n",
-    "def create_targets_scaling_fn(bigwig_file_ids: List[str]) -> Callable[[torch.Tensor], torch.Tensor]:\n",
-    "    \"\"\"\n",
-    "    Build a scaling function based on track means.\n",
-    "    \"\"\"\n",
-    "    # Load track means\n",
-    "    track_means_np = get_track_means(bigwig_file_ids)\n",
-    "    track_means = torch.tensor(track_means_np, dtype=torch.float32)\n",
-    "    \n",
-    "    def transform_fn(x: torch.Tensor) -> torch.Tensor:\n",
-    "        \"\"\"\n",
-    "        x: torch.Tensor, shape (seq_len, num_tracks) or (batch, seq_len, num_tracks)\n",
-    "        \"\"\"\n",
-    "        # Move constants to correct device then normalize\n",
-    "        means = track_means.to(x.device)\n",
-    "        scaled = x / means\n",
-    "\n",
-    "        # Soft clipping: values above 10 are compressed to 2 * sqrt(10 * x) - 10 (continuous at x = 10)\n",
-    "        clipped = torch.where(\n",
-    "            scaled > 10.0,\n",
-    "            2.0 * torch.sqrt(scaled * 10.0) - 10.0,\n",
-    "            scaled,\n",
-    "        )\n",
-    "        return clipped\n",
-    "    \n",
-    "    return transform_fn"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# 4. Data loading"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class GenomeBigWigDataset(Dataset):\n",
-    "    \"\"\"\n",
-    "    Random genomic windows from a reference genome + bigWig signal.\n",
-    "\n",
-    "    Each sample:\n",
-    "        - picks a random chromosome among the valid ones in `chroms`,\n",
-    "        - picks a random window of length `sequence_length`,\n",
-    "        - returns a dict with keys `tokens`, `bigwig_targets`, `chrom`, `start`, `end`.\n",
-    "\n",
-    "    Args\n",
-    "    ----\n",
-    "    fasta_path : str\n",
-    "        Path to the reference genome FASTA (e.g. hg38.fa).\n",
-    "    bigwig_path_list : list[str]\n",
-    "        Paths to the bigWig files, one per track (e.g. [\"ENCFF884LDL.bigWig\"]).\n",
-    "    chroms : List[str]\n",
-    "        Chromosome names to sample from (e.g. [\"chr1\", \"chr2\", ...]).\n",
-    "        Names must be identical in the FASTA and in the first bigWig file:\n",
-    "        chromosomes missing from either one, or not longer than\n",
-    "        `sequence_length`, are skipped.\n",
-    "    sequence_length : int\n",
-    "        Length of each random window (in bp).\n",
-    "    num_samples : int\n",
-    "        Number of samples the dataset will provide (len(dataset)).\n",
-    "    tokenizer : AutoTokenizer\n",
-    "        Tokenizer used to convert the DNA sequence into input ids.\n",
-    "    transform_fn : Callable[[torch.Tensor], torch.Tensor]\n",
-    "        Scaling function applied to the targets after cropping.\n",
-    "    keep_target_center_fraction : float\n",
-    "        Fraction of the window, centered, kept for the targets (1.0 = no cropping).\n",
-    "    num_tracks : int\n",
-    "        Number of bigWig tracks (stored on the instance).\n",
-    "    \"\"\"\n",
-    "\n",
-    "    def __init__(\n",
-    "        self,\n",
-    "        fasta_path: str,\n",
-    "        bigwig_path_list: list[str],\n",
-    "        chroms: List[str],\n",
-    "        sequence_length: int,\n",
-    "        num_samples: int,\n",
-    "        tokenizer: AutoTokenizer,\n",
-    "        transform_fn: Callable[[torch.Tensor], torch.Tensor],\n",
-    "        keep_target_center_fraction: float = 1.0,\n",
-    "        num_tracks: int = 1,\n",
-    "    ):\n",
-    "        super().__init__()\n",
-    "\n",
-    "        self.fasta = Fasta(fasta_path, as_raw=True, sequence_always_upper=True)\n",
-    "        self.bw_list = [\n",
-    "            pyBigWig.open(bigwig_path)\n",
-    "            for bigwig_path in bigwig_path_list\n",
-    "        ]\n",
-    "        self.sequence_length = sequence_length\n",
-    "        self.num_samples = num_samples\n",
-    "        self.tokenizer = tokenizer\n",
-    "        self.transform_fn = transform_fn\n",
-    "        self.keep_target_center_fraction = keep_target_center_fraction\n",
-    "        self.num_tracks = num_tracks\n",
-    "        self.chroms = chroms\n",
-    "\n",
-    "        # Intersect lengths between FASTA and bigWig for safety\n",
-    "        bw_chrom_lengths = self.bw_list[0].chroms()  # dict: chrom -> length\n",
-    "\n",
-    "        self.valid_chroms = []\n",
-    "        self.chrom_lengths = {}\n",
-    "\n",
-    "        for c in chroms:\n",
-    "            if c not in bw_chrom_lengths or c not in self.fasta:\n",
-    "                continue\n",
-    "\n",
-    "            fa_len = len(self.fasta[c])\n",
-    "            bw_len = bw_chrom_lengths[c]\n",
-    "            L = min(fa_len, bw_len)\n",
-    "\n",
-    "            if L > self.sequence_length:\n",
-    "                self.valid_chroms.append(c)\n",
-    "                self.chrom_lengths[c] = L\n",
-    "\n",
-    "        if not self.valid_chroms:\n",
-    "            raise ValueError(\"No valid chromosomes after intersecting FASTA and bigWig.\")\n",
-    "\n",
-    "    def __len__(self):\n",
-    "        return self.num_samples\n",
-    "\n",
-    "    def __getitem__(self, idx):\n",
-    "        # Ignore idx, sample randomly\n",
-    "        chrom = random.choice(self.valid_chroms)\n",
-    "        chrom_len = self.chrom_lengths[chrom]\n",
-    "\n",
-    "        max_start = chrom_len - self.sequence_length\n",
-    "        start = random.randint(0, max_start)\n",
-    "        end = start + self.sequence_length\n",
-    "\n",
-    "        # Sequence\n",
-    "        seq = self.fasta[chrom][start:end]  # string slice\n",
-    "        tokens = self.tokenizer(\n",
-    "            seq,\n",
-    "            return_tensors=\"pt\",  # Returns a dict of PyTorch tensors\n",
-    "        )[\"input_ids\"][0]\n",
-    "        # The 'input_ids' field contains the tokenized sequence.\n",
-    "        # For a single input string, its shape is typically (1, len(seq))\n",
-    "\n",
-    "        # Signal from bigWig tracks (numpy array) -> torch tensor\n",
-    "        bigwig_targets = np.array([\n",
-    "            self.bw_list[i].values(chrom, start, end, numpy=True)\n",
-    "            for i in range(len(self.bw_list))\n",
-    "        ])  # shape (num_tracks, seq_len)\n",
-    "        # Transpose to (seq_len, num_tracks)\n",
-    "        bigwig_targets = bigwig_targets.T\n",
-    "        # pyBigWig returns NaN where no data; turn NaN into 0\n",
-    "        bigwig_targets = torch.tensor(bigwig_targets, dtype=torch.float32)\n",
-    "        bigwig_targets = torch.nan_to_num(bigwig_targets, nan=0.0)\n",
-    "        \n",
-    "        # Crop targets to center fraction\n",
-    "        if self.keep_target_center_fraction < 1.0:\n",
-    "            seq_len = bigwig_targets.shape[0]  # First dimension is sequence length\n",
-    "            target_offset = int(seq_len * (1 - self.keep_target_center_fraction) // 2)\n",
-    "            target_length = seq_len - 2 * target_offset\n",
-    "            bigwig_targets = bigwig_targets[target_offset:target_offset + target_length, :]\n",
-    "\n",
-    "        # Apply scaling to targets\n",
-    "        bigwig_targets = self.transform_fn(bigwig_targets)\n",
-    "\n",
-    "        sample = {\n",
-    "            \"tokens\": tokens,\n",
-    "            \"bigwig_targets\": bigwig_targets,\n",
-    "            \"chrom\": chrom,\n",
-    "            \"start\": start,\n",
-    "            \"end\": end,\n",
-    "        }\n",
-    "        return sample"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Train samples: 100\n",
-      "Val samples: 100\n",
-      "Test samples: 100\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Create scaling function\n",
-    "transform_fn = create_targets_scaling_fn(config[\"bigwig_file_ids\"])\n",
-    "\n",
-    "create_dataset_fn = functools.partial(\n",
-    "    GenomeBigWigDataset,\n",
-    "    fasta_path=fasta_path,\n",
-    "    bigwig_path_list=bigwig_path_list,\n",
-    "    sequence_length=config[\"sequence_length\"],\n",
-    "    tokenizer=tokenizer,\n",
-    "    transform_fn=transform_fn,\n",
-    "    keep_target_center_fraction=config[\"keep_target_center_fraction\"],\n",
-    "    num_tracks=len(config[\"bigwig_file_ids\"]),\n",
-    ")\n",
-    "\n",
-    "train_dataset = create_dataset_fn(\n",
-    "    chroms=chrom_splits[\"train\"],\n",
-    "    num_samples=100,\n",
-    ")\n",
-    "\n",
-    "val_dataset = create_dataset_fn(\n",
-    "    chroms=chrom_splits[\"val\"],\n",
-    "    num_samples=config[\"num_validation_samples\"],\n",
-    ")\n",
-    "\n",
-    "test_dataset = create_dataset_fn(\n",
-    "    chroms=chrom_splits[\"test\"],\n",
-    "    num_samples=config[\"num_validation_samples\"],\n",
-    ")\n",
-    "\n",
-    "# Create dataloaders\n",
-    "train_loader = DataLoader(\n",
-    "    train_dataset,\n",
-    "    batch_size=config[\"batch_size\"],\n",
-    "    shuffle=True,\n",
-    "    num_workers=config[\"num_workers\"],\n",
-    ")\n",
-    "\n",
-    "val_loader = DataLoader(\n",
-    "    val_dataset,\n",
-    "    batch_size=config[\"batch_size\"],\n",
-    "    shuffle=False,\n",
-    "    num_workers=config[\"num_workers\"],\n",
-    ")\n",
-    "\n",
-    "test_loader = DataLoader(\n",
-    "    test_dataset,\n",
-    "    batch_size=config[\"batch_size\"],\n",
-    "    shuffle=False,\n",
-    "    num_workers=config[\"num_workers\"],\n",
-    ")\n",
-    "\n",
-    "print(f\"Train samples: {len(train_dataset)}\")\n",
-    "print(f\"Val samples: {len(val_dataset)}\")\n",
-    "print(f\"Test samples: {len(test_dataset)}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# 5. Optimizer setup\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Training configuration:\n",
-      "  Batch size: 8\n",
-      "  Total training steps: 1000\n",
-      "  Log metrics every: 10 steps\n",
-      "  Validate every: 50 steps\n",
-      "\n",
-      "Optimizer setup:\n",
-      "  Learning rate: 1e-05\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Training setup\n",
-    "print(f\"Training configuration:\")\n",
-    "print(f\"  Batch size: {config[\"batch_size\"]}\")\n",
-    "print(f\"  Total training steps: {config[\"num_steps_training\"]}\")\n",
-    "print(f\"  Log metrics every: {config[\"log_every_n_steps\"]} steps\")\n",
-    "print(f\"  Validate every: {config[\"validate_every_n_steps\"]} steps\")\n",
-    "\n",
-    "# Setup optimizer\n",
-    "optimizer = AdamW(\n",
-    "    model.parameters(),\n",
-    "    lr=config[\"learning_rate\"],\n",
-    "    weight_decay=config[\"weight_decay\"],\n",
-    ")\n",
-    "\n",
-    "print(f\"\\nOptimizer setup:\")\n",
-    "print(f\"  Learning rate: {config['learning_rate']}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# 6. Metrics setup (using TorchMetrics)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class TracksMetrics:\n",
-    "    \"\"\"Simple metrics tracker for tracks prediction.\"\"\"\n",
-    "    \n",
-    "    def __init__(self, track_names: List[str]):\n",
-    "        self.track_names = track_names\n",
-    "        self.num_tracks = len(track_names)\n",
-    "        # Metrics: comparing scaled targets with scaled predictions\n",
-    "        self.pearson_metrics = [\n",
-    "            PearsonCorrCoef().to(device) for _ in range(self.num_tracks)\n",
-    "        ]\n",
-    "        self.losses = []\n",
-    "    \n",
-    "    def reset(self):\n",
-    "        for metric in self.pearson_metrics:\n",
-    "            metric.reset()\n",
-    "        self.losses = []\n",
-    "    \n",
-    "    def update(\n",
-    "        self, \n",
-    "        predictions: torch.Tensor, \n",
-    "        targets: torch.Tensor,\n",
-    "        loss: float\n",
-    "    ):\n",
-    "        \"\"\"\n",
-    "        Update metrics.\n",
-    "        Args:\n",
-    "            predictions: (batch, seq_len, num_tracks)\n",
-    "            targets: (batch, seq_len, num_tracks)\n",
-    "            loss: scalar loss value\n",
-    "        \"\"\"\n",
-    "        # Flatten batch and sequence dimensions\n",
-    "        pred_flat = predictions.detach().reshape(-1, self.num_tracks)  # (N, num_tracks)\n",
-    "        target_flat = targets.detach().reshape(-1, self.num_tracks)  # (N, num_tracks)\n",
-    "        \n",
-    "        # Update metrics\n",
-    "        for i, metric in enumerate(self.pearson_metrics):\n",
-    "            metric.update(pred_flat[:, i], target_flat[:, i])\n",
-    "        \n",
-    "        self.losses.append(loss)\n",
-    "    \n",
-    "    def compute(self) -> Dict[str, float]:\n",
-    "        \"\"\"Compute and return all metrics.\"\"\"\n",
-    "        metrics_dict = {}\n",
-    "        \n",
-    "        # Per-track Pearson correlations\n",
-    "        for i, (track_name, metric) in enumerate(zip(self.track_names, self.pearson_metrics)):\n",
-    "            corr = metric.compute().item()\n",
-    "            metrics_dict[f\"{track_name}/pearson\"] = corr\n",
-    "        \n",
-    "        # Mean Pearson correlation\n",
-    "        correlations = [metric.compute().item() for metric in self.pearson_metrics]\n",
-    "        metrics_dict[\"mean/pearson\"] = np.nanmean(correlations)\n",
-    "        \n",
-    "        # Mean loss\n",
-    "        metrics_dict[\"loss\"] = np.mean(self.losses) if self.losses else 0.0\n",
-    "        \n",
-    "        return metrics_dict"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# One independent metrics tracker per data split, all over the same tracks\n",
-    "train_metrics, val_metrics, test_metrics = (\n",
-    "    TracksMetrics(config[\"bigwig_file_ids\"]) for _ in range(3)\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# 7. Loss functions"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def poisson_loss(ytrue: torch.Tensor, ypred: torch.Tensor, epsilon: float = 1e-7) -> torch.Tensor:\n",
-    "    \"\"\"\n",
-    "    Element-wise Poisson loss: ypred - ytrue * log(ypred + epsilon).\n",
-    "    epsilon is added inside the log so that ypred == 0 does not produce -inf.\n",
-    "    \"\"\"\n",
-    "    log_pred = torch.log(ypred + epsilon)\n",
-    "    return ypred - ytrue * log_pred\n",
-    "\n",
-    "\n",
-    "def safe_for_grad_log_torch(x: torch.Tensor) -> torch.Tensor:\n",
-    "    \"\"\"\n",
-    "    Log that stays finite for non-positive inputs.\n",
-    "    Entries that are not > 0 are replaced by 1 before the log, so they map to 0\n",
-    "    instead of -inf/NaN; positive entries get the regular log.\n",
-    "    \"\"\"\n",
-    "    safe_x = torch.where(x > 0.0, x, torch.ones_like(x))\n",
-    "    return torch.log(safe_x)\n",
-    "\n",
-    "\n",
-    "def poisson_multinomial_loss(\n",
-    "    logits: torch.Tensor,\n",
-    "    targets: torch.Tensor,\n",
-    "    shape_loss_coefficient: float = 5.0,\n",
-    "    epsilon: float = 1e-7,\n",
-    ") -> tuple[torch.Tensor, torch.Tensor | None, torch.Tensor | None]:\n",
-    "    \"\"\"\n",
-    "    Poisson-Multinomial regression loss for bigwig tracks.\n",
-    "    The loss combines two terms, both reduced along dim 1:\n",
-    "      - scale loss: Poisson loss between the summed targets and summed predictions,\n",
-    "      - shape loss: cross-entropy between the targets and the predictions\n",
-    "        normalized to sum to 1 along dim 1.\n",
-    "    Args:\n",
-    "        logits: predicted track values; assumed (batch, seq_len, num_tracks) - TODO confirm\n",
-    "        targets: target track values, same shape as logits\n",
-    "        shape_loss_coefficient: the scale loss is divided by this value before being added\n",
-    "        epsilon: numerical stabilizer for the log and the normalization\n",
-    "    Returns:\n",
-    "        (loss, scale_loss, shape_loss), each a scalar tensor.\n",
-    "    \"\"\"\n",
-    "\n",
-    "    # Scale term: compare the totals along dim 1\n",
-    "    total_pred = logits.sum(dim=1)  # (batch, num_tracks)\n",
-    "    total_true = targets.sum(dim=1)  # (batch, num_tracks)\n",
-    "    scale_loss = poisson_loss(total_true, total_pred, epsilon=epsilon).mean()\n",
-    "    \n",
-    "    # Shape term: predictions normalized along dim 1, then a safe log\n",
-    "    normalizer = logits.sum(dim=1, keepdim=True) + epsilon\n",
-    "    log_probs = safe_for_grad_log_torch(logits / normalizer)\n",
-    "    shape_loss = -(targets * log_probs).mean()\n",
-    "    \n",
-    "    # Weighted combination of both terms\n",
-    "    loss = shape_loss + scale_loss / shape_loss_coefficient\n",
-    "\n",
-    "    return loss, scale_loss, shape_loss\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# 8. Training loop"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def train_step(\n",
-    "    model: nn.Module,\n",
-    "    batch: Dict[str, torch.Tensor],\n",
-    ") -> float:\n",
-    "    \"\"\"\n",
-    "    Run the forward and backward pass for one training batch.\n",
-    "    Gradients are accumulated through loss.backward(); stepping and zeroing the\n",
-    "    optimizer is left to the caller. Uses the notebook-level `device`.\n",
-    "    Args:\n",
-    "        model: model called as model(tokens=...) and returning a dict with \"bigwig_tracks_logits\"\n",
-    "        batch: dict holding the \"tokens\" and \"bigwig_targets\" tensors\n",
-    "    Returns:\n",
-    "        The loss of the batch as a Python float.\n",
-    "    \"\"\"\n",
-    "    inputs = batch[\"tokens\"].to(device)\n",
-    "    targets = batch[\"bigwig_targets\"].to(device)\n",
-    "    \n",
-    "    # Forward pass\n",
-    "    predictions = model(tokens=inputs)[\"bigwig_tracks_logits\"]\n",
-    "    \n",
-    "    # Only the combined loss is needed here; scale/shape terms are discarded\n",
-    "    loss, _, _ = poisson_multinomial_loss(logits=predictions, targets=targets)\n",
-    "    \n",
-    "    # Backward pass\n",
-    "    loss.backward()\n",
-    "    return loss.item()\n",
-    "\n",
-    "\n",
-    "def validation_step(\n",
-    "    model: nn.Module,\n",
-    "    batch: Dict[str, torch.Tensor],\n",
-    "    metrics: TracksMetrics,\n",
-    ") -> float:\n",
-    "    \"\"\"\n",
-    "    Evaluate one batch without gradients and record it in `metrics`.\n",
-    "    Note: this switches the model to eval mode and does not switch it back.\n",
-    "    Uses the notebook-level `device`.\n",
-    "    Args:\n",
-    "        model: model called as model(tokens=...) and returning a dict with \"bigwig_tracks_logits\"\n",
-    "        batch: dict holding the \"tokens\" and \"bigwig_targets\" tensors\n",
-    "        metrics: tracker updated with the predictions, targets and loss of this batch\n",
-    "    Returns:\n",
-    "        The loss of the batch as a Python float.\n",
-    "    \"\"\"\n",
-    "    model.eval()\n",
-    "    \n",
-    "    inputs = batch[\"tokens\"].to(device)\n",
-    "    targets = batch[\"bigwig_targets\"].to(device)\n",
-    "    \n",
-    "    with torch.no_grad():\n",
-    "        # Forward pass\n",
-    "        predictions = model(tokens=inputs)[\"bigwig_tracks_logits\"]\n",
-    "        \n",
-    "        # Compute loss (scale/shape terms are not used here)\n",
-    "        loss, _, _ = poisson_multinomial_loss(logits=predictions, targets=targets)\n",
-    "        loss_value = loss.item()\n",
-    "        \n",
-    "        # Update metrics\n",
-    "        metrics.update(predictions=predictions, targets=targets, loss=loss_value)\n",
-    "    \n",
-    "    return loss_value"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Note: the interactive plotting below is temporary and intended for debugging only"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Starting training...\n",
-      "Training for 1000 steps\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "5935c992adb7428bac8de1aa6873dd7e",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "FigureWidget({\n",
-       "    'data': [{'line': {'color': 'blue'},\n",
-       "              'mode': 'lines+markers',\n",
-       "              'name': 'Train Loss',\n",
-       "              'type': 'scatter',\n",
-       "              'uid': '5424e4af-13b6-48c8-a367-8aa145c3a9db',\n",
-       "              'x': [],\n",
-       "              'xaxis': 'x',\n",
-       "              'y': [],\n",
-       "              'yaxis': 'y'},\n",
-       "             {'line': {'color': 'red'},\n",
-       "              'mode': 'lines+markers',\n",
-       "              'name': 'Val Loss',\n",
-       "              'type': 'scatter',\n",
-       "              'uid': 'fe995660-5f01-4c12-9d7d-9ed19ddee785',\n",
-       "              'x': [],\n",
-       "              'xaxis': 'x',\n",
-       "              'y': [],\n",
-       "              'yaxis': 'y'},\n",
-       "             {'line': {'color': 'green'},\n",
-       "              'mode': 'lines+markers',\n",
-       "              'name': 'Train Pearson',\n",
-       "              'type': 'scatter',\n",
-       "              'uid': '8453b45b-4613-41bc-a46b-ac59ba9e6f97',\n",
-       "              'x': [],\n",
-       "              'xaxis': 'x2',\n",
-       "              'y': [],\n",
-       "              'yaxis': 'y2'},\n",
-       "             {'line': {'color': 'orange'},\n",
-       "              'mode': 'lines+markers',\n",
-       "              'name': 'Val Pearson',\n",
-       "              'type': 'scatter',\n",
-       "              'uid': '0887ea97-abf9-4fcf-8ea8-c638dc153a4d',\n",
-       "              'x': [],\n",
-       "              'xaxis': 'x2',\n",
-       "              'y': [],\n",
-       "              'yaxis': 'y2'}],\n",
-       "    'layout': {'annotations': [{'font': {'size': 16},\n",
-       "                                'showarrow': False,\n",
-       "                                'text': 'Loss',\n",
-       "                                'x': 0.2125,\n",
-       "                                'xanchor': 'center',\n",
-       "                                'xref': 'paper',\n",
-       "                                'y': 1.0,\n",
-       "                                'yanchor': 'bottom',\n",
-       "                                'yref': 'paper'},\n",
-       "                               {'font': {'size': 16},\n",
-       "                                'showarrow': False,\n",
-       "                                'text': 'Mean Pearson Correlation',\n",
-       "                                'x': 0.7875,\n",
-       "                                'xanchor': 'center',\n",
-       "                                'xref': 'paper',\n",
-       "                                'y': 1.0,\n",
-       "                                'yanchor': 'bottom',\n",
-       "                                'yref': 'paper'}],\n",
-       "               'height': 800,\n",
-       "               'showlegend': True,\n",
-       "               'template': '...',\n",
-       "               'title': {'text': 'Training'},\n",
-       "               'width': 1600,\n",
-       "               'xaxis': {'anchor': 'y', 'domain': [0.0, 0.425], 'title': {'text': 'Step'}},\n",
-       "               'xaxis2': {'anchor': 'y2', 'domain': [0.575, 1.0], 'title': {'text': 'Step'}},\n",
-       "               'yaxis': {'anchor': 'x', 'domain': [0.0, 1.0], 'title': {'text': 'Loss'}},\n",
-       "               'yaxis2': {'anchor': 'x2', 'domain': [0.0, 1.0], 'title': {'text': 'Pearson Correlation'}}}\n",
-       "})"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/y-bornachot/venvs/ntv3-env/lib/python3.12/site-packages/torch/amp/autocast_mode.py:287: UserWarning:\n",
-      "\n",
-      "In CPU autocast, but the target dtype is not supported. Disabling autocast.\n",
-      "CPU Autocast only supports dtype of torch.bfloat16, torch.float16 currently.\n",
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Step 10/1000 | Loss: 0.2374 | Mean Pearson: 0.0382 | LR: 1.00e-05\n",
-      "Step 20/1000 | Loss: 2.2259 | Mean Pearson: -0.0884 | LR: 1.00e-05\n",
-      "Step 30/1000 | Loss: 20.0122 | Mean Pearson: 0.1379 | LR: 1.00e-05\n",
-      "Step 40/1000 | Loss: 9.6938 | Mean Pearson: -0.1497 | LR: 1.00e-05\n",
-      "Step 50/1000 | Loss: -1.8435 | Mean Pearson: -0.1875 | LR: 1.00e-05\n",
-      "\n",
-      "Running validation at step 50...\n",
-      "  Validation Loss: 11.5599\n",
-      "  Validation Mean Pearson: -0.1576\n",
-      "    ENCFF884LDL/pearson: -0.1576\n",
-      "Step 60/1000 | Loss: 1.4427 | Mean Pearson: 0.2841 | LR: 1.00e-05\n",
-      "Step 70/1000 | Loss: -3.4037 | Mean Pearson: -0.1362 | LR: 1.00e-05\n",
-      "Step 80/1000 | Loss: 9.0958 | Mean Pearson: -0.1319 | LR: 1.00e-05\n",
-      "Step 90/1000 | Loss: -7.8433 | Mean Pearson: -0.0576 | LR: 1.00e-05\n",
-      "Step 100/1000 | Loss: 7.3503 | Mean Pearson: -0.2150 | LR: 1.00e-05\n",
-      "\n",
-      "Running validation at step 100...\n",
-      "  Validation Loss: 22.3383\n",
-      "  Validation Mean Pearson: -0.2867\n",
-      "    ENCFF884LDL/pearson: -0.2867\n",
-      "Step 110/1000 | Loss: -8.1600 | Mean Pearson: -0.1616 | LR: 1.00e-05\n",
-      "Step 120/1000 | Loss: -0.8743 | Mean Pearson: -0.1318 | LR: 1.00e-05\n",
-      "Step 130/1000 | Loss: -2.9825 | Mean Pearson: -0.0480 | LR: 1.00e-05\n",
-      "Step 140/1000 | Loss: -2.4524 | Mean Pearson: -0.0879 | LR: 1.00e-05\n",
-      "Step 150/1000 | Loss: 3.8818 | Mean Pearson: -0.0907 | LR: 1.00e-05\n",
-      "\n",
-      "Running validation at step 150...\n",
-      "  Validation Loss: 19.6866\n",
-      "  Validation Mean Pearson: -0.2207\n",
-      "    ENCFF884LDL/pearson: -0.2207\n",
-      "Step 160/1000 | Loss: -1.0933 | Mean Pearson: -0.1243 | LR: 1.00e-05\n",
-      "Step 170/1000 | Loss: -2.2577 | Mean Pearson: -0.0212 | LR: 1.00e-05\n",
-      "Step 180/1000 | Loss: 0.0738 | Mean Pearson: 0.5643 | LR: 1.00e-05\n",
-      "Step 190/1000 | Loss: -0.1097 | Mean Pearson: 0.0309 | LR: 1.00e-05\n",
-      "Step 200/1000 | Loss: -8.7972 | Mean Pearson: 0.4804 | LR: 1.00e-05\n",
-      "\n",
-      "Running validation at step 200...\n",
-      "  Validation Loss: -8.8160\n",
-      "  Validation Mean Pearson: 0.0912\n",
-      "    ENCFF884LDL/pearson: 0.0912\n",
-      "Step 210/1000 | Loss: -2.5429 | Mean Pearson: 0.3908 | LR: 1.00e-05\n",
-      "Step 220/1000 | Loss: -6.8421 | Mean Pearson: 0.4080 | LR: 1.00e-05\n",
-      "Step 230/1000 | Loss: -4.4312 | Mean Pearson: -0.0400 | LR: 1.00e-05\n",
-      "Step 240/1000 | Loss: -11.4732 | Mean Pearson: 0.6653 | LR: 1.00e-05\n",
-      "Step 250/1000 | Loss: -9.2648 | Mean Pearson: 0.0539 | LR: 1.00e-05\n",
-      "\n",
-      "Running validation at step 250...\n",
-      "  Validation Loss: -6.8987\n",
-      "  Validation Mean Pearson: 0.0654\n",
-      "    ENCFF884LDL/pearson: 0.0654\n",
-      "Step 260/1000 | Loss: -0.6699 | Mean Pearson: 0.0913 | LR: 1.00e-05\n",
-      "Step 270/1000 | Loss: -8.6625 | Mean Pearson: 0.3179 | LR: 1.00e-05\n",
-      "Step 280/1000 | Loss: -11.7691 | Mean Pearson: 0.0004 | LR: 1.00e-05\n",
-      "Step 290/1000 | Loss: -14.1622 | Mean Pearson: 0.0492 | LR: 1.00e-05\n",
-      "Step 300/1000 | Loss: 0.9208 | Mean Pearson: 0.0607 | LR: 1.00e-05\n",
-      "\n",
-      "Running validation at step 300...\n",
-      "  Validation Loss: -5.0427\n",
-      "  Validation Mean Pearson: 0.3464\n",
-      "    ENCFF884LDL/pearson: 0.3464\n",
-      "Step 310/1000 | Loss: -1.2881 | Mean Pearson: 0.1696 | LR: 1.00e-05\n",
-      "Step 320/1000 | Loss: -18.6637 | Mean Pearson: 0.0892 | LR: 1.00e-05\n",
-      "Step 330/1000 | Loss: -36.6038 | Mean Pearson: 0.3356 | LR: 1.00e-05\n",
-      "Step 340/1000 | Loss: -2.4984 | Mean Pearson: 0.2305 | LR: 1.00e-05\n",
-      "Step 350/1000 | Loss: -4.7985 | Mean Pearson: 0.0968 | LR: 1.00e-05\n",
-      "\n",
-      "Running validation at step 350...\n",
-      "  Validation Loss: -13.6500\n",
-      "  Validation Mean Pearson: 0.2737\n",
-      "    ENCFF884LDL/pearson: 0.2737\n",
-      "Step 360/1000 | Loss: -9.4795 | Mean Pearson: 0.0579 | LR: 1.00e-05\n",
-      "Step 370/1000 | Loss: 0.3531 | Mean Pearson: 0.0240 | LR: 1.00e-05\n",
-      "Step 380/1000 | Loss: -5.7921 | Mean Pearson: 0.4119 | LR: 1.00e-05\n",
-      "Step 390/1000 | Loss: -2.7049 | Mean Pearson: 0.1343 | LR: 1.00e-05\n",
-      "Step 400/1000 | Loss: -32.8422 | Mean Pearson: 0.1545 | LR: 1.00e-05\n",
-      "\n",
-      "Running validation at step 400...\n",
-      "  Validation Loss: -4.3502\n",
-      "  Validation Mean Pearson: 0.3124\n",
-      "    ENCFF884LDL/pearson: 0.3124\n",
-      "Step 410/1000 | Loss: -18.9574 | Mean Pearson: 0.0594 | LR: 1.00e-05\n",
-      "Step 420/1000 | Loss: -5.4032 | Mean Pearson: 0.2804 | LR: 1.00e-05\n",
-      "Step 430/1000 | Loss: -0.5171 | Mean Pearson: 0.1835 | LR: 1.00e-05\n",
-      "Step 440/1000 | Loss: -3.4071 | Mean Pearson: 0.0680 | LR: 1.00e-05\n",
-      "Step 450/1000 | Loss: -3.5580 | Mean Pearson: 0.0850 | LR: 1.00e-05\n",
-      "\n",
-      "Running validation at step 450...\n",
-      "  Validation Loss: -7.3308\n",
-      "  Validation Mean Pearson: 0.1128\n",
-      "    ENCFF884LDL/pearson: 0.1128\n",
-      "Step 460/1000 | Loss: -0.9750 | Mean Pearson: 0.1717 | LR: 1.00e-05\n",
-      "Step 470/1000 | Loss: -5.5775 | Mean Pearson: 0.1321 | LR: 1.00e-05\n",
-      "Step 480/1000 | Loss: -1.1170 | Mean Pearson: 0.1484 | LR: 1.00e-05\n",
-      "Step 490/1000 | Loss: -3.8053 | Mean Pearson: 0.1959 | LR: 1.00e-05\n",
-      "Step 500/1000 | Loss: -4.5933 | Mean Pearson: 0.1860 | LR: 1.00e-05\n",
-      "\n",
-      "Running validation at step 500...\n",
-      "  Validation Loss: -5.7617\n",
-      "  Validation Mean Pearson: 0.3155\n",
-      "    ENCFF884LDL/pearson: 0.3155\n",
-      "Step 510/1000 | Loss: -3.3306 | Mean Pearson: 0.2815 | LR: 1.00e-05\n",
-      "Step 520/1000 | Loss: -2.1962 | Mean Pearson: 0.1151 | LR: 1.00e-05\n",
-      "Step 530/1000 | Loss: -1.5388 | Mean Pearson: 0.3783 | LR: 1.00e-05\n",
-      "Step 540/1000 | Loss: -2.2349 | Mean Pearson: 0.0734 | LR: 1.00e-05\n",
-      "Step 550/1000 | Loss: -1.5502 | Mean Pearson: 0.2171 | LR: 1.00e-05\n",
-      "\n",
-      "Running validation at step 550...\n",
-      "  Validation Loss: -3.0059\n",
-      "  Validation Mean Pearson: 0.2325\n",
-      "    ENCFF884LDL/pearson: 0.2325\n",
-      "Step 560/1000 | Loss: -2.0764 | Mean Pearson: -0.0049 | LR: 1.00e-05\n",
-      "Step 570/1000 | Loss: -1.7384 | Mean Pearson: 0.2989 | LR: 1.00e-05\n",
-      "Step 580/1000 | Loss: -6.7306 | Mean Pearson: 0.2522 | LR: 1.00e-05\n",
-      "Step 590/1000 | Loss: -3.2473 | Mean Pearson: 0.1042 | LR: 1.00e-05\n",
-      "Step 600/1000 | Loss: -4.2841 | Mean Pearson: 0.1936 | LR: 1.00e-05\n",
-      "\n",
-      "Running validation at step 600...\n",
-      "  Validation Loss: -4.5611\n",
-      "  Validation Mean Pearson: 0.2744\n",
-      "    ENCFF884LDL/pearson: 0.2744\n",
-      "Step 610/1000 | Loss: -3.5691 | Mean Pearson: 0.1803 | LR: 1.00e-05\n",
-      "Step 620/1000 | Loss: -7.2129 | Mean Pearson: 0.0901 | LR: 1.00e-05\n",
-      "Step 630/1000 | Loss: -6.0598 | Mean Pearson: 0.1795 | LR: 1.00e-05\n",
-      "Step 640/1000 | Loss: -2.8917 | Mean Pearson: 0.1111 | LR: 1.00e-05\n",
-      "Step 650/1000 | Loss: -2.7210 | Mean Pearson: 0.3566 | LR: 1.00e-05\n",
-      "\n",
-      "Running validation at step 650...\n",
-      "  Validation Loss: -4.3997\n",
-      "  Validation Mean Pearson: 0.3327\n",
-      "    ENCFF884LDL/pearson: 0.3327\n",
-      "Step 660/1000 | Loss: -3.4793 | Mean Pearson: 0.0441 | LR: 1.00e-05\n",
-      "Step 670/1000 | Loss: -1.9743 | Mean Pearson: 0.1364 | LR: 1.00e-05\n",
-      "Step 680/1000 | Loss: -5.7498 | Mean Pearson: 0.2330 | LR: 1.00e-05\n",
-      "Step 690/1000 | Loss: -12.8701 | Mean Pearson: 0.3182 | LR: 1.00e-05\n",
-      "Step 700/1000 | Loss: -1.5847 | Mean Pearson: 0.1971 | LR: 1.00e-05\n",
-      "\n",
-      "Running validation at step 700...\n",
-      "  Validation Loss: -2.0630\n",
-      "  Validation Mean Pearson: 0.1267\n",
-      "    ENCFF884LDL/pearson: 0.1267\n",
-      "Step 710/1000 | Loss: -6.0704 | Mean Pearson: 0.3715 | LR: 1.00e-05\n",
-      "Step 720/1000 | Loss: -2.6020 | Mean Pearson: 0.1244 | LR: 1.00e-05\n",
-      "Step 730/1000 | Loss: -58.8965 | Mean Pearson: 0.5625 | LR: 1.00e-05\n",
-      "Step 740/1000 | Loss: -1.2855 | Mean Pearson: 0.2658 | LR: 1.00e-05\n",
-      "Step 750/1000 | Loss: -4.4599 | Mean Pearson: 0.0137 | LR: 1.00e-05\n",
-      "\n",
-      "Running validation at step 750...\n",
-      "  Validation Loss: -11.1562\n",
-      "  Validation Mean Pearson: 0.0844\n",
-      "    ENCFF884LDL/pearson: 0.0844\n",
-      "Step 760/1000 | Loss: -11.6905 | Mean Pearson: 0.1914 | LR: 1.00e-05\n",
-      "Step 770/1000 | Loss: -4.0964 | Mean Pearson: 0.2022 | LR: 1.00e-05\n",
-      "Step 780/1000 | Loss: -1.5512 | Mean Pearson: 0.3568 | LR: 1.00e-05\n",
-      "Step 790/1000 | Loss: -5.5843 | Mean Pearson: 0.2058 | LR: 1.00e-05\n",
-      "Step 800/1000 | Loss: -3.9190 | Mean Pearson: 0.4362 | LR: 1.00e-05\n",
-      "\n",
-      "Running validation at step 800...\n",
-      "  Validation Loss: -4.7017\n",
-      "  Validation Mean Pearson: 0.3817\n",
-      "    ENCFF884LDL/pearson: 0.3817\n",
-      "Step 810/1000 | Loss: -7.6856 | Mean Pearson: 0.0672 | LR: 1.00e-05\n",
-      "Step 820/1000 | Loss: -5.3603 | Mean Pearson: 0.2325 | LR: 1.00e-05\n",
-      "Step 830/1000 | Loss: -3.8539 | Mean Pearson: 0.2808 | LR: 1.00e-05\n",
-      "Step 840/1000 | Loss: -8.1141 | Mean Pearson: 0.2529 | LR: 1.00e-05\n",
-      "Step 850/1000 | Loss: -10.5886 | Mean Pearson: 0.3454 | LR: 1.00e-05\n",
-      "\n",
-      "Running validation at step 850...\n",
-      "  Validation Loss: -4.9108\n",
-      "  Validation Mean Pearson: 0.2195\n",
-      "    ENCFF884LDL/pearson: 0.2195\n",
-      "Step 860/1000 | Loss: -4.1028 | Mean Pearson: 0.3304 | LR: 1.00e-05\n",
-      "Step 870/1000 | Loss: -7.1834 | Mean Pearson: 0.1206 | LR: 1.00e-05\n",
-      "Step 880/1000 | Loss: -8.9869 | Mean Pearson: 0.3584 | LR: 1.00e-05\n",
-      "Step 890/1000 | Loss: -2.2697 | Mean Pearson: 0.0943 | LR: 1.00e-05\n",
-      "Step 900/1000 | Loss: -14.0142 | Mean Pearson: 0.4761 | LR: 1.00e-05\n",
-      "\n",
-      "Running validation at step 900...\n",
-      "  Validation Loss: -3.2329\n",
-      "  Validation Mean Pearson: 0.3635\n",
-      "    ENCFF884LDL/pearson: 0.3635\n",
-      "Step 910/1000 | Loss: -9.0941 | Mean Pearson: 0.2754 | LR: 1.00e-05\n",
-      "Step 920/1000 | Loss: -4.6371 | Mean Pearson: 0.0167 | LR: 1.00e-05\n",
-      "Step 930/1000 | Loss: -7.9853 | Mean Pearson: 0.0941 | LR: 1.00e-05\n",
-      "Step 940/1000 | Loss: -22.9349 | Mean Pearson: 0.5140 | LR: 1.00e-05\n",
-      "Step 950/1000 | Loss: -2.0866 | Mean Pearson: 0.1746 | LR: 1.00e-05\n",
-      "\n",
-      "Running validation at step 950...\n",
-      "  Validation Loss: -8.8318\n",
-      "  Validation Mean Pearson: 0.1597\n",
-      "    ENCFF884LDL/pearson: 0.1597\n",
-      "Step 960/1000 | Loss: -4.8540 | Mean Pearson: 0.6318 | LR: 1.00e-05\n",
-      "Step 970/1000 | Loss: -4.1091 | Mean Pearson: 0.0985 | LR: 1.00e-05\n",
-      "Step 980/1000 | Loss: -5.1141 | Mean Pearson: 0.2031 | LR: 1.00e-05\n",
-      "Step 990/1000 | Loss: -4.1959 | Mean Pearson: 0.2404 | LR: 1.00e-05\n",
-      "Step 1000/1000 | Loss: -0.9942 | Mean Pearson: 0.2742 | LR: 1.00e-05\n",
-      "\n",
-      "Running validation at step 1000...\n",
-      "  Validation Loss: -4.2796\n",
-      "  Validation Mean Pearson: 0.1425\n",
-      "    ENCFF884LDL/pearson: 0.1425\n",
-      "\n",
-      "Training completed after 1000 steps.\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Training loop\n",
-    "print(\"Starting training...\")\n",
-    "print(f\"Training for {config[\"num_steps_training\"]} steps\\n\")\n",
-    "\n",
-    "model.train()\n",
-    "train_metrics.reset()\n",
-    "optimizer.zero_grad()  # Initialize gradients\n",
-    "\n",
-    "# Histories of the logged values; they are the data behind the live plots\n",
-    "train_steps, train_losses, train_pearson_scores = [], [], []\n",
-    "val_steps, val_losses, val_pearson_scores = [], [], []\n",
-    "\n",
-    "# Live plots are backed by a FigureWidget so they can be refreshed in place\n",
-    "from plotly.graph_objects import FigureWidget\n",
-    "from plotly.subplots import make_subplots\n",
-    "\n",
-    "# Base figure: loss on the left, mean Pearson correlation on the right\n",
-    "fig_base = make_subplots(\n",
-    "    rows=1, cols=2,\n",
-    "    subplot_titles=('Loss', 'Mean Pearson Correlation'),\n",
-    "    horizontal_spacing=0.15,\n",
-    ")\n",
-    "\n",
-    "# Empty traces, added in a fixed order that the training loop indexes into:\n",
-    "# 0 = train loss, 1 = val loss, 2 = train pearson, 3 = val pearson\n",
-    "for trace_name, trace_color, trace_col in [\n",
-    "    ('Train Loss', 'blue', 1),\n",
-    "    ('Val Loss', 'red', 1),\n",
-    "    ('Train Pearson', 'green', 2),\n",
-    "    ('Val Pearson', 'orange', 2),\n",
-    "]:\n",
-    "    fig_base.add_trace(\n",
-    "        go.Scatter(x=[], y=[], mode='lines+markers', name=trace_name, line=dict(color=trace_color)),\n",
-    "        row=1, col=trace_col\n",
-    "    )\n",
-    "\n",
-    "# Axis titles for both subplots\n",
-    "for axis_col, y_title in [(1, \"Loss\"), (2, \"Pearson Correlation\")]:\n",
-    "    fig_base.update_xaxes(title_text=\"Step\", row=1, col=axis_col)\n",
-    "    fig_base.update_yaxes(title_text=y_title, row=1, col=axis_col)\n",
-    "fig_base.update_layout(height=800, width=1600, showlegend=True, title_text=\"Training\")\n",
-    "\n",
-    "# Convert to FigureWidget for interactive updates\n",
-    "fig = FigureWidget(fig_base)\n",
-    "\n",
-    "# Display initial plot (will update in place during training)\n",
-    "display(fig)\n",
-    "\n",
-    "# Create iterator for training data (will cycle if needed)\n",
-    "train_iter = iter(train_loader)\n",
-    "\n",
-    "# Main training loop\n",
-    "for step_idx in range(config[\"num_steps_training\"]):\n",
-    "    try:\n",
-    "        batch = next(train_iter)\n",
-    "    except StopIteration:\n",
-    "        # Restart iterator if we run out of data\n",
-    "        train_iter = iter(train_loader)\n",
-    "        batch = next(train_iter)\n",
-    "    \n",
-    "    # Single forward pass: its predictions feed both the loss and the training\n",
-    "    # metrics, so no second forward pass is needed after the optimizer update.\n",
-    "    tokens = batch[\"tokens\"].to(device)\n",
-    "    bigwig_targets = batch[\"bigwig_targets\"].to(device)\n",
-    "    outputs = model(tokens=tokens)\n",
-    "    bigwig_logits = outputs[\"bigwig_tracks_logits\"]\n",
-    "    loss_tensor, _, _ = poisson_multinomial_loss(\n",
-    "        logits=bigwig_logits,\n",
-    "        targets=bigwig_targets,\n",
-    "    )\n",
-    "    \n",
-    "    # Backward pass and optimizer update\n",
-    "    loss_tensor.backward()\n",
-    "    optimizer.step()\n",
-    "    optimizer.zero_grad()\n",
-    "    loss = loss_tensor.item()\n",
-    "    \n",
-    "    # Update metrics with the predictions the loss was computed from\n",
-    "    # (detached: the metrics must not keep the autograd graph alive)\n",
-    "    train_metrics.update(\n",
-    "        predictions=bigwig_logits.detach(),\n",
-    "        targets=bigwig_targets,\n",
-    "        loss=loss\n",
-    "    )\n",
-    "    \n",
-    "    # Logging\n",
-    "    if (step_idx + 1) % config[\"log_every_n_steps\"] == 0:\n",
-    "        train_metrics_dict = train_metrics.compute()\n",
-    "        current_lr = optimizer.param_groups[0]['lr']\n",
-    "        \n",
-    "        # Track metrics for plotting (loss is the loss of the current step)\n",
-    "        train_steps.append(step_idx + 1)\n",
-    "        train_losses.append(loss)\n",
-    "        train_pearson_scores.append(train_metrics_dict['mean/pearson'])\n",
-    "        \n",
-    "        # Update plots - direct assignment to FigureWidget data updates the plot automatically\n",
-    "        fig.data[0].x = train_steps\n",
-    "        fig.data[0].y = train_losses\n",
-    "        fig.data[2].x = train_steps\n",
-    "        fig.data[2].y = train_pearson_scores\n",
-    "        \n",
-    "        print(f\"Step {step_idx + 1}/{config[\"num_steps_training\"]} | \"\n",
-    "              f\"Loss: {loss:.4f} | \"\n",
-    "              f\"Mean Pearson: {train_metrics_dict['mean/pearson']:.4f} | \"\n",
-    "              f\"LR: {current_lr:.2e}\")\n",
-    "        # Start a fresh accumulation window for the next log interval\n",
-    "        train_metrics.reset()\n",
-    "    \n",
-    "    # Validation\n",
-    "    if (step_idx + 1) % config[\"validate_every_n_steps\"] == 0:\n",
-    "        print(f\"\\nRunning validation at step {step_idx + 1}...\")\n",
-    "        val_metrics.reset()\n",
-    "        model.eval()\n",
-    "        \n",
-    "        val_batch_losses = []\n",
-    "        for val_batch in val_loader:\n",
-    "            val_loss = validation_step(model, val_batch, val_metrics)\n",
-    "            val_batch_losses.append(val_loss)\n",
-    "        \n",
-    "        # Print validation metrics\n",
-    "        val_metrics_dict = val_metrics.compute()\n",
-    "        val_loss_mean = np.mean(val_batch_losses)\n",
-    "        val_pearson_mean = val_metrics_dict['mean/pearson']\n",
-    "        \n",
-    "        # Track validation metrics\n",
-    "        val_steps.append(step_idx + 1)\n",
-    "        val_losses.append(val_loss_mean)\n",
-    "        val_pearson_scores.append(val_pearson_mean)\n",
-    "        \n",
-    "        # Update plots with validation data - direct assignment updates the plot automatically\n",
-    "        fig.data[1].x = val_steps\n",
-    "        fig.data[1].y = val_losses\n",
-    "        fig.data[3].x = val_steps\n",
-    "        fig.data[3].y = val_pearson_scores\n",
-    "        \n",
-    "        print(f\"  Validation Loss: {val_loss_mean:.4f}\")\n",
-    "        print(f\"  Validation Mean Pearson: {val_pearson_mean:.4f}\")\n",
-    "        for track_name in config[\"bigwig_file_ids\"]:\n",
-    "            print(f\"    {track_name}/pearson: {val_metrics_dict[f'{track_name}/pearson']:.4f}\")\n",
-    "        \n",
-    "        model.train()  # Back to training mode\n",
-    "\n",
-    "print(f\"\\nTraining completed after {config[\"num_steps_training\"]} steps.\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# 9. Test evaluation"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Running test evaluation with 12 steps (100 samples)\n",
-      "\n",
-      "==================================================\n",
-      "Test Set Results\n",
-      "==================================================\n",
-      "\n",
-      "Metrics:\n",
-      "  Mean Pearson: 0.1787\n",
-      "    ENCFF884LDL/pearson: 0.1787\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Calculate number of test steps (based on deepspeed pipeline)\n",
-    "num_test_samples = len(test_dataset)\n",
-    "num_test_steps = num_test_samples // config[\"batch_size\"]\n",
-    "print(f\"Running test evaluation with {num_test_steps} steps ({num_test_samples} samples)\")\n",
-    "\n",
-    "# Start from a clean state so that re-running this cell does not mix in\n",
-    "# predictions accumulated by a previous run\n",
-    "test_metrics.reset()\n",
-    "\n",
-    "# Set model to eval mode\n",
-    "model.eval()\n",
-    "\n",
-    "# validation_step accumulates predictions and losses into test_metrics\n",
-    "for test_batch in test_loader:\n",
-    "    _ = validation_step(\n",
-    "        model,\n",
-    "        test_batch,\n",
-    "        test_metrics,\n",
-    "    )\n",
-    "\n",
-    "# Compute final test metrics\n",
-    "test_metrics_dict = test_metrics.compute()\n",
-    "print(\"\\n\" + \"=\"*50)\n",
-    "print(\"Test Set Results\")\n",
-    "print(\"=\"*50)\n",
-    "print(f\"\\nMetrics:\")\n",
-    "print(f\"  Mean Pearson: {test_metrics_dict['mean/pearson']:.4f}\")\n",
-    "for track_name in config[\"bigwig_file_ids\"]:\n",
-    "    print(f\"    {track_name}/pearson: {test_metrics_dict[f'{track_name}/pearson']:.4f}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3.12 (ntv3-env)",
-   "language": "python",
-   "name": "ntv3-env"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+version https://git-lfs.github.com/spec/v1
+oid sha256:fd2b425dc0d358a64ac0e27c1c8b32eef79069b995edcdf2b81549988ac97026
+size 14418415