Spaces:

InstaDeepAI
/

ntv3_tracks

Running on Zero

App Files Files Community

bernardo-de-almeida committed on Dec 17, 2025

Commit

b65f002

1 Parent(s): beb6a82

refactor: clean code

Browse files

Files changed (4) hide show

app.py +81 -59
bigwig_export.py +11 -7
ntv3_tracks_pipeline.py +72 -61
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -8,11 +8,10 @@ from pathlib import Path
 import gradio as gr
 import matplotlib
 import matplotlib.colors as mcolors
-import matplotlib.pyplot as plt
 import numpy as np
 import plotly.graph_objects as go
-from plotly.subplots import make_subplots
 import torch
 from bigwig_export import _softmax_last, create_bigwig_zip
 from ntv3_tracks_pipeline import (
@@ -57,7 +56,7 @@ def load_pipeline(model_id: str, species: str = DEFAULT_SPECIES):
     pipe = load_ntv3_tracks_pipeline(
         model=model_id,
         token=HF_TOKEN,
-        device="cpu",  # This prevents the pipeline constructor from doing model.to(cuda) during import.
         default_species=species,
         verbose=False,
     )
@@ -100,25 +99,29 @@ try:
 except Exception:
     def gpu(*args, **kwargs):
         def wrap(fn):
             return fn
         return wrap
-def _global_stride(L: int, target: int) -> int:
-    if target <= 0 or L <= target:
         return 1
-    return int(np.ceil(L / target))
-def _make_tracks_figure(x: np.ndarray, series: list[tuple[str, np.ndarray]], region: str = ""):
     """Create an interactive plotly figure with multiple tracks."""
     if not series:
         raise gr.Error("Nothing to plot (no tracks/elements selected).")
     n = len(series)
     # Create subplots with shared x-axis
     fig = make_subplots(
         rows=n,
@@ -140,8 +143,10 @@ def _make_tracks_figure(x: np.ndarray, series: list[tuple[str, np.ndarray]], reg
         # Convert color to rgba for fill
         rgba = mcolors.to_rgba(color)
-        rgba_str = f"rgba({int(rgba[0]*255)}, {int(rgba[1]*255)}, {int(rgba[2]*255)}, 0.3)"
         # Add filled area (fill_between equivalent)
         fig.add_trace(
             go.Scatter(
@@ -149,12 +154,12 @@ def _make_tracks_figure(x: np.ndarray, series: list[tuple[str, np.ndarray]], reg
                 y=y,
                 mode="lines",
                 name=title,
-                line=dict(color=color, width=1.5),
                 fill="tozeroy",
                 fillcolor=rgba_str,
-                hovertemplate=f"<b>{title}</b><br>" +
-                             "Position: %{x}<br>" +
-                             "Value: %{y:.4f}<extra></extra>",
                 showlegend=False,
             ),
             row=i,
@@ -165,7 +170,7 @@ def _make_tracks_figure(x: np.ndarray, series: list[tuple[str, np.ndarray]], reg
     fig.update_layout(
         height=150 * n,  # Adjust height based on number of tracks
         width=1200,
-        margin=dict(l=80, r=20, t=40, b=60),
         hovermode="x unified",  # Show all values at same x position
         template="plotly_white",
     )
@@ -278,7 +283,7 @@ def _format_track_for_display(track_id: str) -> str:
 def _extract_track_id(display_value: str) -> str:
-    """Extract track ID from display format 'display_name (track_id)' or return as-is."""
     if " (" in display_value and display_value.endswith(")"):
         # Extract track_id from format "display_name (track_id)"
         return display_value.rsplit(" (", 1)[1][:-1]
@@ -455,6 +460,7 @@ def update_coords_on_species_change(species: str):
 def reset_on_species_change(species: str):
     # Clear results + selected when species changes (avoids mismatched IDs)
     try:
         track_ids = _get_bigwig_names(species)  # warms cache if available
@@ -500,6 +506,7 @@ def predict(
     bigwig_selected: list[str],
     bed_elements: list[str],
 ):
     tprint("start")
     # Debug: verify species is being passed
@@ -515,10 +522,11 @@ def predict(
     if use_coords:
         # Check if this species supports coordinate-based fetching
         if species not in SPECIES_WITH_COORDINATE_SUPPORT:
             raise gr.Error(
-                f"Species '{species}' does not support coordinate-based sequence fetching. "
-                f"Please provide a DNA sequence directly or use one of the supported species: "
-                f"{', '.join(sorted(SPECIES_WITH_COORDINATE_SUPPORT))}"
             )
         if not chrom:
             raise gr.Error("chrom is required when use_coords=True")
@@ -537,8 +545,10 @@ def predict(
     # Verify species is in inputs before calling pipeline
     if "species" not in inputs:
         raise gr.Error(
-            f"Internal error: species not found in inputs dict. Inputs: {list(inputs.keys())}"
         )
     tprint("inputs prepared")
@@ -576,12 +586,15 @@ def predict(
     if not has_bigwigs and not has_bed:
         raise gr.Error(
-            "No BigWig tracks or BED elements available for this species in the current model."
         )
     if not has_bigwigs and bigwig_selected:
         raise gr.Error(
-            "No BigWig tracks available for this species, but BigWig tracks were selected. Please deselect BigWig tracks or choose a different species."
         )
     # Defaults if user picked none
@@ -617,17 +630,17 @@ def predict(
     # Determine sequence length from available data
     if has_bigwigs:
-        L = bw.shape[0]
     elif has_bed:
-        L = bed_logits.shape[0]
     else:
         raise gr.Error("No data available for plotting.")
-    stride = _global_stride(L, PLOT_TARGET_POINTS)
     x0 = int(out.pred_start or 0)
-    x1 = int(out.pred_end or (x0 + L))
-    x = np.linspace(x0, x1, num=L, endpoint=False)[::stride]
     series: list[tuple[str, np.ndarray]] = []
@@ -645,14 +658,14 @@ def predict(
             series.append((ename, probs[:, eidx, 1][::stride].astype(float)))
     tprint("figure data processed created")
     # Build region string for x-axis label
     region = (
         f"{out.chrom}:{out.pred_start}-{out.pred_end}" if out.chrom else f"{x0}-{x1}"
     )
     if out.assembly:
         region += f" ({out.assembly})"
     fig = _make_tracks_figure(x, series, region=region)
     tprint("figure created")
@@ -680,7 +693,10 @@ def predict(
 # -----------------------------
 CSS = """
 #tracks_plot { position: relative; width: 100% !important; max-width: 100% !important; }
-#tracks_plot .wrap, #tracks_plot .plot-container { width: 100% !important; max-width: 100% !important; }
 #tracks_plot_download {
   position: absolute;
@@ -916,7 +932,8 @@ function addDownloadIcon() {
   btn.title = "Download PNG";
   btn.innerHTML = `
     <svg viewBox="0 0 24 24" aria-hidden="true">
-      <path d="M5 20h14v-2H5v2zm7-18v10.17l3.59-3.58L17 10l-5 5-5-5 1.41-1.41L11 12.17V2h1z"/>
     </svg>
   `;
   btn.onclick = () => {
@@ -1024,8 +1041,10 @@ with gr.Blocks(title="NTv3 Tracks Demo") as demo:
   <div class="intro-card">
     <h3>2) Choose signals</h3>
     <ul>
-      <li>Search & select <strong>BigWig functional tracks</strong> (RNA-seq, ChIP-seq, DNase…)</li>
-      <li>Select <strong>BED genome annotation elements</strong> (exons, introns, promoters…)</li>
     </ul>
   </div>
@@ -1041,10 +1060,12 @@ with gr.Blocks(title="NTv3 Tracks Demo") as demo:
 <div class="intro-tip">
   <span class="intro-tip-icon">💡</span>
-  <span><strong>Tip:</strong> The demo includes default settings that you can use to get started, taking ~ 15 seconds to run for the example on human.</span>
 </div>
-<div style="margin-top: 16px; padding: 12px; background: rgba(0,0,0,0.03); border-radius: 12px; font-size: 0.95rem;">
   <strong>Available species:</strong> {_all_species_list}<br>
   <br>
   <strong>Species with functional tracks:</strong> {_bigwig_species_list}
@@ -1059,8 +1080,8 @@ with gr.Blocks(title="NTv3 Tracks Demo") as demo:
     # Model display names (without InstaDeepAI/ prefix) and their full IDs
     MODEL_OPTIONS = {
-        "NTv3 650M (pos)": "InstaDeepAI/NTv3_650M_pos",
-        "NTv3 100M (pos)": "InstaDeepAI/NTv3_100M_pos",
     }
     # Reverse mapping: full ID -> display name
@@ -1112,11 +1133,9 @@ with gr.Blocks(title="NTv3 Tracks Demo") as demo:
             + ")"
         )
     with gr.Row():
-            chrom = gr.Textbox(label="Chromosome", value=_default_coords["chrom"])
-            start = gr.Number(
-                label="Start", value=_default_coords["start"], precision=0
-            )
-            end = gr.Number(label="End", value=_default_coords["end"], precision=0)
     # DNA sequence section - visible only when "Enter DNA sequence" is selected
     # Using Textbox directly (not wrapped in Group) to avoid visual border/line
@@ -1189,7 +1208,8 @@ with gr.Blocks(title="NTv3 Tracks Demo") as demo:
     )
     bigwig_no_tracks_msg = gr.Markdown(
-        "⚠️ No functional genomic tracks available for this species in the current model.",
         visible=False,
     )
@@ -1318,19 +1338,18 @@ with gr.Blocks(title="NTv3 Tracks Demo") as demo:
             current_species, current_query, upd["value"]
         )
-        # Create a completely fresh update with explicit empty value to prevent any checked state
-        # Force Gradio to clear checked state by explicitly setting value to empty list
-        # Use a workaround: set choices to empty first, then to new_choices to force a complete refresh
-        # But since we can only return one update, we'll ensure value is explicitly empty
-        # and that we're not preserving any state from the previous update
-        # Ensure no items from results_checked are in new_choices (they should already be filtered, but double-check)
         checked_track_ids = {_extract_track_id(x) for x in results_checked}
         new_choices_filtered = [
             c for c in new_choices if _extract_track_id(c) not in checked_track_ids
         ]
-        # Create update with explicit empty value - this should force Gradio to clear all checked items
         fresh_update = gr.update(
             choices=new_choices_filtered,
             value=[],  # CRITICAL: Explicitly empty list to clear all checked state
@@ -1366,7 +1385,7 @@ with gr.Blocks(title="NTv3 Tracks Demo") as demo:
                 results_choices = (
                     results_update.choices if hasattr(results_update, "choices") else []
                 )
-            except:
                 # Fallback: get choices from the search function directly
                 results_choices = _get_search_results_choices(
                     current_species,
@@ -1395,10 +1414,12 @@ with gr.Blocks(title="NTv3 Tracks Demo") as demo:
     ):
         """Update selected tracks when user checks/unchecks items directly."""
         # selected_value contains only the currently checked items
-        # Update choices to match the current selections (so unchecked items are removed)
         show_selected = bool(selected_value)
-        # Also update search results to reflect the new selection (tracks that were unchecked can now appear in results)
         search_updates = search_bigwigs(current_species, current_query, selected_value)
         return (
@@ -1462,7 +1483,7 @@ with gr.Blocks(title="NTv3 Tracks Demo") as demo:
                 ]
                 # Show selected tracks section if there are default tracks
                 show_selected_tracks = bool(default_formatted)
-            except:
                 formatted_tracks = []
                 default_formatted = []
                 show_selected_tracks = False
@@ -1594,12 +1615,13 @@ with gr.Blocks(title="NTv3 Tracks Demo") as demo:
         try:
             zip_path = create_bigwig_zip(out, bw_selected, bed_selected)
             return gr.update(value=zip_path, visible=True)
-        except ImportError as e:
             raise gr.Error(
-                "pyBigWig is required for BigWig export. Install with: pip install pyBigWig"
             )
-        except Exception as e:
-            raise gr.Error(f"Error creating BigWig files: {str(e)}")
     download_bigwig_btn.click(
         fn=download_bigwig_zip,

 import gradio as gr
 import matplotlib
 import matplotlib.colors as mcolors
 import numpy as np
 import plotly.graph_objects as go
 import torch
+from plotly.subplots import make_subplots
 from bigwig_export import _softmax_last, create_bigwig_zip
 from ntv3_tracks_pipeline import (
     pipe = load_ntv3_tracks_pipeline(
         model=model_id,
         token=HF_TOKEN,
+        device="cpu",  # Prevents model.to(cuda) during import
         default_species=species,
         verbose=False,
     )
 except Exception:
     def gpu(*args, **kwargs):
+        """GPU decorator placeholder when spaces module is not available."""
         def wrap(fn):
             return fn
         return wrap
+def _global_stride(length: int, target: int) -> int:
+    if target <= 0 or length <= target:
         return 1
+    return int(np.ceil(length / target))
+def _make_tracks_figure(
+    x: np.ndarray, series: list[tuple[str, np.ndarray]], region: str = ""
+):
     """Create an interactive plotly figure with multiple tracks."""
     if not series:
         raise gr.Error("Nothing to plot (no tracks/elements selected).")
     n = len(series)
     # Create subplots with shared x-axis
     fig = make_subplots(
         rows=n,
         # Convert color to rgba for fill
         rgba = mcolors.to_rgba(color)
+        rgba_str = (
+            f"rgba({int(rgba[0]*255)}, {int(rgba[1]*255)}, {int(rgba[2]*255)}, 0.3)"
+        )
         # Add filled area (fill_between equivalent)
         fig.add_trace(
             go.Scatter(
                 y=y,
                 mode="lines",
                 name=title,
+                line={"color": color, "width": 1.5},
                 fill="tozeroy",
                 fillcolor=rgba_str,
+                hovertemplate=f"<b>{title}</b><br>"
+                + "Position: %{x}<br>"
+                + "Value: %{y:.4f}<extra></extra>",
                 showlegend=False,
             ),
             row=i,
     fig.update_layout(
         height=150 * n,  # Adjust height based on number of tracks
         width=1200,
+        margin={"l": 80, "r": 20, "t": 40, "b": 60},
         hovermode="x unified",  # Show all values at same x position
         template="plotly_white",
     )
 def _extract_track_id(display_value: str) -> str:
+    """Extract track ID from display format or return as-is."""
     if " (" in display_value and display_value.endswith(")"):
         # Extract track_id from format "display_name (track_id)"
         return display_value.rsplit(" (", 1)[1][:-1]
 def reset_on_species_change(species: str):
+    """Reset search and selected tracks when species changes."""
     # Clear results + selected when species changes (avoids mismatched IDs)
     try:
         track_ids = _get_bigwig_names(species)  # warms cache if available
     bigwig_selected: list[str],
     bed_elements: list[str],
 ):
+    """Run prediction and return figure with tracks."""
     tprint("start")
     # Debug: verify species is being passed
     if use_coords:
         # Check if this species supports coordinate-based fetching
         if species not in SPECIES_WITH_COORDINATE_SUPPORT:
+            supported = ", ".join(sorted(SPECIES_WITH_COORDINATE_SUPPORT))
             raise gr.Error(
+                f"Species '{species}' does not support coordinate-based sequence "
+                f"fetching. Please provide a DNA sequence directly or use one of "
+                f"the supported species: {supported}"
             )
         if not chrom:
             raise gr.Error("chrom is required when use_coords=True")
     # Verify species is in inputs before calling pipeline
     if "species" not in inputs:
+        input_keys = list(inputs.keys())
         raise gr.Error(
+            f"Internal error: species not found in inputs dict. "
+            f"Inputs: {input_keys}"
         )
     tprint("inputs prepared")
     if not has_bigwigs and not has_bed:
         raise gr.Error(
+            "No BigWig tracks or BED elements available for this species "
+            "in the current model."
         )
     if not has_bigwigs and bigwig_selected:
         raise gr.Error(
+            "No BigWig tracks available for this species, but BigWig tracks "
+            "were selected. Please deselect BigWig tracks or choose a "
+            "different species."
         )
     # Defaults if user picked none
     # Determine sequence length from available data
     if has_bigwigs:
+        seq_length = bw.shape[0]
     elif has_bed:
+        seq_length = bed_logits.shape[0]
     else:
         raise gr.Error("No data available for plotting.")
+    stride = _global_stride(seq_length, PLOT_TARGET_POINTS)
     x0 = int(out.pred_start or 0)
+    x1 = int(out.pred_end or (x0 + seq_length))
+    x = np.linspace(x0, x1, num=seq_length, endpoint=False)[::stride]
     series: list[tuple[str, np.ndarray]] = []
             series.append((ename, probs[:, eidx, 1][::stride].astype(float)))
     tprint("figure data processed created")
     # Build region string for x-axis label
     region = (
         f"{out.chrom}:{out.pred_start}-{out.pred_end}" if out.chrom else f"{x0}-{x1}"
     )
     if out.assembly:
         region += f" ({out.assembly})"
     fig = _make_tracks_figure(x, series, region=region)
     tprint("figure created")
 # -----------------------------
 CSS = """
 #tracks_plot { position: relative; width: 100% !important; max-width: 100% !important; }
+#tracks_plot .wrap, #tracks_plot .plot-container {
+  width: 100% !important;
+  max-width: 100% !important;
+}
 #tracks_plot_download {
   position: absolute;
   btn.title = "Download PNG";
   btn.innerHTML = `
     <svg viewBox="0 0 24 24" aria-hidden="true">
+      <path d="M5 20h14v-2H5v2zm7-18v10.17l3.59-3.58L17 10l-5 5-5-5
+        1.41-1.41L11 12.17V2h1z"/>
     </svg>
   `;
   btn.onclick = () => {
   <div class="intro-card">
     <h3>2) Choose signals</h3>
     <ul>
+      <li>Search & select <strong>BigWig functional tracks</strong>
+        (RNA-seq, ChIP-seq, DNase…)</li>
+      <li>Select <strong>BED genome annotation elements</strong>
+        (exons, introns, promoters…)</li>
     </ul>
   </div>
 <div class="intro-tip">
   <span class="intro-tip-icon">💡</span>
+  <span><strong>Tip:</strong> The demo includes default settings that you can use
+    to get started, taking ~ 15 seconds to run for the example on human.</span>
 </div>
+<div style="margin-top: 16px; padding: 12px; background: rgba(0,0,0,0.03);
+  border-radius: 12px; font-size: 0.95rem;">
   <strong>Available species:</strong> {_all_species_list}<br>
   <br>
   <strong>Species with functional tracks:</strong> {_bigwig_species_list}
     # Model display names (without InstaDeepAI/ prefix) and their full IDs
     MODEL_OPTIONS = {
+        "NTv3 650M (post)": "InstaDeepAI/NTv3_650M_pos",
+        "NTv3 100M (post)": "InstaDeepAI/NTv3_100M_pos",
     }
     # Reverse mapping: full ID -> display name
             + ")"
         )
     with gr.Row():
+        chrom = gr.Textbox(label="Chromosome", value=_default_coords["chrom"])
+        start = gr.Number(label="Start", value=_default_coords["start"], precision=0)
+        end = gr.Number(label="End", value=_default_coords["end"], precision=0)
     # DNA sequence section - visible only when "Enter DNA sequence" is selected
     # Using Textbox directly (not wrapped in Group) to avoid visual border/line
     )
     bigwig_no_tracks_msg = gr.Markdown(
+        "⚠️ No functional genomic tracks available for this species "
+        "in the current model.",
         visible=False,
     )
             current_species, current_query, upd["value"]
         )
+        # Create a completely fresh update with explicit empty value
+        # to prevent any checked state. Force Gradio to clear checked state
+        # by explicitly setting value to empty list.
+        # Ensure no items from results_checked are in new_choices
+        # (they should already be filtered, but double-check)
         checked_track_ids = {_extract_track_id(x) for x in results_checked}
         new_choices_filtered = [
             c for c in new_choices if _extract_track_id(c) not in checked_track_ids
         ]
+        # Create update with explicit empty value
+        # This should force Gradio to clear all checked items
         fresh_update = gr.update(
             choices=new_choices_filtered,
             value=[],  # CRITICAL: Explicitly empty list to clear all checked state
                 results_choices = (
                     results_update.choices if hasattr(results_update, "choices") else []
                 )
+            except Exception:
                 # Fallback: get choices from the search function directly
                 results_choices = _get_search_results_choices(
                     current_species,
     ):
         """Update selected tracks when user checks/unchecks items directly."""
         # selected_value contains only the currently checked items
+        # Update choices to match current selections
+        # (unchecked items are removed)
         show_selected = bool(selected_value)
+        # Also update search results to reflect new selection
+        # (unchecked tracks can now appear in results)
         search_updates = search_bigwigs(current_species, current_query, selected_value)
         return (
                 ]
                 # Show selected tracks section if there are default tracks
                 show_selected_tracks = bool(default_formatted)
+            except Exception:
                 formatted_tracks = []
                 default_formatted = []
                 show_selected_tracks = False
         try:
             zip_path = create_bigwig_zip(out, bw_selected, bed_selected)
             return gr.update(value=zip_path, visible=True)
+        except ImportError:
             raise gr.Error(
+                "pyBigWig is required for BigWig export. "
+                "Install with: pip install pyBigWig"
             )
+        except Exception as exc:
+            raise gr.Error(f"Error creating BigWig files: {str(exc)}")
     download_bigwig_btn.click(
         fn=download_bigwig_zip,

bigwig_export.py CHANGED Viewed

@@ -11,9 +11,9 @@ from typing import TYPE_CHECKING
 import numpy as np
 try:
-    import pyBigWig
 except ImportError:
-    pyBigWig = None
 if TYPE_CHECKING:
     from ntv3_tracks_pipeline import NTv3TracksOutput
@@ -75,16 +75,20 @@ def create_bigwig_zip(
     chrom = out.chrom
     if chrom is None:
         raise ValueError(
-            "Chromosome information not available. Use genomic coordinates for BigWig export."
         )
     start = out.start
     end = out.end
     window_len = out.window_len or (end - start)
     # Calculate prediction region (center 37.5%)
-    pred_start = out.pred_start or (start + int(window_len * 0.3125))
-    pred_end = out.pred_end or (pred_start + int(window_len * 0.375))
     # Create temporary directory for BigWig files
     tmpdir = tempfile.gettempdir()
@@ -160,11 +164,11 @@ def create_bigwig_zip(
     for bw_file in created_files:
         try:
             os.remove(bw_file)
-        except:
             pass
     try:
         os.rmdir(output_dir)
-    except:
         pass
     return zip_path

 import numpy as np
 try:
+    import pyBigWig  # noqa: N816
 except ImportError:
+    pyBigWig = None  # noqa: N816
 if TYPE_CHECKING:
     from ntv3_tracks_pipeline import NTv3TracksOutput
     chrom = out.chrom
     if chrom is None:
         raise ValueError(
+            "Chromosome information not available. Use genomic coordinates."
         )
     start = out.start
     end = out.end
+    if start is None or end is None:
+        raise ValueError("Start and end coordinates are required for BigWig export.")
     window_len = out.window_len or (end - start)
     # Calculate prediction region (center 37.5%)
+    if out.pred_start is not None:
+        pred_start = out.pred_start
+    else:
+        pred_start = start + int(window_len * 0.3125)
     # Create temporary directory for BigWig files
     tmpdir = tempfile.gettempdir()
     for bw_file in created_files:
         try:
             os.remove(bw_file)
+        except Exception:
             pass
     try:
         os.rmdir(output_dir)
+    except Exception:
         pass
     return zip_path

ntv3_tracks_pipeline.py CHANGED Viewed

@@ -74,13 +74,13 @@ SPECIES_WITH_COORDINATE_SUPPORT = {
 # Assembly -> API URL template mapping
 # ---------------------------------------------------------------------
 # Default API URL template (UCSC format) that works for most species
-DEFAULT_API_URL_TEMPLATE = "https://api.genome.ucsc.edu/getData/sequence?genome={assembly};chrom={chrom};start={start};end={end}"
 # for species with different format, add the assembly name to the mapping
 # The template should use {chrom}, {start}, and {end} as placeholders.
 ASSEMBLY_TO_API_URL_TEMPLATE = {
     # Arabidopsis thaliana (TAIR10) - uses hub URL format
-    "TAIR10": "https://api.genome.ucsc.edu/getData/sequence?hubUrl=http://genome.ucsc.edu/goldenPath/help/examples/hubExamples/hubAssembly/plantAraTha1/hub.txt;genome=araTha1;chrom={chrom};start={start};end={end}",
 }
@@ -124,7 +124,8 @@ def _get_dna_sequence(assembly: str, chrom: str, start: int, end: int) -> str:
     """
     if requests is None:
         raise ImportError(
-            "requests is required for genome download. Install with: pip install requests"
         )
     # Get API URL template for this assembly, or use default
@@ -151,12 +152,11 @@ def _ensure_fasta_for_assembly(assembly: str, cache_dir: str | Path) -> Path:
     if fa_path.exists():
         return fa_path
-    if assembly not in ASSEMBLY_TO_UCSC_FA_GZ:
-        raise ValueError(
-            f"No download URL configured for assembly='{assembly}'. "
-            f"Supported for auto-download: {sorted(ASSEMBLY_TO_UCSC_FA_GZ.keys())}. "
-            f"Either pass fasta_path explicitly, or extend ASSEMBLY_TO_UCSC_FA_GZ."
-        )
     import gzip
@@ -340,7 +340,8 @@ class NTv3TracksPipeline(Pipeline):
         else:
             self.tokenizer = tokenizer
-        # Extract model_id from config if not already set (following ntv3_gff_pipeline.py pattern)
         if self.model_id is None and self.config is not None:
             self.model_id = getattr(self.config, "_name_or_path", None) or getattr(
                 self.config, "name_or_path", None
@@ -374,29 +375,57 @@ class NTv3TracksPipeline(Pipeline):
             model=self.model, tokenizer=self.tokenizer, device=-1, **kwargs
         )
     def _sanitize_parameters(self, **kwargs):
         return {}, {}, {}
-    def _get_model_device(self) -> torch.device:
         return next(self.model.parameters()).device
     def _resolve_species_and_assembly(self, inputs: dict[str, Any]) -> tuple[str, str]:
         species = inputs.get("species", self.default_species)
         if species not in SPECIES_TO_ASSEMBLY:
             raise ValueError(
-                f"Unsupported species='{species}'. Supported species: {sorted(SPECIES_TO_ASSEMBLY.keys())}"
             )
         assembly = SPECIES_TO_ASSEMBLY[species]
         cfg_assemblies = list(self.config.bigwigs_per_file_assembly.keys())
         if assembly not in cfg_assemblies:
             raise ValueError(
-                f"Species '{species}' maps to assembly '{assembly}', but that assembly is not available in this checkpoint. "
                 f"Available assemblies: {cfg_assemblies}"
             )
         return species, assembly
-    def _maybe_force_cpu_for_mps_long(
         self, input_ids_cpu: torch.Tensor
     ) -> torch.device:
         dev = self._get_model_device()
@@ -405,40 +434,15 @@ class NTv3TracksPipeline(Pipeline):
             if seq_len >= self.mps_force_cpu_length:
                 if self.verbose:
                     print(
-                        f"[NTv3TracksPipeline] MPS detected and input is long (tokens={seq_len}). "
-                        "Switching model + inputs to CPU for this run."
                     )
                 self.model.to("cpu")
                 self.model.eval()
                 return torch.device("cpu")
         return dev
-    def available_bigwig_track_names(self, species: str | None = None) -> list[str]:
-        """
-        Return BigWig track IDs for the assembly corresponding to `species`.
-        No model forward pass.
-        """
-        sp = species or self.default_species
-        assembly = SPECIES_TO_ASSEMBLY.get(sp)
-        if assembly is None:
-            raise ValueError(
-                f"Unknown species={sp}. Supported: {sorted(SPECIES_TO_ASSEMBLY.keys())}"
-            )
-        if assembly not in self.config.bigwigs_per_file_assembly:
-            raise ValueError(
-                f"Assembly {assembly} not found in checkpoint config. "
-                f"Available: {list(self.config.bigwigs_per_file_assembly.keys())}"
-            )
-        return list(self.config.bigwigs_per_file_assembly[assembly])
-    def available_bed_element_names(self) -> list[str]:
-        """
-        Return BED element names available in this checkpoint (no forward pass).
-        """
-        return list(self.bed_element_names or [])
     def preprocess(self, inputs: dict[str, Any], **kwargs: Any) -> dict[str, Any]:
         species, assembly = self._resolve_species_and_assembly(inputs)
@@ -506,19 +510,6 @@ class NTv3TracksPipeline(Pipeline):
     def forward(self, model_inputs, **forward_params):
         return self._forward(model_inputs, **forward_params)
-    def _forward(self, model_inputs: dict[str, Any], **kwargs: Any) -> dict[str, Any]:
-        meta = model_inputs.pop("meta")
-        if self.verbose:
-            print(f"Running on device: {self._get_model_device()}")
-        with torch.no_grad():
-            out = self.model(
-                input_ids=model_inputs["input_ids"],
-                species_ids=model_inputs["species_ids"],
-                return_dict=True,
-            )
-        out["meta"] = meta
-        return out
     def postprocess(
         self, model_outputs: dict[str, Any], **kwargs: Any
     ) -> NTv3TracksOutput:
@@ -565,6 +556,19 @@ class NTv3TracksPipeline(Pipeline):
             pred_end=meta.get("pred_end"),
         )
     def __call__(
         self,
         inputs,
@@ -584,7 +588,8 @@ class NTv3TracksPipeline(Pipeline):
         if plot:
             if out.bigwig_track_names is None:
                 raise ValueError(
-                    "bigwig_track_names missing; expected cfg.bigwigs_per_file_assembly[assembly]."
                 )
             if out.bed_element_names is None:
                 raise ValueError("bed element names missing from config.")
@@ -600,17 +605,22 @@ class NTv3TracksPipeline(Pipeline):
             ]
             if missing_tracks:
                 raise ValueError(
-                    f"The following tracks are not available in bigwig_names: {missing_tracks}\n"
-                    f"First 50 available: {bigwig_names[:50]}{'...' if len(bigwig_names) > 50 else ''}"
                 )
             missing_elements = [
                 e for e in elements_to_plot if e not in bed_element_names
             ]
             if missing_elements:
                 raise ValueError(
-                    f"The following elements are not available in bed_element_names: {missing_elements}\n"
-                    f"First 50 available: {bed_element_names[:50]}{'...' if len(bed_element_names) > 50 else ''}"
                 )
             # Build bigwig tracks dict (title -> y)
@@ -662,7 +672,8 @@ def load_ntv3_tracks_pipeline(
     device:
         "auto", "cpu", "cuda", "mps"
     pipeline_kwargs:
-        Extra kwargs passed to NTv3TracksPipeline (default_species, genome_cache_dir, etc.).
     """
     pipe = NTv3TracksPipeline(
         model=model,

 # Assembly -> API URL template mapping
 # ---------------------------------------------------------------------
 # Default API URL template (UCSC format) that works for most species
+DEFAULT_API_URL_TEMPLATE = "https://api.genome.ucsc.edu/getData/sequence?genome={assembly};chrom={chrom};start={start};end={end}"  # noqa: E501
 # For assemblies that need a different URL format, add the assembly name to the mapping below.
 # Each template should use {chrom}, {start}, and {end} as placeholders.
 ASSEMBLY_TO_API_URL_TEMPLATE = {
     # Arabidopsis thaliana (TAIR10) - uses hub URL format
+    "TAIR10": "https://api.genome.ucsc.edu/getData/sequence?hubUrl=http://genome.ucsc.edu/goldenPath/help/examples/hubExamples/hubAssembly/plantAraTha1/hub.txt;genome=araTha1;chrom={chrom};start={start};end={end}",  # noqa: E501
 }
     """
     if requests is None:
         raise ImportError(
+            "requests is required for genome download. "
+            "Install with: pip install requests"
         )
     # Get API URL template for this assembly, or use default
     if fa_path.exists():
         return fa_path
+    # This function is deprecated - use _get_dna_sequence with API instead
+    raise ValueError(
+        f"FASTA file download is no longer supported for assembly='{assembly}'. "
+        f"Please use _get_dna_sequence() with API-based sequence fetching instead."
+    )
     import gzip
         else:
             self.tokenizer = tokenizer
+        # Extract model_id from config if not already set
+        # (following ntv3_gff_pipeline.py pattern)
         if self.model_id is None and self.config is not None:
             self.model_id = getattr(self.config, "_name_or_path", None) or getattr(
                 self.config, "name_or_path", None
             model=self.model, tokenizer=self.tokenizer, device=-1, **kwargs
         )
+    def available_bigwig_track_names(self, species: str | None = None) -> list[str]:
+        """
+        Return BigWig track IDs for the assembly corresponding to `species`.
+        No model forward pass.
+        """
+        sp = species or self.default_species
+        assembly = SPECIES_TO_ASSEMBLY.get(sp)
+        if assembly is None:
+            raise ValueError(
+                f"Unknown species={sp}. Supported: {sorted(SPECIES_TO_ASSEMBLY.keys())}"
+            )
+        if assembly not in self.config.bigwigs_per_file_assembly:
+            raise ValueError(
+                f"Assembly {assembly} not found in checkpoint config. "
+                f"Available: {list(self.config.bigwigs_per_file_assembly.keys())}"
+            )
+        return list(self.config.bigwigs_per_file_assembly[assembly])
+    def available_bed_element_names(self) -> list[str]:
+        """
+        Return BED element names available in this checkpoint (no forward pass).
+        """
+        return list(self.bed_element_names or [])
     def _sanitize_parameters(self, **kwargs):
         return {}, {}, {}
+    def _get_model_device(self) -> torch.device:  # noqa: CCE001
         return next(self.model.parameters()).device
     def _resolve_species_and_assembly(self, inputs: dict[str, Any]) -> tuple[str, str]:
         species = inputs.get("species", self.default_species)
         if species not in SPECIES_TO_ASSEMBLY:
+            supported = sorted(SPECIES_TO_ASSEMBLY.keys())
             raise ValueError(
+                f"Unsupported species='{species}'. " f"Supported species: {supported}"
             )
         assembly = SPECIES_TO_ASSEMBLY[species]
         cfg_assemblies = list(self.config.bigwigs_per_file_assembly.keys())
         if assembly not in cfg_assemblies:
             raise ValueError(
+                f"Species '{species}' maps to assembly '{assembly}', "
+                f"but that assembly is not available in this checkpoint. "
                 f"Available assemblies: {cfg_assemblies}"
             )
         return species, assembly
+    def _maybe_force_cpu_for_mps_long(  # noqa: CCE001
         self, input_ids_cpu: torch.Tensor
     ) -> torch.device:
         dev = self._get_model_device()
             if seq_len >= self.mps_force_cpu_length:
                 if self.verbose:
                     print(
+                        f"[NTv3TracksPipeline] MPS detected and input is long "
+                        f"(tokens={seq_len}). Switching model + inputs to CPU "
+                        "for this run."
                     )
                 self.model.to("cpu")
                 self.model.eval()
                 return torch.device("cpu")
         return dev
     def preprocess(self, inputs: dict[str, Any], **kwargs: Any) -> dict[str, Any]:
         species, assembly = self._resolve_species_and_assembly(inputs)
     def forward(self, model_inputs, **forward_params):
         """Hand the inputs and keyword params to `_forward` unchanged and return its result."""
         outputs = self._forward(model_inputs, **forward_params)
         return outputs
     def postprocess(
         self, model_outputs: dict[str, Any], **kwargs: Any
     ) -> NTv3TracksOutput:
             pred_end=meta.get("pred_end"),
         )
+    def _forward(self, model_inputs: dict[str, Any], **kwargs: Any) -> dict[str, Any]:
+        meta = model_inputs.pop("meta")
+        if self.verbose:
+            print(f"Running on device: {self._get_model_device()}")
+        with torch.no_grad():
+            out = self.model(
+                input_ids=model_inputs["input_ids"],
+                species_ids=model_inputs["species_ids"],
+                return_dict=True,
+            )
+        out["meta"] = meta
+        return out
     def __call__(
         self,
         inputs,
         if plot:
             if out.bigwig_track_names is None:
                 raise ValueError(
+                    "bigwig_track_names missing; expected "
+                    "cfg.bigwigs_per_file_assembly[assembly]."
                 )
             if out.bed_element_names is None:
                 raise ValueError("bed element names missing from config.")
             ]
             if missing_tracks:
                 raise ValueError(
+                    f"The following tracks are not available in "
+                    f"bigwig_names: {missing_tracks}\n"
+                    f"First 50 available: {bigwig_names[:50]}"
+                    f"{'...' if len(bigwig_names) > 50 else ''}"
                 )
             missing_elements = [
                 e for e in elements_to_plot if e not in bed_element_names
             ]
             if missing_elements:
+                first_50 = bed_element_names[:50]
+                ellipsis = "..." if len(bed_element_names) > 50 else ""
                 raise ValueError(
+                    f"The following elements are not available in "
+                    f"bed_element_names: {missing_elements}\n"
+                    f"First 50 available: {first_50}{ellipsis}"
                 )
             # Build bigwig tracks dict (title -> y)
     device:
         "auto", "cpu", "cuda", "mps"
     pipeline_kwargs:
+        Extra kwargs passed to NTv3TracksPipeline
+        (default_species, genome_cache_dir, etc.).
     """
     pipe = NTv3TracksPipeline(
         model=model,

requirements.txt CHANGED Viewed

@@ -1,8 +1,8 @@
 gradio>=4.0.0
 matplotlib
 numpy
 plotly
-kaleido
 pyBigWig
 pyfaidx
 requests

 gradio>=4.0.0
+kaleido
 matplotlib
 numpy
 plotly
 pyBigWig
 pyfaidx
 requests