Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
36bd60e
1
Parent(s):
25532d4
feat: clean track names
Browse files- app.py +181 -15
- data/functional_tracks_metadata.csv +0 -0
app.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
import os
|
| 2 |
import uuid
|
| 3 |
import tempfile
|
|
|
|
|
|
|
| 4 |
import numpy as np
|
| 5 |
import gradio as gr
|
| 6 |
import asyncio
|
|
@@ -114,6 +116,85 @@ def _save_fig_png(fig) -> str:
|
|
| 114 |
# Cache track lists per species so search is instant after first load
|
| 115 |
_BIGWIG_CACHE: dict[str, list[str]] = {}
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
def _get_bigwig_names(species: str) -> list[str]:
|
| 119 |
if species not in _BIGWIG_CACHE:
|
|
@@ -182,20 +263,52 @@ def _rank_search(query: str, names: list[str], limit: int) -> list[str]:
|
|
| 182 |
|
| 183 |
|
| 184 |
def search_bigwigs(species: str, query: str):
|
|
|
|
| 185 |
names = _get_bigwig_names(species)
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
return gr.update(choices=results, value=[])
|
| 188 |
|
| 189 |
|
| 190 |
def add_selected(current_selected: list[str], to_add: list[str]):
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
|
| 197 |
|
| 198 |
def remove_selected(current_selected: list[str], to_remove: list[str]):
    """Drop every entry listed in *to_remove* from the selected list.

    Args:
        current_selected: Entries currently in the selected list; ``None`` is
            treated as empty.
        to_remove: Entries to drop; ``None`` is treated as empty.

    Returns:
        A ``gr.update`` whose choices and value are both the surviving
        entries, in their original order.
    """
    # Build the lookup set once; written inside the comprehension's condition
    # it would be rebuilt for every element of current_selected.
    dropped = set(to_remove or [])
    cur = [x for x in (current_selected or []) if x not in dropped]
    return gr.update(choices=cur, value=cur)
|
| 201 |
|
|
@@ -208,15 +321,21 @@ def update_coords_on_species_change(species: str):
|
|
| 208 |
def reset_on_species_change(species: str):
|
| 209 |
# Clear results + selected when species changes (avoids mismatched IDs)
|
| 210 |
try:
|
| 211 |
-
_get_bigwig_names(species) # warms cache if available
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
except (ValueError, AttributeError):
|
| 213 |
# Species doesn't have bigwigs, that's okay
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
)
|
| 220 |
|
| 221 |
|
| 222 |
# -----------------------------
|
|
@@ -236,6 +355,9 @@ def predict(
|
|
| 236 |
if not species:
|
| 237 |
raise gr.Error("Species parameter is missing. Please select a species.")
|
| 238 |
|
|
|
|
|
|
|
|
|
|
| 239 |
if use_coords:
|
| 240 |
# Check if this species supports coordinate-based fetching
|
| 241 |
if species not in SPECIES_WITH_COORDINATE_SUPPORT:
|
|
@@ -583,7 +705,9 @@ DEFAULT_BED_ELEMENTS = ["protein_coding_gene", "exon", "intron"]
|
|
| 583 |
|
| 584 |
# Get available BigWig tracks for default species and filter defaults
|
| 585 |
_init_bigwig = _get_bigwig_names(DEFAULT_SPECIES)
|
| 586 |
-
|
|
|
|
|
|
|
| 587 |
|
| 588 |
# Filter default BED elements to only those available
|
| 589 |
_init_bed_selected = [elem for elem in DEFAULT_BED_ELEMENTS if elem in _init_bed]
|
|
@@ -745,6 +869,37 @@ with gr.Blocks(title="NTv3 Tracks Demo") as demo:
|
|
| 745 |
|
| 746 |
gr.Markdown("## Select functional tracks")
|
| 747 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 748 |
bigwig_no_tracks_msg = gr.Markdown(
|
| 749 |
"⚠️ No functional genomic tracks available for this species in the current model.",
|
| 750 |
visible=False,
|
|
@@ -850,6 +1005,17 @@ with gr.Blocks(title="NTv3 Tracks Demo") as demo:
|
|
| 850 |
coords = DEFAULT_COORDS.get(species, DEFAULT_COORDS["human"])
|
| 851 |
# Show coordinates only if species is supported AND use_coords is True
|
| 852 |
show_coords = is_supported and use_coords_val
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 853 |
return (
|
| 854 |
gr.update(visible=show_coords, value=coords["chrom"]),
|
| 855 |
gr.update(visible=show_coords, value=coords["start"]),
|
|
@@ -857,7 +1023,7 @@ with gr.Blocks(title="NTv3 Tracks Demo") as demo:
|
|
| 857 |
gr.update(value=is_supported, visible=is_supported), # Show/hide and enable use_coords only if supported
|
| 858 |
gr.update(visible=show_coords), # Show/hide the row
|
| 859 |
gr.update(visible=not has_bigwigs), # Show "no tracks" message if no bigwigs
|
| 860 |
-
gr.update(visible=has_bigwigs), # Show bigwig selection if available
|
| 861 |
gr.update(visible=has_bigwigs), # Show bigwig query if available
|
| 862 |
gr.update(visible=has_bigwigs), # Show bigwig results if available
|
| 863 |
gr.update(visible=has_bigwigs), # Show bigwig buttons if available
|
|
|
|
| 1 |
import os
|
| 2 |
import uuid
|
| 3 |
import tempfile
|
| 4 |
+
import csv
|
| 5 |
+
from pathlib import Path
|
| 6 |
import numpy as np
|
| 7 |
import gradio as gr
|
| 8 |
import asyncio
|
|
|
|
| 116 |
# Cache track lists per species so search is instant after first load
|
| 117 |
_BIGWIG_CACHE: dict[str, list[str]] = {}
|
| 118 |
|
| 119 |
+
# Cache for track metadata (track_id -> display_name)
_TRACK_METADATA_CACHE: dict[str, str] = {}

# Set once a load has been attempted, so that a missing or unreadable CSV is
# not re-read (and re-reported) on every per-track lookup.
_TRACK_METADATA_LOADED = False

# Compact symbols shown in display names for the CSV's strand values.
_STRAND_SYMBOLS = {"plus": "+", "minus": "-"}


def _build_track_display_name(row: dict) -> str:
    """Join the descriptive fields of one metadata row into a display name.

    Returns an empty string when the row has no usable descriptive field.
    """

    def field(name: str) -> str:
        # csv.DictReader fills columns missing from a short row with None.
        return (row.get(name) or "").strip()

    biosample_type = field("biosample_type")
    tissue = field("tissue")
    assay = field("assay")
    strand = field("strand")
    experiment_target = field("experiment_target")

    parts = []
    # A biosample type of "tissue" is not shown.
    if biosample_type and biosample_type != "tissue":
        parts.append(biosample_type)
    if tissue:
        parts.append(tissue)
    if assay:
        # Append the strand when one is given, e.g. "RNA-seq +".
        strand = _STRAND_SYMBOLS.get(strand, strand)
        parts.append(f"{assay} {strand}" if strand else assay)
    # Targets of "none" and "RNA-seq" are skipped (presumably placeholders).
    if experiment_target and experiment_target not in ("none", "RNA-seq"):
        parts.append(experiment_target)
    return " - ".join(parts)


def _load_track_metadata() -> dict[str, str]:
    """Load track metadata from CSV and create display name mapping.

    The CSV is read from ``data/functional_tracks_metadata.csv`` next to this
    file, at most once per process. Rows without a ``file_id`` are skipped,
    and a row with no descriptive fields maps its ID to itself.

    Returns:
        Mapping of track ID to display name. It is empty when the file is
        missing or cannot be read; a warning is printed in the latter case.
    """
    global _TRACK_METADATA_LOADED
    if _TRACK_METADATA_LOADED or _TRACK_METADATA_CACHE:
        return _TRACK_METADATA_CACHE
    # Mark the attempt up front so failures below are remembered too.
    _TRACK_METADATA_LOADED = True

    csv_path = Path(__file__).parent / "data" / "functional_tracks_metadata.csv"
    if not csv_path.exists():
        return _TRACK_METADATA_CACHE

    metadata = {}
    try:
        # newline="" lets the csv module handle embedded line breaks itself.
        with open(csv_path, "r", encoding="utf-8", newline="") as f:
            for row in csv.DictReader(f):
                track_id = row.get("file_id")
                if not track_id:
                    continue
                # Fall back to the ID when the row has no metadata.
                metadata[track_id] = _build_track_display_name(row) or track_id
    except (OSError, UnicodeError, csv.Error) as e:
        print(f"Warning: Could not load track metadata: {e}")
        return _TRACK_METADATA_CACHE

    _TRACK_METADATA_CACHE.update(metadata)
    return _TRACK_METADATA_CACHE
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def _get_track_display_name(track_id: str) -> str:
    """Look up the display name for *track_id*, falling back to the ID itself."""
    return _load_track_metadata().get(track_id, track_id)
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def _format_track_for_display(track_id: str) -> str:
    """Render a track as 'display_name (track_id)'.

    When the display name is the ID itself (no metadata for the track), the
    bare ID is returned.
    """
    label = _get_track_display_name(track_id)
    return track_id if label == track_id else f"{label} ({track_id})"
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
def _extract_track_id(display_value: str) -> str:
    """Recover the track ID from a 'display_name (track_id)' string.

    The ID is whatever sits between the last " (" and the closing ")". A
    value that does not have that shape is returned unchanged, on the
    assumption that it is already a bare ID.
    """
    if not display_value.endswith(")"):
        return display_value
    _, opener, tail = display_value.rpartition(" (")
    if not opener:
        return display_value
    # tail still carries the closing parenthesis; drop it.
    return tail[:-1]
|
| 197 |
+
|
| 198 |
|
| 199 |
def _get_bigwig_names(species: str) -> list[str]:
|
| 200 |
if species not in _BIGWIG_CACHE:
|
|
|
|
| 263 |
|
| 264 |
|
| 265 |
def search_bigwigs(species: str, query: str):
    """Search BigWig tracks and return formatted display names.

    Matching is a case-insensitive substring test against each track's
    display label: 'display_name (track_id)', or the bare track ID when no
    metadata is known. The label embeds both the display name and the ID, so
    one test covers a match on either.

    Args:
        species: Species whose track list is searched.
        query: Search text; ``None`` or ``""`` matches every track.

    Returns:
        A ``gr.update`` carrying at most ``SEARCH_MAX_RESULTS`` matching
        labels as choices, with an empty value.
    """
    # Guard against None so an empty query cannot crash on .lower().
    needle = (query or "").lower()
    labels = (
        _format_track_for_display(track_id)
        for track_id in _get_bigwig_names(species)
    )
    matching = [label for label in labels if needle in label.lower()]

    # Limit results
    results = matching[:SEARCH_MAX_RESULTS]
    return gr.update(choices=results, value=[])
|
| 290 |
|
| 291 |
|
| 292 |
def add_selected(current_selected: list[str], to_add: list[str]):
    """Append tracks to the selected list and return it fully checked.

    Both inputs may hold bare track IDs or 'display_name (track_id)' strings;
    everything is reduced to IDs first, so a track already selected is not
    added a second time. ``None`` for either argument is treated as empty.

    Returns:
        A ``gr.update`` whose choices and value are the display-formatted
        selection, existing entries first, then the new ones in the order
        given.
    """
    selected_ids = [_extract_track_id(item) for item in (current_selected or [])]

    for candidate in (to_add or []):
        track_id = _extract_track_id(candidate)
        if track_id not in selected_ids:
            selected_ids.append(track_id)

    # Re-render every entry so the whole list is in display format.
    labels = [_format_track_for_display(track_id) for track_id in selected_ids]
    return gr.update(choices=labels, value=labels)  # show + keep all checked
|
| 308 |
|
| 309 |
|
| 310 |
def remove_selected(current_selected: list[str], to_remove: list[str]):
    """Remove tracks from selected list.

    Entries are compared by track ID rather than by raw string, so a track is
    removed whether it is given as a bare ID or as 'display_name (track_id)'.
    This is the same normalization add_selected applies.

    Args:
        current_selected: Entries currently in the selected list; ``None`` is
            treated as empty.
        to_remove: Entries to drop; ``None`` is treated as empty.

    Returns:
        A ``gr.update`` whose choices and value are both the surviving
        entries, unchanged and in their original order.
    """
    # Build the ID set once instead of once per element of current_selected.
    removed_ids = {_extract_track_id(item) for item in (to_remove or [])}
    cur = [
        item
        for item in (current_selected or [])
        if _extract_track_id(item) not in removed_ids
    ]
    return gr.update(choices=cur, value=cur)
|
| 314 |
|
|
|
|
| 321 |
def reset_on_species_change(species: str):
    """Build the component updates applied when the species changes.

    Returns:
        A 3-tuple of ``gr.update`` objects: the query textbox emptied, the
        results list cleared, and the selected list offered the species'
        display-formatted tracks with nothing checked. If the species has no
        bigwigs, the selected list gets no choices.
    """
    # Clear results + selected when species changes (avoids mismatched IDs)
    try:
        # Fetching the names also warms the per-species cache.
        selectable = [
            _format_track_for_display(tid) for tid in _get_bigwig_names(species)
        ]
    except (ValueError, AttributeError):
        # Species doesn't have bigwigs, that's okay
        selectable = []

    return (
        gr.update(value=""),  # query textbox
        gr.update(choices=[], value=[]),  # results list
        gr.update(choices=selectable, value=[]),  # selected list
    )
|
|
|
|
| 339 |
|
| 340 |
|
| 341 |
# -----------------------------
|
|
|
|
| 355 |
if not species:
|
| 356 |
raise gr.Error("Species parameter is missing. Please select a species.")
|
| 357 |
|
| 358 |
+
# Extract track IDs from display format if needed
|
| 359 |
+
bigwig_selected = [_extract_track_id(tid) for tid in bigwig_selected]
|
| 360 |
+
|
| 361 |
if use_coords:
|
| 362 |
# Check if this species supports coordinate-based fetching
|
| 363 |
if species not in SPECIES_WITH_COORDINATE_SUPPORT:
|
|
|
|
| 705 |
|
| 706 |
# Get available BigWig tracks for default species and filter defaults
|
| 707 |
_init_bigwig = _get_bigwig_names(DEFAULT_SPECIES)
|
| 708 |
+
_init_bigwig_selected_ids = [tid for tid in DEFAULT_BIGWIG_TRACKS if tid in _init_bigwig]
|
| 709 |
+
# Format for display
|
| 710 |
+
_init_bigwig_selected = [_format_track_for_display(tid) for tid in _init_bigwig_selected_ids]
|
| 711 |
|
| 712 |
# Filter default BED elements to only those available
|
| 713 |
_init_bed_selected = [elem for elem in DEFAULT_BED_ELEMENTS if elem in _init_bed]
|
|
|
|
| 869 |
|
| 870 |
gr.Markdown("## Select functional tracks")
|
| 871 |
|
| 872 |
+
# Button to download tracks metadata
|
| 873 |
+
def get_metadata_file_path():
    """Return the metadata CSV's path as a string, or None if it is absent."""
    candidate = Path(__file__).parent / "data" / "functional_tracks_metadata.csv"
    return str(candidate) if candidate.exists() else None
|
| 879 |
+
|
| 880 |
+
metadata_file_path = get_metadata_file_path()
|
| 881 |
+
download_metadata_btn = gr.Button(
|
| 882 |
+
"📋 Download metadata for all functional tracks",
|
| 883 |
+
variant="secondary",
|
| 884 |
+
visible=metadata_file_path is not None,
|
| 885 |
+
)
|
| 886 |
+
metadata_download_file = gr.File(
|
| 887 |
+
label="Tracks metadata",
|
| 888 |
+
visible=False,
|
| 889 |
+
)
|
| 890 |
+
|
| 891 |
+
def download_metadata():
    """Reveal the metadata file for download, or keep it hidden if unavailable.

    Returns:
        A ``gr.update`` that sets the file value and makes it visible when
        ``metadata_file_path`` is set and still exists on disk; otherwise an
        update that only hides the component.
    """
    available = bool(metadata_file_path) and Path(metadata_file_path).exists()
    if not available:
        return gr.update(visible=False)
    return gr.update(value=metadata_file_path, visible=True)
|
| 896 |
+
|
| 897 |
+
download_metadata_btn.click(
|
| 898 |
+
fn=download_metadata,
|
| 899 |
+
inputs=[],
|
| 900 |
+
outputs=[metadata_download_file],
|
| 901 |
+
)
|
| 902 |
+
|
| 903 |
bigwig_no_tracks_msg = gr.Markdown(
|
| 904 |
"⚠️ No functional genomic tracks available for this species in the current model.",
|
| 905 |
visible=False,
|
|
|
|
| 1005 |
coords = DEFAULT_COORDS.get(species, DEFAULT_COORDS["human"])
|
| 1006 |
# Show coordinates only if species is supported AND use_coords is True
|
| 1007 |
show_coords = is_supported and use_coords_val
|
| 1008 |
+
|
| 1009 |
+
# Format available tracks for display if species has bigwigs
|
| 1010 |
+
if has_bigwigs:
|
| 1011 |
+
try:
|
| 1012 |
+
track_ids = _get_bigwig_names(species)
|
| 1013 |
+
formatted_tracks = [_format_track_for_display(tid) for tid in track_ids]
|
| 1014 |
+
except:
|
| 1015 |
+
formatted_tracks = []
|
| 1016 |
+
else:
|
| 1017 |
+
formatted_tracks = []
|
| 1018 |
+
|
| 1019 |
return (
|
| 1020 |
gr.update(visible=show_coords, value=coords["chrom"]),
|
| 1021 |
gr.update(visible=show_coords, value=coords["start"]),
|
|
|
|
| 1023 |
gr.update(value=is_supported, visible=is_supported), # Show/hide and enable use_coords only if supported
|
| 1024 |
gr.update(visible=show_coords), # Show/hide the row
|
| 1025 |
gr.update(visible=not has_bigwigs), # Show "no tracks" message if no bigwigs
|
| 1026 |
+
gr.update(visible=has_bigwigs, choices=formatted_tracks, value=[]), # Show bigwig selection if available
|
| 1027 |
gr.update(visible=has_bigwigs), # Show bigwig query if available
|
| 1028 |
gr.update(visible=has_bigwigs), # Show bigwig results if available
|
| 1029 |
gr.update(visible=has_bigwigs), # Show bigwig buttons if available
|
data/functional_tracks_metadata.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|