Ashkan Taghipour (The University of Western Australia)
Fix Your Report tab: remove duplicate outputs causing Gradio error
9fc7d91
"""
Pigeon Pea Pangenome Atlas — Main entry point.
Gradio app for exploring the pangenome of 89 pigeon pea lines.
"""
import os
import sys
import json
import pandas as pd
import gradio as gr
from src.state import AppState
from src.utils import logger, PRECOMPUTED_DIR
from src.data_loader import validate_joins
from src.callbacks import (
on_line_selected, on_start_journey,
build_umap_plot, on_umap_select, on_compare_party,
build_donut_chart, build_frequency_histogram, build_treasure_table,
on_pin_gene, on_gene_click_table,
build_hotspot_heatmap, on_contig_selected,
get_protein_stats_html, build_backpack_comparison, build_composition_heatmap,
on_open_gene_card, on_download_gene_report,
on_generate_report, build_data_health_html,
)
from ui.layout import build_app
from ui.quest0 import build_globe_figure
# ===========================================================
# Load precomputed data
# ===========================================================
logger.info("Loading precomputed data...")
DATA = {}
def load_data():
"""Load all precomputed parquets into memory."""
p = PRECOMPUTED_DIR
DATA["gene_freq"] = pd.read_parquet(p / "pav_gene_frequency.parquet")
DATA["line_stats"] = pd.read_parquet(p / "line_stats.parquet")
# Prefer 3D embedding for the Genetic Landscape chapter
emb_3d = p / "line_embedding_3d.parquet"
if emb_3d.exists():
DATA["embedding"] = pd.read_parquet(emb_3d)
logger.info("Loaded 3D UMAP embedding")
else:
DATA["embedding"] = pd.read_parquet(p / "line_embedding.parquet")
logger.info("Loaded 2D UMAP embedding (3D not available)")
DATA["similarity"] = pd.read_parquet(p / "line_similarity_topk.parquet")
DATA["gff_index"] = pd.read_parquet(p / "gff_gene_index.parquet")
DATA["protein"] = pd.read_parquet(p / "protein_index.parquet")
DATA["hotspots"] = pd.read_parquet(p / "hotspot_bins.parquet")
DATA["markers"] = pd.read_parquet(p / "cluster_markers.parquet")
# Load PAV matrix for live queries
pav_path = p / "pav_matrix.parquet"
if pav_path.exists():
DATA["pav"] = pd.read_parquet(pav_path)
logger.info(f"PAV matrix loaded: {DATA['pav'].shape}")
# Load contig index
ci_path = p / "genome_contig_index.json"
if ci_path.exists():
with open(ci_path) as f:
DATA["contig_index"] = json.load(f)
logger.info("All data loaded successfully")
load_data()
# ===========================================================
# Derive UI choices
# ===========================================================
line_choices = sorted(DATA["line_stats"]["line_id"].tolist())
# Top contigs by gene count
contig_gene_counts = DATA["gff_index"]["contig_id"].value_counts()
contig_choices = contig_gene_counts.head(30).index.tolist()
# Gene choices (all genes with protein data)
gene_choices = sorted(DATA["protein"]["gene_id"].tolist())
# ===========================================================
# Build UI
# ===========================================================
demo, C, _theme, _css = build_app(line_choices, contig_choices, gene_choices)
# ===========================================================
# Wire callbacks
# ===========================================================
with demo:
# -- Data Health on load --
try:
report = {
"Total genes in PAV": len(DATA["gene_freq"]),
"Lines": len(DATA["line_stats"]),
"Genes with GFF annotation": len(DATA["gff_index"]),
"Genes with protein data": len(DATA["protein"]),
"Clusters found": DATA["embedding"]["cluster_id"].nunique(),
}
C["data_health_html"].value = build_data_health_html(report)
except Exception as e:
C["data_health_html"].value = f"<p>Error: {e}</p>"
# -- Quest 0: Origins --
# Render globe on tab load
C["q0_tab"].select(
fn=lambda: build_globe_figure(DATA["line_stats"]),
inputs=[],
outputs=[C["q0_globe_plot"]],
)
# Country filter → update line dropdown choices
def on_country_filter(country):
ls = DATA["line_stats"]
if country and country != "All countries":
filtered = ls[ls["country"] == country]["line_id"].tolist()
else:
filtered = ls["line_id"].tolist()
return gr.Dropdown(choices=sorted(filtered), value=None)
C["q0_country_dropdown"].change(
fn=on_country_filter,
inputs=[C["q0_country_dropdown"]],
outputs=[C["q0_line_dropdown"]],
)
C["q0_line_dropdown"].change(
fn=lambda line_id, state: on_line_selected(line_id, state, DATA),
inputs=[C["q0_line_dropdown"], C["state"]],
outputs=[C["q0_total_genes"], C["q0_unique_genes"],
C["q0_nearest_neighbor"], C["state"]],
)
C["q0_start_btn"].click(
fn=on_start_journey,
inputs=[C["state"]],
outputs=[C["tabs"], C["state"]],
)
# -- Quest 1 --
C["q1_color_radio"].change(
fn=lambda color_by, state: build_umap_plot(color_by, state, DATA),
inputs=[C["q1_color_radio"], C["state"]],
outputs=[C["q1_umap_plot"]],
)
# Render UMAP on tab load
C["q1_tab"].select(
fn=lambda state: build_umap_plot("Country", state, DATA),
inputs=[C["state"]],
outputs=[C["q1_umap_plot"]],
)
# Party selection via add / clear buttons
def _party_display_text(party):
if party:
text = f"Selected {len(party)} lines: " + ", ".join(party[:10])
if len(party) > 10:
text += f" ... +{len(party) - 10} more"
return text
return "None selected"
def on_add_to_party(selected_line, state):
if state is None:
state = AppState()
if selected_line and selected_line not in state.selected_party:
state.selected_party.append(selected_line)
return _party_display_text(state.selected_party), state
def on_clear_party(state):
if state is None:
state = AppState()
state.selected_party = []
return "None selected", state
C["q1_add_party_btn"].click(
fn=on_add_to_party,
inputs=[C["q1_party_dropdown"], C["state"]],
outputs=[C["q1_party_display"], C["state"]],
)
C["q1_clear_party_btn"].click(
fn=on_clear_party,
inputs=[C["state"]],
outputs=[C["q1_party_display"], C["state"]],
)
def on_compare_click(state):
fig, _ = on_compare_party(state, DATA)
return gr.Plot(value=fig, visible=True)
C["q1_compare_btn"].click(
fn=on_compare_click,
inputs=[C["state"]],
outputs=[C["q1_comparison_plot"]],
)
# -- Quest 2 --
def update_quest2(core_thresh, cloud_thresh, filter_type, state):
donut = build_donut_chart(core_thresh, cloud_thresh, DATA)
hist = build_frequency_histogram(core_thresh, cloud_thresh, DATA)
table = build_treasure_table(state, core_thresh, cloud_thresh, filter_type, DATA)
return donut, hist, table
for trigger in [C["q2_core_slider"], C["q2_cloud_slider"]]:
trigger.change(
fn=update_quest2,
inputs=[C["q2_core_slider"], C["q2_cloud_slider"],
C["q2_filter_radio"], C["state"]],
outputs=[C["q2_donut_plot"], C["q2_histogram_plot"],
C["q2_treasure_table"]],
)
C["q2_filter_radio"].change(
fn=update_quest2,
inputs=[C["q2_core_slider"], C["q2_cloud_slider"],
C["q2_filter_radio"], C["state"]],
outputs=[C["q2_donut_plot"], C["q2_histogram_plot"],
C["q2_treasure_table"]],
)
# Render Quest 2 on tab select
C["q2_tab"].select(
fn=update_quest2,
inputs=[C["q2_core_slider"], C["q2_cloud_slider"],
C["q2_filter_radio"], C["state"]],
outputs=[C["q2_donut_plot"], C["q2_histogram_plot"],
C["q2_treasure_table"]],
)
def _backpack_text(state):
"""Shared helper to format backpack display string."""
if not state or not state.backpack_genes:
return "Empty — select a gene above and click 'Pin this gene to Backpack'"
return ", ".join(state.backpack_genes)
def on_q2_pin(gene_id, state):
bp_text, state = on_pin_gene(gene_id, state)
q4_bp = _backpack_text(state)
picker = gr.Dropdown(choices=state.backpack_genes if state else [])
return bp_text, q4_bp, picker, state
C["q2_pin_btn"].click(
fn=on_q2_pin,
inputs=[C["q2_selected_gene_text"], C["state"]],
outputs=[C["q2_backpack_display"], C["q4_backpack_display"],
C["q4_backpack_picker"], C["state"]],
)
# Table row click → select gene
def on_table_select(state, evt: gr.SelectData):
gene_id = str(evt.value) if evt else ""
return gene_id, state
C["q2_treasure_table"].select(
fn=on_table_select,
inputs=[C["state"]],
outputs=[C["q2_selected_gene_text"], C["state"]],
)
# -- Quest 3 --
C["q3_tab"].select(
fn=lambda: build_hotspot_heatmap(DATA),
inputs=[],
outputs=[C["q3_heatmap_plot"]],
)
C["q3_contig_dropdown"].change(
fn=lambda contig_id, state: on_contig_selected(contig_id, DATA, state),
inputs=[C["q3_contig_dropdown"], C["state"]],
outputs=[C["q3_track_plot"], C["q3_region_table"]],
)
# -- Quest 4 --
# Track the currently loaded gene in Quest 4 via state
def _load_gene(gene_id):
"""Load protein stats for a gene ID."""
if not gene_id or not gene_id.strip():
return get_protein_stats_html("", DATA), ""
gid = gene_id.strip()
return get_protein_stats_html(gid, DATA), gid
# Load button: type a gene ID and click Load
C["q4_load_btn"].click(
fn=_load_gene,
inputs=[C["q4_gene_input"]],
outputs=[C["q4_protein_stats_html"], C["q4_gene_input"]],
)
# Also trigger on Enter key in the textbox
C["q4_gene_input"].submit(
fn=_load_gene,
inputs=[C["q4_gene_input"]],
outputs=[C["q4_protein_stats_html"], C["q4_gene_input"]],
)
# Backpack quick-pick dropdown
C["q4_backpack_picker"].change(
fn=lambda gene_id: (get_protein_stats_html(gene_id, DATA), gene_id or ""),
inputs=[C["q4_backpack_picker"]],
outputs=[C["q4_protein_stats_html"], C["q4_gene_input"]],
)
# Hidden dropdown receives value from Gene Card "Show Protein" button
C["q4_gene_dropdown"].change(
fn=lambda gene_id: (get_protein_stats_html(gene_id, DATA), gene_id or ""),
inputs=[C["q4_gene_dropdown"]],
outputs=[C["q4_protein_stats_html"], C["q4_gene_input"]],
)
def on_q4_pin(gene_id, state):
"""Pin a gene from Quest 4, update both backpack displays and charts."""
gid = gene_id.strip() if gene_id else ""
bp_text, state = on_pin_gene(gid, state)
radar = build_backpack_comparison(state, DATA)
heatmap = build_composition_heatmap(state, DATA)
q4_bp = _backpack_text(state)
# Update the backpack picker choices
picker_update = gr.Dropdown(choices=state.backpack_genes if state else [])
return bp_text, q4_bp, radar, heatmap, picker_update, state
C["q4_pin_btn"].click(
fn=on_q4_pin,
inputs=[C["q4_gene_input"], C["state"]],
outputs=[
C["q2_backpack_display"],
C["q4_backpack_display"],
C["q4_comparison_bar_plot"],
C["q4_composition_heatmap"],
C["q4_backpack_picker"],
C["state"],
],
)
def _on_q4_tab_select(state):
bp_genes = state.backpack_genes if state else []
return (
build_backpack_comparison(state, DATA),
build_composition_heatmap(state, DATA),
_backpack_text(state),
gr.Dropdown(choices=bp_genes),
)
C["q4_tab"].select(
fn=_on_q4_tab_select,
inputs=[C["state"]],
outputs=[C["q4_comparison_bar_plot"], C["q4_composition_heatmap"],
C["q4_backpack_display"], C["q4_backpack_picker"]],
)
# -- Gene Card --
C["gc_show_genome_btn"].click(
fn=lambda state: (
gr.Tabs(selected="quest3"),
state.selected_gene if state and state.selected_gene else None,
),
inputs=[C["state"]],
outputs=[C["tabs"], C["q3_contig_dropdown"]],
)
C["gc_show_protein_btn"].click(
fn=lambda state: (
gr.Tabs(selected="quest4"),
state.selected_gene if state and state.selected_gene else None,
),
inputs=[C["state"]],
outputs=[C["tabs"], C["q4_gene_dropdown"]],
)
def on_gc_pin(state):
bp_text, state = on_pin_gene(state.selected_gene if state else "", state)
q4_bp = _backpack_text(state)
picker = gr.Dropdown(choices=state.backpack_genes if state else [])
return bp_text, q4_bp, picker, state
C["gc_pin_card_btn"].click(
fn=on_gc_pin,
inputs=[C["state"]],
outputs=[C["q2_backpack_display"], C["q4_backpack_display"],
C["q4_backpack_picker"], C["state"]],
)
C["gc_download_gene_btn"].click(
fn=lambda state: on_download_gene_report(state, DATA),
inputs=[C["state"]],
outputs=[C["gc_gene_report_file"]],
)
# -- Final Report --
C["final_generate_btn"].click(
fn=lambda state: on_generate_report(state, DATA),
inputs=[C["state"]],
outputs=[
C["final_report_md"],
C["final_download_json"],
C["final_download_csv"],
C["final_achievements_html"],
C["state"],
],
)
# ===========================================================
# Launch
# ===========================================================
if __name__ == "__main__":
# Gradio 6.x moved theme/css from Blocks() to launch().
# ssr_mode=False prevents the SSR localhost-access crash on HF Spaces.
demo.launch(ssr_mode=False, theme=_theme, css=_css)