""" Pigeon Pea Pangenome Atlas — Main entry point. Gradio app for exploring the pangenome of 89 pigeon pea lines. """ import os import sys import json import pandas as pd import gradio as gr from src.state import AppState from src.utils import logger, PRECOMPUTED_DIR from src.data_loader import validate_joins from src.callbacks import ( on_line_selected, on_start_journey, build_umap_plot, on_umap_select, on_compare_party, build_donut_chart, build_frequency_histogram, build_treasure_table, on_pin_gene, on_gene_click_table, build_hotspot_heatmap, on_contig_selected, get_protein_stats_html, build_backpack_comparison, build_composition_heatmap, on_open_gene_card, on_download_gene_report, on_generate_report, build_data_health_html, ) from ui.layout import build_app from ui.quest0 import build_globe_figure # =========================================================== # Load precomputed data # =========================================================== logger.info("Loading precomputed data...") DATA = {} def load_data(): """Load all precomputed parquets into memory.""" p = PRECOMPUTED_DIR DATA["gene_freq"] = pd.read_parquet(p / "pav_gene_frequency.parquet") DATA["line_stats"] = pd.read_parquet(p / "line_stats.parquet") # Prefer 3D embedding for the Genetic Landscape chapter emb_3d = p / "line_embedding_3d.parquet" if emb_3d.exists(): DATA["embedding"] = pd.read_parquet(emb_3d) logger.info("Loaded 3D UMAP embedding") else: DATA["embedding"] = pd.read_parquet(p / "line_embedding.parquet") logger.info("Loaded 2D UMAP embedding (3D not available)") DATA["similarity"] = pd.read_parquet(p / "line_similarity_topk.parquet") DATA["gff_index"] = pd.read_parquet(p / "gff_gene_index.parquet") DATA["protein"] = pd.read_parquet(p / "protein_index.parquet") DATA["hotspots"] = pd.read_parquet(p / "hotspot_bins.parquet") DATA["markers"] = pd.read_parquet(p / "cluster_markers.parquet") # Load PAV matrix for live queries pav_path = p / "pav_matrix.parquet" if pav_path.exists(): DATA["pav"] = pd.read_parquet(pav_path) logger.info(f"PAV matrix loaded: {DATA['pav'].shape}") # Load contig index ci_path = p / "genome_contig_index.json" if ci_path.exists(): with open(ci_path) as f: DATA["contig_index"] = json.load(f) logger.info("All data loaded successfully") load_data() # =========================================================== # Derive UI choices # =========================================================== line_choices = sorted(DATA["line_stats"]["line_id"].tolist()) # Top contigs by gene count contig_gene_counts = DATA["gff_index"]["contig_id"].value_counts() contig_choices = contig_gene_counts.head(30).index.tolist() # Gene choices (all genes with protein data) gene_choices = sorted(DATA["protein"]["gene_id"].tolist()) # =========================================================== # Build UI # =========================================================== demo, C, _theme, _css = build_app(line_choices, contig_choices, gene_choices) # =========================================================== # Wire callbacks # =========================================================== with demo: # -- Data Health on load -- try: report = { "Total genes in PAV": len(DATA["gene_freq"]), "Lines": len(DATA["line_stats"]), "Genes with GFF annotation": len(DATA["gff_index"]), "Genes with protein data": len(DATA["protein"]), "Clusters found": DATA["embedding"]["cluster_id"].nunique(), } C["data_health_html"].value = build_data_health_html(report) except Exception as e: C["data_health_html"].value = f"

Error: {e}

" # -- Quest 0: Origins -- # Render globe on tab load C["q0_tab"].select( fn=lambda: build_globe_figure(DATA["line_stats"]), inputs=[], outputs=[C["q0_globe_plot"]], ) # Country filter → update line dropdown choices def on_country_filter(country): ls = DATA["line_stats"] if country and country != "All countries": filtered = ls[ls["country"] == country]["line_id"].tolist() else: filtered = ls["line_id"].tolist() return gr.Dropdown(choices=sorted(filtered), value=None) C["q0_country_dropdown"].change( fn=on_country_filter, inputs=[C["q0_country_dropdown"]], outputs=[C["q0_line_dropdown"]], ) C["q0_line_dropdown"].change( fn=lambda line_id, state: on_line_selected(line_id, state, DATA), inputs=[C["q0_line_dropdown"], C["state"]], outputs=[C["q0_total_genes"], C["q0_unique_genes"], C["q0_nearest_neighbor"], C["state"]], ) C["q0_start_btn"].click( fn=on_start_journey, inputs=[C["state"]], outputs=[C["tabs"], C["state"]], ) # -- Quest 1 -- C["q1_color_radio"].change( fn=lambda color_by, state: build_umap_plot(color_by, state, DATA), inputs=[C["q1_color_radio"], C["state"]], outputs=[C["q1_umap_plot"]], ) # Render UMAP on tab load C["q1_tab"].select( fn=lambda state: build_umap_plot("Country", state, DATA), inputs=[C["state"]], outputs=[C["q1_umap_plot"]], ) # Party selection via add / clear buttons def _party_display_text(party): if party: text = f"Selected {len(party)} lines: " + ", ".join(party[:10]) if len(party) > 10: text += f" ... +{len(party) - 10} more" return text return "None selected" def on_add_to_party(selected_line, state): if state is None: state = AppState() if selected_line and selected_line not in state.selected_party: state.selected_party.append(selected_line) return _party_display_text(state.selected_party), state def on_clear_party(state): if state is None: state = AppState() state.selected_party = [] return "None selected", state C["q1_add_party_btn"].click( fn=on_add_to_party, inputs=[C["q1_party_dropdown"], C["state"]], outputs=[C["q1_party_display"], C["state"]], ) C["q1_clear_party_btn"].click( fn=on_clear_party, inputs=[C["state"]], outputs=[C["q1_party_display"], C["state"]], ) def on_compare_click(state): fig, _ = on_compare_party(state, DATA) return gr.Plot(value=fig, visible=True) C["q1_compare_btn"].click( fn=on_compare_click, inputs=[C["state"]], outputs=[C["q1_comparison_plot"]], ) # -- Quest 2 -- def update_quest2(core_thresh, cloud_thresh, filter_type, state): donut = build_donut_chart(core_thresh, cloud_thresh, DATA) hist = build_frequency_histogram(core_thresh, cloud_thresh, DATA) table = build_treasure_table(state, core_thresh, cloud_thresh, filter_type, DATA) return donut, hist, table for trigger in [C["q2_core_slider"], C["q2_cloud_slider"]]: trigger.change( fn=update_quest2, inputs=[C["q2_core_slider"], C["q2_cloud_slider"], C["q2_filter_radio"], C["state"]], outputs=[C["q2_donut_plot"], C["q2_histogram_plot"], C["q2_treasure_table"]], ) C["q2_filter_radio"].change( fn=update_quest2, inputs=[C["q2_core_slider"], C["q2_cloud_slider"], C["q2_filter_radio"], C["state"]], outputs=[C["q2_donut_plot"], C["q2_histogram_plot"], C["q2_treasure_table"]], ) # Render Quest 2 on tab select C["q2_tab"].select( fn=update_quest2, inputs=[C["q2_core_slider"], C["q2_cloud_slider"], C["q2_filter_radio"], C["state"]], outputs=[C["q2_donut_plot"], C["q2_histogram_plot"], C["q2_treasure_table"]], ) def _backpack_text(state): """Shared helper to format backpack display string.""" if not state or not state.backpack_genes: return "Empty — select a gene above and click 'Pin this gene to Backpack'" return ", ".join(state.backpack_genes) def on_q2_pin(gene_id, state): bp_text, state = on_pin_gene(gene_id, state) q4_bp = _backpack_text(state) picker = gr.Dropdown(choices=state.backpack_genes if state else []) return bp_text, q4_bp, picker, state C["q2_pin_btn"].click( fn=on_q2_pin, inputs=[C["q2_selected_gene_text"], C["state"]], outputs=[C["q2_backpack_display"], C["q4_backpack_display"], C["q4_backpack_picker"], C["state"]], ) # Table row click → select gene def on_table_select(state, evt: gr.SelectData): gene_id = str(evt.value) if evt else "" return gene_id, state C["q2_treasure_table"].select( fn=on_table_select, inputs=[C["state"]], outputs=[C["q2_selected_gene_text"], C["state"]], ) # -- Quest 3 -- C["q3_tab"].select( fn=lambda: build_hotspot_heatmap(DATA), inputs=[], outputs=[C["q3_heatmap_plot"]], ) C["q3_contig_dropdown"].change( fn=lambda contig_id, state: on_contig_selected(contig_id, DATA, state), inputs=[C["q3_contig_dropdown"], C["state"]], outputs=[C["q3_track_plot"], C["q3_region_table"]], ) # -- Quest 4 -- # Track the currently loaded gene in Quest 4 via state def _load_gene(gene_id): """Load protein stats for a gene ID.""" if not gene_id or not gene_id.strip(): return get_protein_stats_html("", DATA), "" gid = gene_id.strip() return get_protein_stats_html(gid, DATA), gid # Load button: type a gene ID and click Load C["q4_load_btn"].click( fn=_load_gene, inputs=[C["q4_gene_input"]], outputs=[C["q4_protein_stats_html"], C["q4_gene_input"]], ) # Also trigger on Enter key in the textbox C["q4_gene_input"].submit( fn=_load_gene, inputs=[C["q4_gene_input"]], outputs=[C["q4_protein_stats_html"], C["q4_gene_input"]], ) # Backpack quick-pick dropdown C["q4_backpack_picker"].change( fn=lambda gene_id: (get_protein_stats_html(gene_id, DATA), gene_id or ""), inputs=[C["q4_backpack_picker"]], outputs=[C["q4_protein_stats_html"], C["q4_gene_input"]], ) # Hidden dropdown receives value from Gene Card "Show Protein" button C["q4_gene_dropdown"].change( fn=lambda gene_id: (get_protein_stats_html(gene_id, DATA), gene_id or ""), inputs=[C["q4_gene_dropdown"]], outputs=[C["q4_protein_stats_html"], C["q4_gene_input"]], ) def on_q4_pin(gene_id, state): """Pin a gene from Quest 4, update both backpack displays and charts.""" gid = gene_id.strip() if gene_id else "" bp_text, state = on_pin_gene(gid, state) radar = build_backpack_comparison(state, DATA) heatmap = build_composition_heatmap(state, DATA) q4_bp = _backpack_text(state) # Update the backpack picker choices picker_update = gr.Dropdown(choices=state.backpack_genes if state else []) return bp_text, q4_bp, radar, heatmap, picker_update, state C["q4_pin_btn"].click( fn=on_q4_pin, inputs=[C["q4_gene_input"], C["state"]], outputs=[ C["q2_backpack_display"], C["q4_backpack_display"], C["q4_comparison_bar_plot"], C["q4_composition_heatmap"], C["q4_backpack_picker"], C["state"], ], ) def _on_q4_tab_select(state): bp_genes = state.backpack_genes if state else [] return ( build_backpack_comparison(state, DATA), build_composition_heatmap(state, DATA), _backpack_text(state), gr.Dropdown(choices=bp_genes), ) C["q4_tab"].select( fn=_on_q4_tab_select, inputs=[C["state"]], outputs=[C["q4_comparison_bar_plot"], C["q4_composition_heatmap"], C["q4_backpack_display"], C["q4_backpack_picker"]], ) # -- Gene Card -- C["gc_show_genome_btn"].click( fn=lambda state: ( gr.Tabs(selected="quest3"), state.selected_gene if state and state.selected_gene else None, ), inputs=[C["state"]], outputs=[C["tabs"], C["q3_contig_dropdown"]], ) C["gc_show_protein_btn"].click( fn=lambda state: ( gr.Tabs(selected="quest4"), state.selected_gene if state and state.selected_gene else None, ), inputs=[C["state"]], outputs=[C["tabs"], C["q4_gene_dropdown"]], ) def on_gc_pin(state): bp_text, state = on_pin_gene(state.selected_gene if state else "", state) q4_bp = _backpack_text(state) picker = gr.Dropdown(choices=state.backpack_genes if state else []) return bp_text, q4_bp, picker, state C["gc_pin_card_btn"].click( fn=on_gc_pin, inputs=[C["state"]], outputs=[C["q2_backpack_display"], C["q4_backpack_display"], C["q4_backpack_picker"], C["state"]], ) C["gc_download_gene_btn"].click( fn=lambda state: on_download_gene_report(state, DATA), inputs=[C["state"]], outputs=[C["gc_gene_report_file"]], ) # -- Final Report -- C["final_generate_btn"].click( fn=lambda state: on_generate_report(state, DATA), inputs=[C["state"]], outputs=[ C["final_report_md"], C["final_download_json"], C["final_download_csv"], C["final_achievements_html"], C["state"], ], ) # =========================================================== # Launch # =========================================================== if __name__ == "__main__": # Gradio 6.x moved theme/css from Blocks() to launch(). # ssr_mode=False prevents the SSR localhost-access crash on HF Spaces. demo.launch(ssr_mode=False, theme=_theme, css=_css)