Spaces:

Eitan177
/

InSilicoControl

Running

Halper-Stromberg

Add window-based variant placement strategy for Direct Probe Coordinates mode

2a68195 8 days ago

34.2 kB

	import streamlit as st
	import os
	import tempfile
	import subprocess
	from pathlib import Path

	import pipeline as pl
	import json
	import shutil

	def _json_for_script(value):
	    """Serialize *value* as JSON that is safe to inline inside a ``<script>`` tag.

	    ``json.dumps`` leaves ``<``, ``>`` and ``&`` untouched, so a BED label
	    containing ``</script>`` would end the script element early. Emitting those
	    characters as ``\\uXXXX`` escapes keeps the decoded value identical while
	    making the text inert to the HTML parser.
	    """
	    return (
	        json.dumps(value)
	        .replace("<", "\\u003c")
	        .replace(">", "\\u003e")
	        .replace("&", "\\u0026")
	    )


	def _read_navigator_variants(bed_path):
	    """Parse the variant-navigator BED into the entries shown in the IGV sidebar.

	    Returns a list of dicts with ``locus`` (variant span padded by 50 bp,
	    clamped to start at 1), ``name`` (text after the last ``_`` of the label),
	    ``label`` (column 4 verbatim) and ``pos_label`` (1-based start position).
	    A missing path or file yields an empty list. Comment lines, blank lines,
	    rows with fewer than four tab-separated columns and rows whose coordinates
	    are not integers are skipped.
	    """
	    variants = []
	    if not bed_path:
	        return variants
	    bed_path = Path(bed_path)
	    if not bed_path.exists():
	        return variants

	    with open(bed_path, "r") as handle:
	        for line in handle:
	            if line.startswith("#") or not line.strip():
	                continue
	            parts = line.strip().split("\t")
	            if len(parts) < 4:
	                continue
	            chrom, start_text, end_text, label = parts[:4]
	            try:
	                start = int(start_text)
	                end = int(end_text)
	            except ValueError:
	                # Header or malformed row: skip it rather than abort rendering.
	                continue
	            variants.append({
	                "locus": f"{chrom}:{max(1, start-50)}-{end+50}",
	                "name": label.split("_")[-1],
	                "label": label,
	                "pos_label": f"{chrom}:{start+1}",
	            })
	    return variants


	def render_igv(res):
	    """Render an embedded IGV.js browser with a clickable variant navigator.

	    Args:
	        res: Results dict of a pipeline run. Keys read here: ``work_dir_name``
	            (sub-folder of the static directory holding the copied outputs),
	            ``genome_version`` (defaults to ``"hg38"``), ``igv_bed_path``,
	            ``fully_covered_bed_path`` and ``mane_transcripts_bed_path``.

	    Shows a Streamlit warning and returns early when ``work_dir_name`` is
	    missing. Otherwise it emits an HTML component; nothing is returned.
	    """
	    work_dir_name = res.get("work_dir_name")
	    genome_version = res.get("genome_version", "hg38")
	    if not work_dir_name:
	        st.warning("No static files path found for IGV.js. Restart the pipeline to generate.")
	        return

	    variants = _read_navigator_variants(res.get("igv_bed_path"))

	    static_base = f"/app/static/{work_dir_name}"
	    bam_url = f"{static_base}/synthetic.sorted.bam"
	    bai_url = f"{static_base}/synthetic.sorted.bam.bai"
	    vcf_url = f"{static_base}/synthetic.vcf"
	    navigator_url = f"{static_base}/igv_variant_navigator.bed"
	    probes_url = f"{static_base}/fully_covered_exons.bed"
	    mane_url = f"{static_base}/mane_transcripts.bed"

	    # The optional tracks are only offered when the pipeline produced the file.
	    probes_bed_exists = res.get("fully_covered_bed_path") is not None and Path(res["fully_covered_bed_path"]).exists()
	    mane_transcripts_exists = res.get("mane_transcripts_bed_path") is not None and Path(res["mane_transcripts_bed_path"]).exists()

	    tracks = [
	        {
	            "name": "Reference",
	            "type": "sequence",
	            "order": 1,
	        }
	    ]
	    if mane_transcripts_exists:
	        tracks.append({
	            "name": "MANE Transcripts",
	            "type": "annotation",
	            "format": "bed",
	            "url": mane_url,
	            "indexed": False,
	            "order": 1.5,
	            "color": "green",
	            "displayMode": "EXPANDED",
	        })
	    if probes_bed_exists:
	        tracks.append({
	            "name": "Probes BED",
	            "type": "annotation",
	            "format": "bed",
	            "url": probes_url,
	            "indexed": False,
	            "order": 2,
	            "color": "blue",
	        })
	    tracks.extend([
	        {
	            "name": "Variant Navigator BED",
	            "type": "annotation",
	            "format": "bed",
	            "url": navigator_url,
	            "indexed": False,
	            "order": 3,
	            "color": "red",
	        },
	        {
	            "name": "Synthetic VCF",
	            "type": "variant",
	            "format": "vcf",
	            "url": vcf_url,
	            "indexed": False,
	            "order": 4,
	        },
	        {
	            "name": "Synthetic BAM",
	            "type": "alignment",
	            "format": "bam",
	            "url": bam_url,
	            "indexURL": bai_url,
	            "order": 5,
	            "height": 300,
	        },
	    ])

	    # Everything interpolated into the <script> element goes through
	    # _json_for_script so file-derived text cannot break out of it.
	    variants_json = _json_for_script(variants)
	    tracks_json = _json_for_script(tracks)
	    genome_json = _json_for_script(genome_version)

	    html_content = f"""
	<!DOCTYPE html>
	<html>
	<head>
	<meta charset="utf-8">
	<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.4/css/all.min.css">
	<style>
	body {{
	    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
	    margin: 0;
	    padding: 0;
	    display: flex;
	    height: 600px;
	    background-color: #ffffff;
	}}
	#sidebar {{
	    width: 250px;
	    border-right: 1px solid #e0e0e0;
	    display: flex;
	    flex-direction: column;
	    height: 100%;
	    background-color: #f8f9fa;
	}}
	#sidebar-header {{
	    padding: 10px;
	    background-color: #0e1117;
	    color: white;
	    font-weight: bold;
	    font-size: 14px;
	}}
	#variant-list {{
	    flex-grow: 1;
	    overflow-y: auto;
	    padding: 5px;
	}}
	.variant-item {{
	    padding: 8px 10px;
	    margin-bottom: 4px;
	    border-radius: 4px;
	    cursor: pointer;
	    border: 1px solid #e9ecef;
	    background-color: white;
	    font-size: 12px;
	    transition: background-color 0.2s;
	}}
	.variant-item:hover {{
	    background-color: #e9ecef;
	}}
	.variant-name {{
	    font-weight: bold;
	    color: #ff4b4b;
	}}
	.variant-pos {{
	    color: #6c757d;
	    margin-top: 2px;
	}}
	#igv-container {{
	    flex-grow: 1;
	    height: 100%;
	    overflow: hidden;
	}}
	#igv-div {{
	    height: 600px;
	    width: 100%;
	}}
	</style>
	</head>
	<body>
	<div id="sidebar">
	<div id="sidebar-header"><i class="fas fa-list"></i> Variant Navigator ({len(variants)})</div>
	<div id="variant-list"></div>
	</div>
	<div id="igv-container">
	<div id="igv-div"></div>
	</div>

	<script src="https://cdn.jsdelivr.net/npm/igv@2.15.5/dist/igv.min.js"></script>
	<script>
	var variants = {variants_json};
	var trackConfigs = {tracks_json};

	// Snapshot the file-backed tracks by name before IGV sees the config, so
	// the error panel never depends on fixed positions in the track list.
	var trackSources = trackConfigs
	    .filter(function (t) {{ return Boolean(t.url); }})
	    .map(function (t) {{ return {{ name: t.name, url: t.url }}; }});

	function escapeHtml(value) {{
	    var scratch = document.createElement("div");
	    scratch.textContent = String(value);
	    return scratch.innerHTML;
	}}

	var listContainer = document.getElementById("variant-list");
	variants.forEach(function (v) {{
	    var item = document.createElement("div");
	    item.className = "variant-item";

	    // textContent keeps BED-derived labels from being parsed as markup.
	    var nameDiv = document.createElement("div");
	    nameDiv.className = "variant-name";
	    nameDiv.textContent = v.name.toUpperCase();

	    var posDiv = document.createElement("div");
	    posDiv.className = "variant-pos";
	    posDiv.textContent = v.pos_label;

	    item.appendChild(nameDiv);
	    item.appendChild(posDiv);
	    item.onclick = function () {{
	        if (window.igvBrowser) {{
	            window.igvBrowser.search(v.locus);
	        }}
	    }};
	    listContainer.appendChild(item);
	}});

	var options = {{
	    genome: {genome_json},
	    locus: variants.length > 0 ? variants[0].locus : "chr1:1787315-1787437",
	    tracks: trackConfigs
	}};

	var igvDiv = document.getElementById("igv-div");
	igv.createBrowser(igvDiv, options)
	    .then(function (browser) {{
	        window.igvBrowser = browser;
	        console.log("IGV browser created successfully.");
	    }})
	    .catch(function (err) {{
	        console.error("Error creating IGV browser:", err);
	        var attempted = trackSources.map(function (t) {{
	            return "<li>" + escapeHtml(t.name) + ": <code>" + escapeHtml(t.url) + "</code></li>";
	        }}).join("");
	        document.getElementById("igv-div").innerHTML =
	            "<div style='color:#721c24; background-color:#f8d7da; border:1px solid #f5c6cb; padding:20px; border-radius:4px; font-family:sans-serif; margin:20px;'>" +
	            "<h3>❌ Error Loading IGV Browser</h3>" +
	            "<p><b>Message:</b> " + escapeHtml(err) + "</p>" +
	            "<p>This usually indicates static file serving is not enabled or files are not accessible.</p>" +
	            "<p><b>Paths attempted:</b></p>" +
	            "<ul>" + attempted + "</ul>" +
	            "<p>Please verify that <code>enableStaticServing = true</code> is active and the Hugging Face Space has fully rebuilt.</p>" +
	            "</div>";
	    }});
	</script>
	</body>
	</html>
	"""
	    st.components.v1.html(html_content, height=620, scrolling=False)


	# Page chrome. set_page_config is issued before any other Streamlit call in
	# this script, so it stays first here.
	_APP_ICON = "🧬"
	_APP_NAME = "In Silico Controls Generator"

	st.set_page_config(page_title=_APP_NAME, page_icon=_APP_ICON, layout="wide")

	st.title(f"{_APP_ICON} {_APP_NAME}")
	st.caption(
	    "Generate synthetic BAM + VCF files with realistic variants derived from "
	    "your probe panel and MANE exon annotations."
	)

	# ── Sidebar: parameters ──────────────────────────────────────────────────────
	# Collects every pipeline setting from sidebar widgets. The names bound here
	# (depth, vaf, read_length, seq_mode, insert_size, insert_std, indel_interval,
	# genome_version, ref_mode, custom_ref_path, target_mode, include_*,
	# direct_window_size, rg_id, rg_sm) are read again further down the script.
	# NOTE(review): nested indentation is missing in this copy of the file, so the
	# comments below describe the apparent structure; confirm against the repo.

	# The uploader widget is created later in the script under the key
	# "uploaded_bed_file"; reading session_state lets the debug section below see
	# the uploaded file before that widget call has run on this rerun.
	uploaded_bed = st.session_state.get("uploaded_bed_file")

	with st.sidebar:
	st.header("Pipeline Parameters")

	st.subheader("Sequencing Parameters")
	depth = st.number_input("Target read depth per variant", min_value=1, max_value=10000, value=100, step=10)
	vaf = st.slider("Variant allele frequency (VAF)", min_value=0.01, max_value=1.0, value=0.20, step=0.01, format="%.2f")
	read_length = st.number_input("Read length (bp)", min_value=50, max_value=300, value=150, step=10)

	st.subheader("Sequencing Technology")
	seq_mode = st.radio(
	"Sequencing assay style",
	options=["Hybrid Capture (Staggered reads)", "PCR Amplicon (Identical start/ends)"],
	index=0,
	help="Hybrid Capture simulates sheared fragments with staggered read start/end coordinates. PCR Amplicon simulates amplicon sequencing where all reads start and end exactly at the probe/target coordinate boundaries."
	)

	# Insert-size widgets are only shown for hybrid capture; the amplicon branch
	# pins the insert size to 250 with zero spread instead of asking.
	if seq_mode == "Hybrid Capture (Staggered reads)":
	st.subheader("Fragment Insert Size")
	insert_size = st.number_input("Mean insert size (bp)", min_value=100, max_value=1000, value=379, step=10)
	insert_std = st.number_input("Insert size std dev (bp)", min_value=0, max_value=200, value=20, step=5)
	else:
	insert_size = 250
	insert_std = 0

	st.subheader("Indel Parameters")
	indel_interval = st.number_input(
	"Indel interval (0 = SNVs only)",
	min_value=0, max_value=100, value=10, step=1,
	help="Make every Nth variant an indel. Set to 0 to generate only SNVs.",
	)

	st.divider()
	st.subheader("Reference Genome")
	genome_version = st.selectbox(
	"Genome assembly",
	options=["hg38", "hg19"],
	index=0,
	help="Choose the reference genome version (hg38 or hg19)."
	)
	ref_mode = st.radio(
	"FASTA source",
	options=["Use cached / download", "Custom path"],
	help="Downloads and caches the selected assembly, or lets you point to a custom local path.",
	)
	# Stays an empty string unless the user picks "Custom path" and types one.
	custom_ref_path = ""
	if ref_mode == "Custom path":
	custom_ref_path = st.text_input(
	f"Path to {genome_version}.fa",
	placeholder=f"/data/references/{genome_version}.fa",
	help="Must be an indexed FASTA (.fa + .fa.fai).",
	)

	st.divider()
	st.subheader("Targeting Mode")
	# hg19 has no choice of targeting mode: it is forced to Direct Probe
	# Coordinates and the radio is not rendered.
	if genome_version == "hg19":
	st.info("ℹ️ MANE transcript annotations are hg38-only. Targeting mode is set to Direct Probe Coordinates for hg19.")
	target_mode = "Direct Probe Coordinates"
	else:
	target_mode = st.radio(
	"Variant targeting logic",
	options=["MANE Transcript Exons/Introns", "Direct Probe Coordinates"],
	help="MANE Transcripts Mode places variants in coding exons and flanking introns of protein-coding genes. Direct Probe Mode places a single variant inside each probe coordinate itself, completely ignoring gene annotations."
	)

	# Both branches bind include_cds / include_intron / include_offtarget and
	# direct_window_size, so later code can read them regardless of mode.
	if target_mode == "MANE Transcript Exons/Introns":
	st.subheader("Variant Locations")
	include_cds = st.checkbox("Generate CDS variants", value=True, help="Place variants in the coding sequence (CDS) of MANE exons.")
	include_intron = st.checkbox("Generate flanking intronic variants", value=True, help="Place variants in the flanking introns of MANE exons.")
	include_offtarget = st.checkbox("Generate off-target (unused probe) variants", value=True, help="Place variants in the midpoint of probes with no MANE exon coverage.")
	direct_window_size = 0
	else:
	include_cds = False
	include_intron = False
	include_offtarget = False

	st.subheader("Direct Probe Settings")
	direct_variant_strategy = st.radio(
	"Variant placement strategy",
	options=["One variant per N bp window", "Single random variant per probe"],
	index=0,
	help="Choose whether to generate one variant per N bp window across the probe coordinates or a single random variant per probe."
	)
	# direct_window_size is 0 for the single-variant strategy.
	# NOTE(review): presumably the pipeline treats 0 as "no windowing" — confirm
	# in pl.generate_target_snvs.
	if direct_variant_strategy == "One variant per N bp window":
	direct_window_size = st.number_input(
	"Window size (bp)",
	min_value=1, max_value=1000, value=10, step=1,
	help="Place one variant randomly inside each non-overlapping window of this size."
	)
	else:
	direct_window_size = 0

	st.subheader("Read Group")
	rg_id = st.text_input("Read Group ID", value="CPDV2510843-SEQ-251103")
	rg_sm = st.text_input("Sample Name", value="CPDV2510843-SEQ-251103")

	st.divider()
	# Deployment diagnostics: Streamlit version, upload/session state, and
	# whether the static directories exist relative to the working directory.
	st.subheader("🛠️ Debug Info")
	st.caption("Helpful diagnostics for troubleshooting deployment status.")
	st.write("Streamlit Version:", st.__version__)
	st.write("File Uploaded:", uploaded_bed is not None)
	if uploaded_bed:
	st.write("Filename:", uploaded_bed.name)
	st.write("probes_df in state:", "probes_df" in st.session_state)
	if "probes_df" in st.session_state:
	st.write("probes_df count:", len(st.session_state["probes_df"]))
	# NOTE(review): os is already imported at the top of the file; this
	# re-import is redundant.
	import os
	st.write("CWD:", os.getcwd())
	st.write("Script Path:", __file__)
	st.write("static/ exists:", os.path.exists("static"))
	st.write("src/static/ exists:", os.path.exists("src/static"))
	# Only the first five directory entries are listed.
	if os.path.exists("static"):
	st.write("static/ folders:", os.listdir("static")[:5])
	if os.path.exists("src/static"):
	st.write("src/static/ folders:", os.listdir("src/static")[:5])

	# ── Main area ────────────────────────────────────────────────────────────────
	# Two columns at a 2:1 width ratio: the BED uploader on the left and a
	# cache-status panel on the right.

	col_upload, col_info = st.columns([2, 1])

	with col_upload:
	st.header("1 · Upload Probes BED")
	# The widget key matches the session_state lookup done before the sidebar,
	# and this call rebinds uploaded_bed for the rest of the script.
	uploaded_bed = st.file_uploader(
	"Upload your probes BED file",
	type=["bed"],
	key="uploaded_bed_file",
	help="Standard BED3+ format (chrom, start, end, ...)",
	)

	with col_info:
	st.header("Cache Status")
	# Each flag is a plain existence check on a path exposed by the pipeline
	# module. The reference counts as cached only when both the FASTA and its
	# .fai index exist for the selected assembly.
	mane_cached = pl.MANE_BED12.exists()
	ref_cached = (pl.HG38_FA.exists() and pl.HG38_FAI.exists()) if genome_version == "hg38" else (pl.HG19_FA.exists() and pl.HG19_FAI.exists())
	bigbed_cached = pl.BIGBEDTOBED_PATH.exists()

	st.markdown(f"{'✅' if bigbed_cached else '⬜'} bigBedToBed")
	st.markdown(f"{'✅' if mane_cached else '⬜'} MANE annotation")
	st.markdown(f"{'✅' if ref_cached else '⬜'} {genome_version} reference")

	# Warn about the download only when the cached/download source is selected;
	# a custom path never triggers it.
	# NOTE(review): indentation is missing in this copy, so it is unclear whether
	# this warning renders inside col_info or below both columns — confirm.
	if not ref_cached and ref_mode == "Use cached / download":
	st.warning(f"{genome_version} not cached. First run will download and index the assembly, which may take 5–10 minutes.")

	# ── Step 1.5: Customize Probes ────────────────────────────────────────────────
	# Parses the uploaded BED into a DataFrame kept in st.session_state["probes_df"]
	# and lets the user choose which probes to run through a boolean "Select"
	# column.
	# NOTE(review): nested indentation is missing in this copy of the file; in
	# particular it is unclear whether the `if "probes_df" in st.session_state`
	# section and the final st.divider() sit inside `if uploaded_bed:` — confirm
	# against the repo (that section reads uploaded_bed.name).
	if uploaded_bed:
	# Re-parse only when nothing is cached yet or the uploaded file name changed.
	# A different file with the same name is therefore not re-parsed.
	if "probes_df" not in st.session_state or st.session_state.get("uploaded_file_name") != uploaded_bed.name:
	import pandas as pd
	import io

	try:
	uploaded_bed.seek(0)
	# Undecodable bytes are dropped instead of raising.
	content = uploaded_bed.read().decode("utf-8", errors="ignore")
	# Strip comments and headers
	lines = [line for line in content.splitlines() if line.strip() and not line.startswith("#") and not line.startswith("track")]

	if lines:
	df = pd.read_csv(io.StringIO("\n".join(lines)), sep="\t", header=None)
	# The first three columns get BED names; any extras become col_3, col_4, ...
	# The slice keeps the assignment valid if fewer than three columns were read.
	cols = ["chrom", "start", "end"]
	if len(df.columns) > 3:
	cols += [f"col_{i}" for i in range(3, len(df.columns))]
	df.columns = cols[:len(df.columns)]
	# Every probe starts out selected.
	df.insert(0, "Select", True)
	st.session_state["probes_df"] = df
	st.session_state["uploaded_file_name"] = uploaded_bed.name
	# Forget random-sampling state that belonged to the previous file.
	st.session_state.pop("sample_seed", None)
	st.session_state.pop("prev_frac", None)
	else:
	st.error("Uploaded BED file appears to be empty or contains only comments.")
	except Exception as e:
	st.error(f"Error parsing BED file: {e}")

	if "probes_df" in st.session_state:
	df = st.session_state["probes_df"]

	st.header("1.5 · Customize Probes")
	st.caption(f"Loaded {len(df):,} probes from {uploaded_bed.name}. Customize which regions will be processed below.")

	col_mode, col_rand = st.columns([1, 1])

	with col_mode:
	subset_mode = st.radio(
	"Selection mode",
	options=["All Probes", "Manual Selection (below)", "Random Sampling"],
	index=0,
	help="Choose whether to run all probes, manually check/uncheck probes in the list, or select a random fraction of the probes."
	)

	with col_rand:
	if subset_mode == "Random Sampling":
	sample_frac = st.slider("Fraction of probes to keep", min_value=0.01, max_value=1.00, value=0.10, step=0.01)
	resample_btn = st.button("🎲 Resample")

	# Draw a new seed on first use, on an explicit resample, or when the fraction
	# changed. Otherwise the stored seed is reused, so the same rows are sampled
	# on every rerun.
	if "sample_seed" not in st.session_state or resample_btn or st.session_state.get("prev_frac") != sample_frac:
	import random
	st.session_state["sample_seed"] = random.randint(0, 100000)
	st.session_state["prev_frac"] = sample_frac

	sampled_df = df.sample(frac=sample_frac, random_state=st.session_state["sample_seed"])
	# Mark exactly the sampled rows; this writes into the DataFrame object held
	# in session_state.
	df["Select"] = df.index.isin(sampled_df.index)
	elif subset_mode == "All Probes":
	df["Select"] = True
	# "Manual Selection (below)" has no branch: the Select column is left as is.

	# Render table editor
	st.markdown("#### 📋 Probes List")
	st.caption("Double-click a cell to search, or check/uncheck boxes to filter targets.")

	# Only the Select column is editable; every other column is disabled.
	edited_df = st.data_editor(
	df,
	use_container_width=True,
	hide_index=True,
	disabled=[col for col in df.columns if col != "Select"],
	column_config={
	"Select": st.column_config.CheckboxColumn(
	"Select",
	help="Uncheck to exclude this region from variant generation",
	default=True
	)
	}
	)
	# Persist the edits so the pipeline step reads the user's current selection.
	st.session_state["probes_df"] = edited_df

	total_selected = len(edited_df[edited_df["Select"] == True])
	st.info(f"Selected {total_selected:,} of {len(df):,} probes ({total_selected/len(df)*100:.1f}%) for variant generation.")

	st.divider()

	# ── Step 2: Run pipeline ──────────────────────────────────────────────────────
	# Without an uploaded BED the script halts here, so nothing below this point
	# is rendered until a file is present.

	st.header("2 · Run Pipeline")

	if not uploaded_bed:
	    st.info("Upload a probes BED file to enable the pipeline.")
	    st.stop()

	run_btn = st.button(label="▶ Run Pipeline", type="primary", use_container_width=True)

	# A new run discards whatever the previous run stored in the session.
	if run_btn:
	    for _stale_key in ("results", "log_lines"):
	        st.session_state.pop(_stale_key, None)

	# ── Execute pipeline ──────────────────────────────────────────────────────────
	# Runs only on the rerun triggered by the "Run Pipeline" button. Steps:
	# resolve the reference FASTA, write the selected probes to a temp work
	# directory, generate target variants (MANE or direct mode), build the
	# synthetic BAM/VCF, copy the outputs into the static directories used by
	# IGV.js, and store the output *paths* in st.session_state["results"].

	if run_btn:
	    if ref_mode == "Use cached / download":
	        fasta_path = pl.HG38_FA if genome_version == "hg38" else pl.HG19_FA
	    else:
	        fasta_path = Path(custom_ref_path)

	    if ref_mode == "Custom path":
	        if not custom_ref_path:
	            # Name the assembly actually selected, not always hg38.
	            st.error(f"Please provide a path to your {genome_version}.fa file.")
	            st.stop()
	        if not fasta_path.exists():
	            st.error(f"FASTA file not found: {fasta_path}")
	            st.stop()
	        fai = Path(str(fasta_path) + ".fai")
	        if not fai.exists():
	            st.warning("No .fai index found. Attempting to index with samtools faidx...")
	            # The path is typed by the user, so it is passed as an argv element
	            # and never interpolated into a shell command line. Indexing stays
	            # best-effort: a failure is reported but does not abort the run.
	            try:
	                faidx = subprocess.run(
	                    ["samtools", "faidx", str(fasta_path)],
	                    capture_output=True,
	                    text=True,
	                )
	            except OSError as exc:
	                st.warning(f"Could not run samtools faidx: {exc}")
	            else:
	                if faidx.returncode != 0:
	                    st.warning(f"samtools faidx failed: {faidx.stderr.strip()}")

	    # Filter for selected probes
	    if "probes_df" in st.session_state:
	        df = st.session_state["probes_df"]
	        selected_df = df[df["Select"] == True]
	    else:
	        st.error("No probe data found in session state.")
	        st.stop()

	    if len(selected_df) == 0:
	        st.error("No probes selected! Please select at least one probe in Step 1.5.")
	        st.stop()

	    # Convert back to BED format (tab-separated, without the 'Select' column)
	    bed_cols = [col for col in selected_df.columns if col != "Select"]
	    bed_text = selected_df[bed_cols].to_csv(sep="\t", header=False, index=False)

	    # The work directory is deliberately kept after the run: the download
	    # buttons in the results section read the output files from it.
	    work_dir = Path(tempfile.mkdtemp(prefix="insilicocontrols_"))
	    probes_bed = work_dir / "probes.bed"
	    probes_bed.write_text(bed_text)

	    log_expander = st.expander("Pipeline log", expanded=True)
	    log_area = log_expander.empty()
	    log_lines = []

	    def append_log(msg):
	        """Record *msg* and redraw the log panel with the last 80 entries."""
	        log_lines.append(str(msg))
	        log_area.code("\n".join(log_lines[-80:]), language=None)

	    progress_bar = st.progress(0.0, text="Starting...")

	    def update_progress(fraction, label=""):
	        """Move the progress bar to *fraction* (capped at 1.0) with *label*."""
	        progress_bar.progress(min(fraction, 1.0), text=label)

	    try:
	        # Pre-ensure reference genome. This is the first step, so it reports the
	        # lowest fraction; the steps below continue from 0.02 upward and the bar
	        # never moves backwards.
	        if ref_mode == "Use cached / download":
	            update_progress(0.01, f"Ensuring {genome_version} reference...")
	            append_log(f"\n=== {genome_version} Reference ===")
	            pl.ensure_reference(genome_version=genome_version, log_func=append_log)

	        if target_mode == "MANE Transcript Exons/Introns":
	            update_progress(0.02, "Setting up tools...")
	            append_log("=== Setting up tools ===")
	            pl.ensure_bigbedtobed(append_log)

	            update_progress(0.05, "Ensuring MANE annotation...")
	            append_log("\n=== MANE Annotation ===")
	            pl.ensure_mane(append_log)

	            update_progress(0.15, "Parsing MANE exons...")
	            append_log("\n=== Parsing MANE Exons ===")
	            exons_bed = pl.parse_mane_exons(work_dir, append_log)

	            update_progress(0.25, "Analyzing probe coverage...")
	            append_log("\n=== Coverage Analysis ===")
	            stats, fully_bed, partial_bed, unused_bed = pl.analyze_coverage(
	                work_dir, probes_bed, exons_bed, append_log
	            )

	            append_log("\n============================================")
	            append_log(" COVERAGE SUMMARY ")
	            append_log("============================================")
	            append_log(f"Exons with >95% coverage (USED): {stats['fully_covered']}")
	            append_log(f"Exons with partial coverage (USED): {stats['partially_covered']}")
	            append_log(f"Probes with no exon coverage: {stats['probes_no_exons']}")
	            append_log(f"Unused contiguous probes (ADDED): {stats['unused_probes']}")
	            append_log("============================================")

	            # Subset MANE transcripts intersecting with target probes.
	            # NOTE(review): merged_probes.bed is not written in this script; it
	            # is presumably produced by pl.analyze_coverage — confirm.
	            merged_probes = work_dir / "merged_probes.bed"
	            mane_transcripts_bed = work_dir / "mane_transcripts.bed"
	            update_progress(0.30, "Subsetting MANE transcripts...")
	            append_log("\n=== Subsetting MANE Transcripts ===")
	            pl.run_cmd(f"bedtools intersect -a {pl.MANE_BED12} -b {merged_probes} -wa -u > {mane_transcripts_bed}", append_log)

	            update_progress(0.35, "Generating target SNVs...")
	            append_log("\n=== Generating Target SNVs ===")
	            snvs_bed, total_snvs = pl.generate_target_snvs(
	                work_dir=work_dir,
	                fully_bed=fully_bed,
	                partial_bed=partial_bed,
	                unused_bed=unused_bed,
	                include_cds=include_cds,
	                include_intron=include_intron,
	                include_offtarget=include_offtarget,
	                mode="mane",
	                log_func=append_log
	            )
	        else:  # Direct Probe Coordinates mode
	            # No coverage analysis runs in this mode, so the metrics shown in
	            # the results section are all zero and the MANE-derived files are
	            # absent.
	            stats = {
	                "fully_covered": 0,
	                "partially_covered": 0,
	                "probes_no_exons": 0,
	                "unused_probes": 0,
	            }
	            fully_bed = None
	            partial_bed = None
	            unused_bed = None
	            mane_transcripts_bed = None

	            update_progress(0.35, "Generating target SNVs...")
	            append_log("\n=== Generating Target SNVs (Direct BED Mode) ===")
	            snvs_bed, total_snvs = pl.generate_target_snvs(
	                work_dir=work_dir,
	                fully_bed=None,
	                partial_bed=None,
	                unused_bed=None,
	                mode="direct_bed",
	                probes_bed=probes_bed,
	                direct_window_size=direct_window_size,
	                log_func=append_log
	            )

	        append_log("\n============================================")
	        append_log(" VARIANT SUMMARY ")
	        append_log("============================================")
	        append_log(f"Total SNVs generated for BAM: {total_snvs}")
	        append_log("============================================")

	        update_progress(0.40, "Generating synthetic BAM...")
	        append_log("\n=== Generating Synthetic BAM ===")

	        def bam_progress(fraction, label):
	            """Map BAM-generation progress (0-1) onto the 0.40-0.95 band."""
	            update_progress(0.40 + fraction * 0.55, label)

	        sorted_bam, output_vcf = pl.generate_synthetic_bam(
	            work_dir=work_dir,
	            snvs_bed=snvs_bed,
	            fasta_path=fasta_path,
	            depth=depth,
	            vaf=vaf,
	            rg_id=rg_id,
	            rg_sm=rg_sm,
	            insert_size=insert_size,
	            insert_std=insert_std,
	            indel_interval=indel_interval,
	            read_length=read_length,
	            sequencing_mode="pcr_amplicon" if seq_mode.startswith("PCR Amplicon") else "hybrid_capture",
	            log_func=append_log,
	            progress_func=bam_progress,
	        )

	        update_progress(1.0, "Done!")
	        append_log("\n✅ Pipeline complete.")

	        # Normalize everything to Path; Path() accepts both str and Path.
	        bai_path = Path(str(sorted_bam) + ".bai")
	        vcf_path = Path(output_vcf)
	        igv_bed_path = Path(snvs_bed)
	        fully_bed_path = Path(fully_bed) if fully_bed else None

	        # Copy to static directories for IGV.js visualization (both root and script-relative)
	        work_dir_name = work_dir.name
	        static_dest_cwd = Path("static") / work_dir_name
	        static_dest_script = Path(__file__).parent / "static" / work_dir_name

	        for dest in [static_dest_cwd, static_dest_script]:
	            dest.mkdir(parents=True, exist_ok=True)
	            shutil.copy(sorted_bam, dest / "synthetic.sorted.bam")
	            if bai_path.exists():
	                shutil.copy(bai_path, dest / "synthetic.sorted.bam.bai")
	            shutil.copy(vcf_path, dest / "synthetic.vcf")
	            shutil.copy(igv_bed_path, dest / "igv_variant_navigator.bed")
	            if fully_bed_path and fully_bed_path.exists():
	                shutil.copy(fully_bed_path, dest / "fully_covered_exons.bed")
	            if mane_transcripts_bed and mane_transcripts_bed.exists():
	                shutil.copy(mane_transcripts_bed, dest / "mane_transcripts.bed")

	        # Store paths only — never load large files into session_state memory
	        st.session_state["results"] = {
	            "stats": stats,
	            "total_snvs": total_snvs,
	            "bam_path": str(sorted_bam),
	            "bai_path": str(bai_path) if bai_path.exists() else None,
	            "vcf_path": str(vcf_path),
	            "igv_bed_path": str(igv_bed_path),
	            "fully_covered_bed_path": str(fully_bed_path) if fully_bed_path else None,
	            "mane_transcripts_bed_path": str(mane_transcripts_bed) if mane_transcripts_bed else None,
	            "work_dir_name": work_dir_name,
	            "genome_version": genome_version,
	        }
	        st.session_state["log_lines"] = log_lines[:]

	    except Exception as e:
	        # Surface the failure in the page and the log, then re-raise so the
	        # full traceback is still reported.
	        st.error(f"Pipeline failed: {e}")
	        append_log(f"\n❌ ERROR: {e}")
	        raise

	# ── Results section (persists across reruns via session_state) ────────────────
	# Everything here is driven by the paths stored in st.session_state["results"],
	# so the metrics, the IGV view and the download buttons survive reruns.

	if "results" in st.session_state:

	    def _offer_download(column, heading, path, button_label, file_name, mime):
	        """Render one download cell; the button appears only if *path* exists."""
	        with column:
	            st.markdown(heading)
	            if path and path.exists():
	                with open(path, "rb") as payload:
	                    st.download_button(
	                        button_label,
	                        data=payload,
	                        file_name=file_name,
	                        mime=mime,
	                        use_container_width=True,
	                    )

	    res = st.session_state["results"]
	    stats = res["stats"]
	    total_snvs = res["total_snvs"]

	    st.success("Pipeline completed successfully!")

	    # On the rerun that executed the pipeline the live log is already on the
	    # page; on later reruns the stored copy is shown collapsed instead.
	    if "log_lines" in st.session_state and not run_btn:
	        with st.expander("Pipeline log", expanded=False):
	            stored_tail = st.session_state["log_lines"][-80:]
	            st.code("\n".join(stored_tail), language=None)

	    st.header("3 · Results")
	    metric_columns = st.columns(4)
	    coverage_metrics = (
	        ("Fully Covered Exons", "fully_covered"),
	        ("Partially Covered Exons", "partially_covered"),
	        ("Off-target Probes", "probes_no_exons"),
	    )
	    for metric_column, (metric_title, stats_key) in zip(metric_columns, coverage_metrics):
	        metric_column.metric(metric_title, f"{stats[stats_key]:,}")
	    metric_columns[3].metric("Total SNVs Generated", f"{total_snvs:,}")

	    st.header("🔍 Interactive Variant Browser")
	    st.caption("Inspect the generated synthetic alignments and mutations directly in the browser. Click on a variant in the navigator panel to jump to its locus.")
	    render_igv(res)

	    st.header("4 · Download Outputs")

	    first_row = st.columns(3)

	    bam_path = Path(res["bam_path"])
	    bai_path = Path(res["bai_path"]) if res["bai_path"] else None
	    vcf_path = Path(res["vcf_path"])
	    igv_bed_path = Path(res["igv_bed_path"])
	    fully_bed_path = Path(res["fully_covered_bed_path"]) if res.get("fully_covered_bed_path") else None

	    _offer_download(
	        first_row[0], "Synthetic BAM", bam_path,
	        "⬇ Download BAM", "synthetic.sorted.bam", "application/octet-stream",
	    )
	    _offer_download(
	        first_row[1], "BAM Index (.bai)", bai_path,
	        "⬇ Download BAI", "synthetic.sorted.bam.bai", "application/octet-stream",
	    )
	    _offer_download(
	        first_row[2], "Synthetic VCF", vcf_path,
	        "⬇ Download VCF", "synthetic.vcf", "text/plain",
	    )

	    second_row = st.columns(3)

	    _offer_download(
	        second_row[0], "IGV Variant Navigator BED", igv_bed_path,
	        "⬇ Download IGV BED", "igv_variant_navigator.bed", "text/plain",
	    )
	    _offer_download(
	        second_row[1], "Fully Covered Exons BED", fully_bed_path,
	        "⬇ Download Fully Covered Exons", "fully_covered_exons.bed", "text/plain",
	    )

	    mane_transcripts_bed_path = Path(res["mane_transcripts_bed_path"]) if res.get("mane_transcripts_bed_path") else None
	    _offer_download(
	        second_row[2], "MANE Transcripts BED12", mane_transcripts_bed_path,
	        "⬇ Download MANE Transcripts", "mane_transcripts.bed", "text/plain",
	    )

	# ── Footer ────────────────────────────────────────────────────────────────────
	# One-paragraph summary of the method, shown under a divider.
	_FOOTER_SENTENCES = (
	    "How it works: Your probe BED is intersected with MANE CDS exons.",
	    "For each covered exon, synthetic SNVs are placed in the CDS and flanking intronic positions.",
	    "For unused probes, a variant is placed at the midpoint.",
	    "Paired-end reads are generated at the target depth and VAF, then written to a sorted, indexed BAM alongside a matching VCF.",
	)

	st.divider()
	st.caption(" ".join(_FOOTER_SENTENCES))