import streamlit as st
import os
import tempfile
import subprocess
from pathlib import Path
import pipeline as pl
import json
import shutil
def _load_navigator_variants(bed_path):
    """Read the variant-navigator BED and return one locus entry per usable row.

    Lines starting with ``#``, blank lines, rows with fewer than four
    tab-separated fields, and rows whose start/end columns are not integers
    are skipped. A missing file yields an empty list.

    Each entry is a dict with:
      * ``locus``     - ``chrom:start-end`` padded by 50 bp on each side, with
                        the lower bound clamped to 1
      * ``name``      - the text after the last ``_`` of column 4
      * ``label``     - column 4 verbatim
      * ``pos_label`` - ``chrom:<start + 1>``
    """
    variants = []
    if not bed_path.exists():
        return variants
    with open(bed_path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            if raw_line.startswith("#") or not raw_line.strip():
                continue
            fields = raw_line.strip().split("\t")
            if len(fields) < 4:
                continue
            chrom, start_text, end_text, label = fields[:4]
            try:
                start = int(start_text)
                end = int(end_text)
            except ValueError:
                # A malformed coordinate must not take down the whole
                # results page; drop the row and keep going.
                continue
            variants.append({
                "locus": f"{chrom}:{max(1, start - 50)}-{end + 50}",
                "name": label.split("_")[-1],
                "label": label,
                "pos_label": f"{chrom}:{start + 1}",
            })
    return variants


def _build_igv_tracks(work_dir_name, include_mane, include_probes):
    """Return the ordered list of IGV track dicts for one pipeline run.

    All URLs point below ``/app/static/<work_dir_name>/``. The MANE and
    probes annotation tracks are only included when requested.
    """
    base_url = f"/app/static/{work_dir_name}"
    tracks = [
        {
            "name": "Reference",
            "type": "sequence",
            "order": 1,
        }
    ]
    if include_mane:
        tracks.append({
            "name": "MANE Transcripts",
            "type": "annotation",
            "format": "bed",
            "url": f"{base_url}/mane_transcripts.bed",
            "indexed": False,
            "order": 1.5,
            "color": "green",
            "displayMode": "EXPANDED",
        })
    if include_probes:
        tracks.append({
            "name": "Probes BED",
            "type": "annotation",
            "format": "bed",
            "url": f"{base_url}/fully_covered_exons.bed",
            "indexed": False,
            "order": 2,
            "color": "blue",
        })
    tracks.extend([
        {
            "name": "Variant Navigator BED",
            "type": "annotation",
            "format": "bed",
            "url": f"{base_url}/igv_variant_navigator.bed",
            "indexed": False,
            "order": 3,
            "color": "red",
        },
        {
            "name": "Synthetic VCF",
            "type": "variant",
            "format": "vcf",
            "url": f"{base_url}/synthetic.vcf",
            "indexed": False,
            "order": 4,
        },
        {
            "name": "Synthetic BAM",
            "type": "alignment",
            "format": "bam",
            "url": f"{base_url}/synthetic.sorted.bam",
            "indexURL": f"{base_url}/synthetic.sorted.bam.bai",
            "order": 5,
            "height": 300,
        },
    ])
    return tracks


def render_igv(res):
    """Render the variant-browser component for a finished pipeline run.

    Args:
        res: The results dict stored in session state. Reads
            ``work_dir_name``, ``genome_version`` (default ``"hg38"``),
            ``igv_bed_path``, ``fully_covered_bed_path`` and
            ``mane_transcripts_bed_path``.

    Shows a warning and returns early when ``work_dir_name`` is missing;
    otherwise emits an HTML component of height 620.
    """
    work_dir_name = res.get("work_dir_name")
    genome_version = res.get("genome_version", "hg38")
    if not work_dir_name:
        st.warning("No static files path found for IGV.js. Restart the pipeline to generate.")
        return

    variants = _load_navigator_variants(Path(res["igv_bed_path"]))
    variants_json = json.dumps(variants)

    probes_bed = res.get("fully_covered_bed_path")
    mane_bed = res.get("mane_transcripts_bed_path")
    probes_bed_exists = probes_bed is not None and Path(probes_bed).exists()
    mane_transcripts_exists = mane_bed is not None and Path(mane_bed).exists()

    tracks = _build_igv_tracks(work_dir_name, mane_transcripts_exists, probes_bed_exists)
    tracks_json = json.dumps(tracks)

    # NOTE(review): genome_version, variants_json and tracks_json are not
    # interpolated into the template visible here - presumably the IGV.js
    # markup is meant to consume them; confirm the template is complete.
    html_content = f"""
Variant Navigator ({len(variants)})
"""
    st.components.v1.html(html_content, height=620, scrolling=False)
# Browser-tab settings and page heading. The icon/title emoji were stored as
# mojibake and are restored to the intended DNA glyph.
st.set_page_config(
    page_title="In Silico Controls Generator",
    page_icon="🧬",
    layout="wide",
)
st.title("🧬 In Silico Controls Generator")
st.caption(
    "Generate synthetic BAM + VCF files with realistic variants "
    "derived from your probe panel and MANE exon annotations."
)
# ── Sidebar: parameters ──────────────────────────────────────────────────────
# The uploader widget (key "uploaded_bed_file") is declared after the sidebar,
# so read its current value from session state for the diagnostics below.
uploaded_bed = st.session_state.get("uploaded_bed_file")

with st.sidebar:
    st.header("Pipeline Parameters")

    st.subheader("Sequencing Parameters")
    depth = st.number_input("Target read depth per variant", min_value=1, max_value=10000, value=100, step=10)
    vaf = st.slider("Variant allele frequency (VAF)", min_value=0.01, max_value=1.0, value=0.20, step=0.01, format="%.2f")
    read_length = st.number_input("Read length (bp)", min_value=50, max_value=300, value=150, step=10)

    st.subheader("Sequencing Technology")
    seq_mode = st.radio(
        "Sequencing assay style",
        options=["Hybrid Capture (Staggered reads)", "PCR Amplicon (Identical start/ends)"],
        index=0,
        help="Hybrid Capture simulates sheared fragments with staggered read start/end coordinates. PCR Amplicon simulates amplicon sequencing where all reads start and end exactly at the probe/target coordinate boundaries."
    )
    if seq_mode == "Hybrid Capture (Staggered reads)":
        st.subheader("Fragment Insert Size")
        insert_size = st.number_input("Mean insert size (bp)", min_value=100, max_value=1000, value=379, step=10)
        insert_std = st.number_input("Insert size std dev (bp)", min_value=0, max_value=200, value=20, step=5)
    else:
        # Amplicon mode exposes no insert-size controls; fixed values are used.
        insert_size = 250
        insert_std = 0

    st.subheader("Indel Parameters")
    indel_interval = st.number_input(
        "Indel interval (0 = SNVs only)",
        min_value=0, max_value=100, value=10, step=1,
        help="Make every Nth variant an indel. Set to 0 to generate only SNVs.",
    )

    st.divider()
    st.subheader("Reference Genome")
    genome_version = st.selectbox(
        "Genome assembly",
        options=["hg38", "hg19"],
        index=0,
        help="Choose the reference genome version (hg38 or hg19)."
    )
    ref_mode = st.radio(
        "FASTA source",
        options=["Use cached / download", "Custom path"],
        help="Downloads and caches the selected assembly, or lets you point to a custom local path.",
    )
    custom_ref_path = ""
    if ref_mode == "Custom path":
        custom_ref_path = st.text_input(
            f"Path to {genome_version}.fa",
            placeholder=f"/data/references/{genome_version}.fa",
            help="Must be an indexed FASTA (.fa + .fa.fai).",
        )

    st.divider()
    st.subheader("Targeting Mode")
    if genome_version == "hg19":
        # hg19 forces direct mode, so the radio is replaced by a notice.
        st.info("ℹ️ MANE transcript annotations are hg38-only. Targeting mode is set to Direct Probe Coordinates for hg19.")
        target_mode = "Direct Probe Coordinates"
    else:
        target_mode = st.radio(
            "Variant targeting logic",
            options=["MANE Transcript Exons/Introns", "Direct Probe Coordinates"],
            help="MANE Transcripts Mode places variants in coding exons and flanking introns of protein-coding genes. Direct Probe Mode places a single variant inside each probe coordinate itself, completely ignoring gene annotations."
        )

    if target_mode == "MANE Transcript Exons/Introns":
        st.subheader("Variant Locations")
        include_cds = st.checkbox("Generate CDS variants", value=True, help="Place variants in the coding sequence (CDS) of MANE exons.")
        include_intron = st.checkbox("Generate flanking intronic variants", value=True, help="Place variants in the flanking introns of MANE exons.")
        include_offtarget = st.checkbox("Generate off-target (unused probe) variants", value=True, help="Place variants in the midpoint of probes with no MANE exon coverage.")
        direct_window_size = 0
    else:
        include_cds = False
        include_intron = False
        include_offtarget = False
        st.subheader("Direct Probe Settings")
        direct_variant_strategy = st.radio(
            "Variant placement strategy",
            options=["One variant per N bp window", "Single random variant per probe"],
            index=0,
            help="Choose whether to generate one variant per N bp window across the probe coordinates or a single random variant per probe."
        )
        if direct_variant_strategy == "One variant per N bp window":
            direct_window_size = st.number_input(
                "Window size (bp)",
                min_value=1, max_value=1000, value=10, step=1,
                help="Place one variant randomly inside each non-overlapping window of this size."
            )
        else:
            # A window size of 0 is what is passed on for the
            # single-variant-per-probe strategy.
            direct_window_size = 0

    st.subheader("Read Group")
    rg_id = st.text_input("Read Group ID", value="CPDV2510843-SEQ-251103")
    rg_sm = st.text_input("Sample Name", value="CPDV2510843-SEQ-251103")

    st.divider()
    st.subheader("🛠️ Debug Info")
    st.caption("Helpful diagnostics for troubleshooting deployment status.")
    st.write("Streamlit Version:", st.__version__)
    st.write("File Uploaded:", uploaded_bed is not None)
    if uploaded_bed:
        st.write("Filename:", uploaded_bed.name)
    st.write("probes_df in state:", "probes_df" in st.session_state)
    if "probes_df" in st.session_state:
        st.write("probes_df count:", len(st.session_state["probes_df"]))
    st.write("CWD:", os.getcwd())
    st.write("Script Path:", __file__)
    st.write("static/ exists:", os.path.exists("static"))
    st.write("src/static/ exists:", os.path.exists("src/static"))
    # Show at most five entries from each static directory that exists.
    if os.path.exists("static"):
        st.write("static/ folders:", os.listdir("static")[:5])
    if os.path.exists("src/static"):
        st.write("src/static/ folders:", os.listdir("src/static")[:5])
# ── Main area ────────────────────────────────────────────────────────────────
# Two-column row: the BED uploader on the left, cache indicators on the right.
col_upload, col_info = st.columns([2, 1])

with col_upload:
    st.header("1 · Upload Probes BED")
    uploaded_bed = st.file_uploader(
        "Upload your probes BED file",
        type=["bed"],
        key="uploaded_bed_file",
        help="Standard BED3+ format (chrom, start, end, ...)",
    )

with col_info:
    st.header("Cache Status")
    mane_cached = pl.MANE_BED12.exists()
    # The reference counts as cached only when both FASTA and index exist.
    if genome_version == "hg38":
        ref_cached = pl.HG38_FA.exists() and pl.HG38_FAI.exists()
    else:
        ref_cached = pl.HG19_FA.exists() and pl.HG19_FAI.exists()
    bigbed_cached = pl.BIGBEDTOBED_PATH.exists()

    cache_rows = (
        (bigbed_cached, "bigBedToBed"),
        (mane_cached, "MANE annotation"),
        (ref_cached, f"{genome_version} reference"),
    )
    for is_cached, item_label in cache_rows:
        st.markdown(f"{'✅' if is_cached else '⬜'} {item_label}")

    if not ref_cached and ref_mode == "Use cached / download":
        st.warning(f"{genome_version} not cached. First run will download and index the assembly, which may take 5–10 minutes.")
# ── Step 1.5: Customize Probes ───────────────────────────────────────────────
if uploaded_bed:
    # (Re)parse when nothing is loaded yet or a differently named file arrives.
    needs_parse = (
        "probes_df" not in st.session_state
        or st.session_state.get("uploaded_file_name") != uploaded_bed.name
    )
    if needs_parse:
        import pandas as pd
        import io

        # Drop whatever the previous upload left behind first, so a failed or
        # empty parse cannot leave the old probes displayed (and later run)
        # under the new file's name.
        st.session_state.pop("probes_df", None)
        st.session_state.pop("uploaded_file_name", None)
        try:
            uploaded_bed.seek(0)
            content = uploaded_bed.read().decode("utf-8", errors="ignore")
            # Strip blank lines, comments and UCSC "track"/"browser" headers.
            lines = [
                line
                for line in content.splitlines()
                if line.strip() and not line.startswith(("#", "track", "browser"))
            ]
            if lines:
                df = pd.read_csv(io.StringIO("\n".join(lines)), sep="\t", header=None)
                # First three columns get BED names; extras become col_3, col_4, ...
                cols = ["chrom", "start", "end"]
                if len(df.columns) > 3:
                    cols += [f"col_{i}" for i in range(3, len(df.columns))]
                df.columns = cols[:len(df.columns)]
                df.insert(0, "Select", True)
                st.session_state["probes_df"] = df
                st.session_state["uploaded_file_name"] = uploaded_bed.name
                # Forget any sampling state tied to the previous file.
                st.session_state.pop("sample_seed", None)
                st.session_state.pop("prev_frac", None)
            else:
                st.error("Uploaded BED file appears to be empty or contains only comments.")
        except Exception as e:
            st.error(f"Error parsing BED file: {e}")

    if "probes_df" in st.session_state:
        df = st.session_state["probes_df"]
        st.header("1.5 · Customize Probes")
        st.caption(f"Loaded {len(df):,} probes from {uploaded_bed.name}. Customize which regions will be processed below.")

        col_mode, col_rand = st.columns([1, 1])
        with col_mode:
            subset_mode = st.radio(
                "Selection mode",
                options=["All Probes", "Manual Selection (below)", "Random Sampling"],
                index=0,
                help="Choose whether to run all probes, manually check/uncheck probes in the list, or select a random fraction of the probes."
            )
        with col_rand:
            if subset_mode == "Random Sampling":
                sample_frac = st.slider("Fraction of probes to keep", min_value=0.01, max_value=1.00, value=0.10, step=0.01)
                resample_btn = st.button("🎲 Resample")
                # Draw a new seed only on first use, an explicit resample, or
                # a changed fraction; otherwise the stored seed is reused so
                # the same rows stay selected across reruns.
                if "sample_seed" not in st.session_state or resample_btn or st.session_state.get("prev_frac") != sample_frac:
                    import random
                    st.session_state["sample_seed"] = random.randint(0, 100000)
                    st.session_state["prev_frac"] = sample_frac
                sampled_df = df.sample(frac=sample_frac, random_state=st.session_state["sample_seed"])
                df["Select"] = df.index.isin(sampled_df.index)
            elif subset_mode == "All Probes":
                df["Select"] = True

        # Render table editor
        st.markdown("#### 📋 Probes List")
        st.caption("Double-click a cell to search, or check/uncheck boxes to filter targets.")
        edited_df = st.data_editor(
            df,
            use_container_width=True,
            hide_index=True,
            # Only the checkbox column is editable.
            disabled=[col for col in df.columns if col != "Select"],
            column_config={
                "Select": st.column_config.CheckboxColumn(
                    "Select",
                    help="Uncheck to exclude this region from variant generation",
                    default=True
                )
            }
        )
        st.session_state["probes_df"] = edited_df

        total_selected = int(edited_df["Select"].eq(True).sum())
        st.info(f"Selected {total_selected:,} of {len(df):,} probes ({total_selected/len(df)*100:.1f}%) for variant generation.")
        st.divider()
# ── Step 2: Run pipeline ─────────────────────────────────────────────────────
st.header("2 · Run Pipeline")

# Nothing below this point is rendered until a BED file has been uploaded.
if not uploaded_bed:
    st.info("Upload a probes BED file to enable the pipeline.")
    st.stop()

run_btn = st.button("▶ Run Pipeline", type="primary", use_container_width=True)

# Clear results when a new run is requested
if run_btn:
    st.session_state.pop("results", None)
    st.session_state.pop("log_lines", None)
# ── Execute pipeline ─────────────────────────────────────────────────────────
if run_btn:
    # Resolve the reference FASTA: the cached assembly or the user's own path.
    if ref_mode == "Use cached / download":
        fasta_path = pl.HG38_FA if genome_version == "hg38" else pl.HG19_FA
    else:
        fasta_path = Path(custom_ref_path)

    if ref_mode == "Custom path":
        if not custom_ref_path:
            st.error(f"Please provide a path to your {genome_version}.fa file.")
            st.stop()
        if not fasta_path.exists():
            st.error(f"FASTA file not found: {fasta_path}")
            st.stop()
        fai = Path(str(fasta_path) + ".fai")
        if not fai.exists():
            st.warning("No .fai index found. Attempting to index with samtools faidx...")
            # The path is typed by the user: pass it as an argument vector,
            # never through a shell, so it cannot inject extra commands.
            try:
                index_proc = subprocess.run(
                    ["samtools", "faidx", str(fasta_path)],
                    capture_output=True,
                )
            except FileNotFoundError:
                st.error("samtools was not found on PATH; cannot index the FASTA file.")
                st.stop()
            if index_proc.returncode != 0 or not fai.exists():
                index_err = index_proc.stderr.decode("utf-8", errors="replace").strip()
                st.error(f"samtools faidx failed for {fasta_path}: {index_err}")
                st.stop()

    # Filter for selected probes
    if "probes_df" in st.session_state:
        df = st.session_state["probes_df"]
        selected_df = df[df["Select"].eq(True)]
    else:
        st.error("No probe data found in session state.")
        st.stop()
    if len(selected_df) == 0:
        st.error("No probes selected! Please select at least one probe in Step 1.5.")
        st.stop()

    # Convert back to BED format (tab-separated, without the 'Select' column)
    bed_cols = [col for col in selected_df.columns if col != "Select"]
    bed_text = selected_df[bed_cols].to_csv(sep="\t", header=False, index=False)
    work_dir = Path(tempfile.mkdtemp(prefix="insilicocontrols_"))
    probes_bed = work_dir / "probes.bed"
    probes_bed.write_text(bed_text)

    log_expander = st.expander("Pipeline log", expanded=True)
    log_area = log_expander.empty()
    log_lines = []

    def append_log(msg):
        """Record *msg* and redraw the log area with the last 80 entries."""
        log_lines.append(str(msg))
        log_area.code("\n".join(log_lines[-80:]), language=None)

    progress_bar = st.progress(0.0, text="Starting...")

    def update_progress(fraction, label=""):
        """Move the progress bar to *fraction* (capped at 1.0) with *label*."""
        progress_bar.progress(min(fraction, 1.0), text=label)

    try:
        # Pre-ensure reference genome
        if ref_mode == "Use cached / download":
            update_progress(0.08, f"Ensuring {genome_version} reference...")
            append_log(f"\n=== {genome_version} Reference ===")
            pl.ensure_reference(genome_version=genome_version, log_func=append_log)

        if target_mode == "MANE Transcript Exons/Introns":
            update_progress(0.02, "Setting up tools...")
            append_log("=== Setting up tools ===")
            pl.ensure_bigbedtobed(append_log)

            update_progress(0.05, "Ensuring MANE annotation...")
            append_log("\n=== MANE Annotation ===")
            pl.ensure_mane(append_log)

            update_progress(0.15, "Parsing MANE exons...")
            append_log("\n=== Parsing MANE Exons ===")
            exons_bed = pl.parse_mane_exons(work_dir, append_log)

            update_progress(0.25, "Analyzing probe coverage...")
            append_log("\n=== Coverage Analysis ===")
            stats, fully_bed, partial_bed, unused_bed = pl.analyze_coverage(
                work_dir, probes_bed, exons_bed, append_log
            )
            append_log("\n============================================")
            append_log(" COVERAGE SUMMARY ")
            append_log("============================================")
            append_log(f"Exons with >95% coverage (USED): {stats['fully_covered']}")
            append_log(f"Exons with partial coverage (USED): {stats['partially_covered']}")
            append_log(f"Probes with no exon coverage: {stats['probes_no_exons']}")
            append_log(f"Unused contiguous probes (ADDED): {stats['unused_probes']}")
            append_log("============================================")

            # Subset MANE transcripts intersecting with target probes
            # NOTE(review): merged_probes.bed is not written in this script -
            # presumably pl.analyze_coverage creates it in work_dir; confirm.
            merged_probes = work_dir / "merged_probes.bed"
            mane_transcripts_bed = work_dir / "mane_transcripts.bed"
            update_progress(0.30, "Subsetting MANE transcripts...")
            append_log("\n=== Subsetting MANE Transcripts ===")
            pl.run_cmd(f"bedtools intersect -a {pl.MANE_BED12} -b {merged_probes} -wa -u > {mane_transcripts_bed}", append_log)

            update_progress(0.35, "Generating target SNVs...")
            append_log("\n=== Generating Target SNVs ===")
            snvs_bed, total_snvs = pl.generate_target_snvs(
                work_dir=work_dir,
                fully_bed=fully_bed,
                partial_bed=partial_bed,
                unused_bed=unused_bed,
                include_cds=include_cds,
                include_intron=include_intron,
                include_offtarget=include_offtarget,
                mode="mane",
                log_func=append_log
            )
        else:  # Direct Probe Coordinates mode
            # No coverage analysis runs in this mode; zeroed stats keep the
            # results section's metrics renderable.
            stats = {
                "fully_covered": 0,
                "partially_covered": 0,
                "probes_no_exons": 0,
                "unused_probes": 0,
            }
            fully_bed = None
            partial_bed = None
            unused_bed = None
            mane_transcripts_bed = None

            update_progress(0.35, "Generating target SNVs...")
            append_log("\n=== Generating Target SNVs (Direct BED Mode) ===")
            snvs_bed, total_snvs = pl.generate_target_snvs(
                work_dir=work_dir,
                fully_bed=None,
                partial_bed=None,
                unused_bed=None,
                mode="direct_bed",
                probes_bed=probes_bed,
                direct_window_size=direct_window_size,
                log_func=append_log
            )

        append_log("\n============================================")
        append_log(" VARIANT SUMMARY ")
        append_log("============================================")
        append_log(f"Total SNVs generated for BAM: {total_snvs}")
        append_log("============================================")

        update_progress(0.40, "Generating synthetic BAM...")
        append_log("\n=== Generating Synthetic BAM ===")

        def bam_progress(fraction, label):
            """Map BAM-generation progress (0-1) onto the 0.40-0.95 band."""
            update_progress(0.40 + fraction * 0.55, label)

        sorted_bam, output_vcf = pl.generate_synthetic_bam(
            work_dir=work_dir,
            snvs_bed=snvs_bed,
            fasta_path=fasta_path,
            depth=depth,
            vaf=vaf,
            rg_id=rg_id,
            rg_sm=rg_sm,
            insert_size=insert_size,
            insert_std=insert_std,
            indel_interval=indel_interval,
            read_length=read_length,
            sequencing_mode="pcr_amplicon" if seq_mode.startswith("PCR Amplicon") else "hybrid_capture",
            log_func=append_log,
            progress_func=bam_progress,
        )
        update_progress(1.0, "Done!")
        append_log("\n✓ Pipeline complete.")

        bai_path = Path(str(sorted_bam) + ".bai")
        vcf_path = Path(output_vcf)
        igv_bed_path = Path(snvs_bed)
        fully_bed_path = Path(fully_bed) if fully_bed else None

        # Copy to static directories for IGV.js visualization (both root and
        # script-relative). When the two resolve to the same directory, copy
        # only once rather than rewriting the BAM over itself.
        work_dir_name = work_dir.name
        static_dests = {}
        for candidate in (
            Path("static") / work_dir_name,
            Path(__file__).parent / "static" / work_dir_name,
        ):
            static_dests.setdefault(candidate.resolve(), candidate)
        for dest in static_dests.values():
            dest.mkdir(parents=True, exist_ok=True)
            shutil.copy(sorted_bam, dest / "synthetic.sorted.bam")
            if bai_path.exists():
                shutil.copy(bai_path, dest / "synthetic.sorted.bam.bai")
            shutil.copy(vcf_path, dest / "synthetic.vcf")
            shutil.copy(igv_bed_path, dest / "igv_variant_navigator.bed")
            if fully_bed_path and fully_bed_path.exists():
                shutil.copy(fully_bed_path, dest / "fully_covered_exons.bed")
            if mane_transcripts_bed and mane_transcripts_bed.exists():
                shutil.copy(mane_transcripts_bed, dest / "mane_transcripts.bed")

        # Store paths only — never load large files into session_state memory
        st.session_state["results"] = {
            "stats": stats,
            "total_snvs": total_snvs,
            "bam_path": str(sorted_bam),
            "bai_path": str(bai_path) if bai_path.exists() else None,
            "vcf_path": str(vcf_path),
            "igv_bed_path": str(igv_bed_path),
            "fully_covered_bed_path": str(fully_bed_path) if fully_bed_path else None,
            "mane_transcripts_bed_path": str(mane_transcripts_bed) if mane_transcripts_bed else None,
            "work_dir_name": work_dir_name,
            "genome_version": genome_version,
        }
        st.session_state["log_lines"] = log_lines[:]
    except Exception as e:
        # Surface the failure in the page and the log, then re-raise.
        st.error(f"Pipeline failed: {e}")
        append_log(f"\n✗ ERROR: {e}")
        raise
# ── Results section (persists across reruns via session_state) ───────────────
def _render_download(column, heading, path, button_label, file_name, mime):
    """Show *heading* in *column* and, if *path* exists, a download button.

    Args:
        column: Streamlit column container to render into.
        heading: Text shown in bold above the button.
        path: ``Path`` of the file to offer, or ``None`` when the run did
            not produce it.
        button_label: Label of the download button.
        file_name: Name suggested to the browser for the saved file.
        mime: MIME type passed to the download button.
    """
    with column:
        st.markdown(f"**{heading}**")
        if path and path.exists():
            with open(path, "rb") as handle:
                st.download_button(
                    button_label,
                    data=handle,
                    file_name=file_name,
                    mime=mime,
                    use_container_width=True,
                )


if "results" in st.session_state:
    res = st.session_state["results"]
    stats = res["stats"]
    total_snvs = res["total_snvs"]
    st.success("Pipeline completed successfully!")

    # Show log if available and pipeline didn't just run
    if not run_btn and "log_lines" in st.session_state:
        with st.expander("Pipeline log", expanded=False):
            st.code("\n".join(st.session_state["log_lines"][-80:]), language=None)

    st.header("3 · Results")
    m1, m2, m3, m4 = st.columns(4)
    m1.metric("Fully Covered Exons", f"{stats['fully_covered']:,}")
    m2.metric("Partially Covered Exons", f"{stats['partially_covered']:,}")
    m3.metric("Off-target Probes", f"{stats['probes_no_exons']:,}")
    m4.metric("Total SNVs Generated", f"{total_snvs:,}")

    st.header("🔍 Interactive Variant Browser")
    st.caption("Inspect the generated synthetic alignments and mutations directly in the browser. Click on a variant in the navigator panel to jump to its locus.")
    render_igv(res)

    st.header("4 · Download Outputs")
    # Optional outputs are stored as None when the run did not produce them.
    bam_path = Path(res["bam_path"])
    bai_path = Path(res["bai_path"]) if res["bai_path"] else None
    vcf_path = Path(res["vcf_path"])
    igv_bed_path = Path(res["igv_bed_path"])
    fully_bed_path = Path(res["fully_covered_bed_path"]) if res.get("fully_covered_bed_path") else None
    mane_transcripts_bed_path = Path(res["mane_transcripts_bed_path"]) if res.get("mane_transcripts_bed_path") else None

    # First row: alignment, its index and the VCF.
    dl1, dl2, dl3 = st.columns(3)
    _render_download(dl1, "Synthetic BAM", bam_path, "⬇ Download BAM", "synthetic.sorted.bam", "application/octet-stream")
    _render_download(dl2, "BAM Index (.bai)", bai_path, "⬇ Download BAI", "synthetic.sorted.bam.bai", "application/octet-stream")
    _render_download(dl3, "Synthetic VCF", vcf_path, "⬇ Download VCF", "synthetic.vcf", "text/plain")

    # Second row: the BED annotation files.
    dl4, dl5, dl6 = st.columns(3)
    _render_download(dl4, "IGV Variant Navigator BED", igv_bed_path, "⬇ Download IGV BED", "igv_variant_navigator.bed", "text/plain")
    _render_download(dl5, "Fully Covered Exons BED", fully_bed_path, "⬇ Download Fully Covered Exons", "fully_covered_exons.bed", "text/plain")
    _render_download(dl6, "MANE Transcripts BED12", mane_transcripts_bed_path, "⬇ Download MANE Transcripts", "mane_transcripts.bed", "text/plain")
# ── Footer ───────────────────────────────────────────────────────────────────
# Closing rule followed by a one-paragraph summary of what the pipeline does.
st.divider()
footer_sentences = [
    "**How it works:** Your probe BED is intersected with MANE CDS exons.",
    "For each covered exon, synthetic SNVs are placed in the CDS and flanking intronic positions.",
    "For unused probes, a variant is placed at the midpoint.",
    "Paired-end reads are generated at the target depth and VAF, then written to a sorted, indexed BAM alongside a matching VCF.",
]
st.caption(" ".join(footer_sentences))