Spaces:
Sleeping
Sleeping
Update app.py
Browse files✅ GenBank and FASTA upload
✅ Full GenBank record fetching from NCBI by accession ID
✅ Dynamic product size slider
✅ Primer binding visualization with pair selector
✅ Plot export as PNG or SVG
app.py
CHANGED
|
@@ -1,25 +1,27 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
import primer3
|
| 4 |
-
from Bio import SeqIO
|
| 5 |
import matplotlib.pyplot as plt
|
| 6 |
from io import StringIO, BytesIO
|
| 7 |
import os
|
| 8 |
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
st.sidebar.header("User Guide")
|
| 11 |
st.sidebar.info("""
|
| 12 |
-
1. Upload a GenBank or
|
| 13 |
-
2.
|
| 14 |
-
3. Set product size
|
| 15 |
-
4.
|
| 16 |
-
5. Select a pair to visualize and download plot.
|
| 17 |
""")
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
def extract_features_from_genbank(content):
|
| 22 |
-
record = SeqIO.read(StringIO(content.decode("utf-8")), "genbank")
|
| 23 |
features = {'CDS': [], 'tRNA': [], 'gene': []}
|
| 24 |
for f in record.features:
|
| 25 |
if f.type in features:
|
|
@@ -29,6 +31,10 @@ def extract_features_from_genbank(content):
|
|
| 29 |
def parse_fasta(content):
|
| 30 |
return SeqIO.read(StringIO(content.decode("utf-8")), "fasta")
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
def design_primers(seq, size_range, count):
|
| 33 |
min_size, max_size = map(int, size_range.split('-'))
|
| 34 |
return primer3.bindings.designPrimers(
|
|
@@ -62,67 +68,90 @@ def plot_primer_binding(seq_len, left_start, left_len, right_start, right_len, p
|
|
| 62 |
ax.set_title("Primer Binding Sites and PCR Product", fontsize=14)
|
| 63 |
return fig
|
| 64 |
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
ext = os.path.splitext(uploaded_file.name)[1].lower()
|
| 71 |
-
if ext in ['.
|
| 72 |
-
features, record = extract_features_from_genbank(uploaded_file.getvalue())
|
| 73 |
-
st.subheader("Feature Selection")
|
| 74 |
-
ftype = st.selectbox("Select feature type", ['CDS', 'tRNA', 'gene'])
|
| 75 |
-
options = [
|
| 76 |
-
f"{f.qualifiers.get('gene', [''])[0]} ({f.location.start}:{f.location.end}) [length: {len(f.location)} bp]"
|
| 77 |
-
for f in features[ftype]
|
| 78 |
-
]
|
| 79 |
-
selected = st.selectbox("Select feature", range(len(options)), format_func=lambda x: options[x])
|
| 80 |
-
selected_feature = features[ftype][selected]
|
| 81 |
-
sequence = selected_feature.extract(record.seq)
|
| 82 |
-
elif ext in ['.fa', '.fasta']:
|
| 83 |
record = parse_fasta(uploaded_file.getvalue())
|
| 84 |
sequence = record.seq
|
| 85 |
-
st.success(f"FASTA
|
|
|
|
|
|
|
| 86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
if sequence:
|
| 88 |
st.code(str(sequence), language="text")
|
| 89 |
-
st.subheader("Primer Design
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
if "primers" not in st.session_state:
|
| 94 |
-
st.session_state.primers = []
|
| 95 |
-
st.session_state.df = pd.DataFrame()
|
| 96 |
|
| 97 |
if st.button("Design Primers"):
|
| 98 |
with st.spinner("Designing primers..."):
|
| 99 |
-
|
| 100 |
rows = []
|
| 101 |
-
for i in range(
|
| 102 |
-
if f'PRIMER_LEFT_{i}_SEQUENCE' in
|
| 103 |
rows.append({
|
| 104 |
"Pair": i + 1,
|
| 105 |
-
"Left Seq":
|
| 106 |
-
"Right Seq":
|
| 107 |
-
"Left TM": round(
|
| 108 |
-
"Right TM": round(
|
| 109 |
-
"Product Size":
|
| 110 |
-
"L_Pos":
|
| 111 |
-
"L_Len":
|
| 112 |
-
"R_Pos":
|
| 113 |
-
"R_Len":
|
| 114 |
})
|
| 115 |
-
|
| 116 |
|
| 117 |
-
if not
|
| 118 |
-
st.subheader("Designed
|
| 119 |
-
st.dataframe(
|
| 120 |
|
| 121 |
-
csv =
|
| 122 |
st.download_button("Download Primer Table", csv, "primers.csv", "text/csv")
|
| 123 |
|
| 124 |
-
selected_pair = st.selectbox("Select Primer Pair to Visualize",
|
| 125 |
-
row =
|
| 126 |
|
| 127 |
fig = plot_primer_binding(
|
| 128 |
len(sequence),
|
|
@@ -132,11 +161,10 @@ if sequence:
|
|
| 132 |
)
|
| 133 |
st.pyplot(fig)
|
| 134 |
|
| 135 |
-
|
| 136 |
buf = BytesIO()
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
st.download_button(f"Download Plot ({export_fmt})", buf.getvalue(), f"primer_plot.{fmt}", f"image/{fmt}")
|
| 140 |
|
| 141 |
# Footer
|
| 142 |
st.markdown("""
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
import primer3
|
| 4 |
+
from Bio import SeqIO, Entrez
|
| 5 |
import matplotlib.pyplot as plt
|
| 6 |
from io import StringIO, BytesIO
|
| 7 |
import os
|
| 8 |
|
| 9 |
+
# NCBI asks every E-utilities client to identify itself with a contact
# address. Read it from the NCBI_EMAIL environment variable so a real address
# can be configured per deployment without editing the source; the placeholder
# is only the fallback when the variable is unset or empty.
Entrez.email = os.environ.get("NCBI_EMAIL") or "your.email@example.com"

# Page configuration is kept ahead of the first rendering call (the sidebar
# below); older Streamlit releases reject set_page_config after other st.* calls.
st.set_page_config(page_title="PCR Primer Designer", layout="wide")

# Sidebar: short usage instructions.
st.sidebar.header("User Guide")
st.sidebar.info("""
1. Upload a GenBank/FASTA file or enter an NCBI accession ID.
2. For GenBank: select a feature for primer design.
3. Set PCR product size and number of primers.
4. View, select, and download primer designs and visualizations.
""")
|
| 21 |
|
| 22 |
+
# Helper functions
|
|
|
|
| 23 |
def extract_features_from_genbank(content):
|
| 24 |
+
record = SeqIO.read(StringIO(content.decode("utf-8") if isinstance(content, bytes) else content), "genbank")
|
| 25 |
features = {'CDS': [], 'tRNA': [], 'gene': []}
|
| 26 |
for f in record.features:
|
| 27 |
if f.type in features:
|
|
|
|
| 31 |
def parse_fasta(content):
    """Parse a single-record FASTA payload with ``SeqIO.read``.

    Args:
        content: FASTA text, either UTF-8 encoded ``bytes`` (as passed from
            ``uploaded_file.getvalue()``) or an already-decoded ``str``.

    Returns:
        The record object returned by ``SeqIO.read(..., "fasta")``.

    Raises:
        UnicodeDecodeError: If ``content`` is bytes that are not valid UTF-8.
        ValueError: Propagated from ``SeqIO.read`` when the payload cannot be
            read as exactly one FASTA record.
    """
    # Accept str as well as bytes, matching extract_features_from_genbank.
    text = content.decode("utf-8") if isinstance(content, bytes) else content
    return SeqIO.read(StringIO(text), "fasta")
|
| 33 |
|
| 34 |
+
def fetch_genbank_from_ncbi(accession):
    """Download one nucleotide record from NCBI as GenBank flat-file text.

    Args:
        accession: NCBI nucleotide accession ID (for example ``"NM_000546"``).
            Surrounding whitespace is ignored.

    Returns:
        Whatever ``handle.read()`` yields for an ``Entrez.efetch`` request made
        with ``db="nucleotide"``, ``rettype="gb"`` and ``retmode="text"``.

    Raises:
        ValueError: If ``accession`` is ``None``, empty, or whitespace only.
            Raised before any network request is made.
    """
    accession = "" if accession is None else str(accession).strip()
    if not accession:
        # Fail fast instead of sending an empty id to NCBI.
        raise ValueError("An NCBI accession ID is required.")

    with Entrez.efetch(db="nucleotide", id=accession, rettype="gb", retmode="text") as handle:
        return handle.read()
|
| 37 |
+
|
| 38 |
def design_primers(seq, size_range, count):
|
| 39 |
min_size, max_size = map(int, size_range.split('-'))
|
| 40 |
return primer3.bindings.designPrimers(
|
|
|
|
| 68 |
ax.set_title("Primer Binding Sites and PCR Product", fontsize=14)
|
| 69 |
return fig
|
| 70 |
|
| 71 |
+
# Sequence source: NCBI accession lookup in the first column, file upload in
# the second.
st.subheader("Input Sequence")
ncbi_col, upload_col = st.columns(2)

with ncbi_col:
    accession_input = st.text_input("Enter NCBI accession (e.g., NM_000546)")
    fetch_btn = st.button("Fetch GenBank Record")

with upload_col:
    uploaded_file = st.file_uploader(
        "Or upload GenBank/FASTA",
        type=["gb", "gbk", "fa", "fasta"],
    )

# Per-run defaults; the input-handling and primer-design sections reassign them.
sequence, record = None, None
df = pd.DataFrame()
|
| 84 |
+
|
| 85 |
+
# Handle input
#
# Streamlit re-executes this script on every widget interaction, and
# st.button() is True only for the run triggered by the click. The fetched
# GenBank text is therefore kept in st.session_state so the record is still
# available on the reruns caused by the feature selectors below.
#
# Source priority for a run: a fetch requested in this run, then an uploaded
# file, then the most recently fetched NCBI record.
features = None  # stays None for FASTA input, which has no features to pick from
accession = accession_input.strip()

if fetch_btn and accession:
    try:
        gb_text = fetch_genbank_from_ncbi(accession)
        features, record = extract_features_from_genbank(gb_text)
        st.session_state["ncbi_genbank_text"] = gb_text
        st.success(f"Fetched GenBank record for {accession_input}")
    except Exception as e:
        # Drop any previously cached record so a failed fetch does not
        # silently keep showing the old one.
        features, record = None, None
        st.session_state.pop("ncbi_genbank_text", None)
        st.error(f"Error fetching GenBank record: {e}")

elif uploaded_file:
    ext = os.path.splitext(uploaded_file.name)[1].lower()
    try:
        if ext in ['.fa', '.fasta']:
            record = parse_fasta(uploaded_file.getvalue())
            sequence = record.seq
            st.success(f"FASTA file loaded: {len(sequence)} bp")
        else:
            features, record = extract_features_from_genbank(uploaded_file.getvalue())
    except ValueError as e:
        # Covers undecodable bytes (UnicodeDecodeError is a ValueError) and
        # parse failures reported by SeqIO.read.
        features, record, sequence = None, None, None
        st.error(f"Error reading uploaded file: {e}")

elif "ncbi_genbank_text" in st.session_state:
    features, record = extract_features_from_genbank(st.session_state["ncbi_genbank_text"])

# Feature selection if GenBank
# Keyed on `features` rather than hasattr(record, "features"): that check is
# also true for FASTA records, which must not be sent down the GenBank path.
if features is not None:
    st.subheader("Feature Selection")
    ftype = st.selectbox("Feature type", ['CDS', 'tRNA', 'gene'])
    candidates = features[ftype]
    if candidates:
        options = [
            f"{f.qualifiers.get('gene', [''])[0]} ({f.location.start}:{f.location.end}) [length: {len(f.location)} bp]"
            for f in candidates
        ]
        selected = st.selectbox("Select feature", range(len(options)), format_func=lambda x: options[x])
        sequence = candidates[selected].extract(record.seq)
    else:
        # An empty option list makes the selectbox return None, which cannot
        # be used as a list index.
        st.warning(f"No {ftype} features found in this record.")
|
| 118 |
+
|
| 119 |
+
# Primer design UI
if sequence:
    st.code(str(sequence), language="text")
    st.subheader("Primer Design Settings")

    seq_len = len(sequence)
    min_product = 100  # smallest PCR product size offered by the slider
    if seq_len > min_product:
        size_range = st.slider(
            "PCR product size (bp)",
            min_product,
            seq_len,
            (min_product, min(500, seq_len)),
            # A 10 bp step does not fit when the bounds are under 10 bp apart.
            step=10 if seq_len - min_product >= 10 else 1,
        )
    else:
        # A slider from 100 up to seq_len has no valid range for sequences of
        # 100 bp or fewer, so report it instead of building the widget.
        size_range = None
        st.warning(
            f"Sequence is only {seq_len} bp long; primer design needs more than {min_product} bp."
        )
    pair_count = st.number_input("Number of primer pairs", 1, 20, 5)

    # Results are cached per sequence in st.session_state: st.button() is True
    # only for the run triggered by the click, and choosing a pair in the
    # selectbox below triggers a rerun that would otherwise discard the table.
    seq_key = str(sequence)

    if size_range is not None and st.button("Design Primers"):
        with st.spinner("Designing primers..."):
            result = design_primers(sequence, f"{size_range[0]}-{size_range[1]}", pair_count)
            rows = []
            for i in range(pair_count):
                # primer3 may return fewer pairs than requested.
                if f'PRIMER_LEFT_{i}_SEQUENCE' in result:
                    rows.append({
                        "Pair": i + 1,
                        "Left Seq": result[f'PRIMER_LEFT_{i}_SEQUENCE'],
                        "Right Seq": result[f'PRIMER_RIGHT_{i}_SEQUENCE'],
                        "Left TM": round(result[f'PRIMER_LEFT_{i}_TM'], 2),
                        "Right TM": round(result[f'PRIMER_RIGHT_{i}_TM'], 2),
                        "Product Size": result[f'PRIMER_PAIR_{i}_PRODUCT_SIZE'],
                        "L_Pos": result[f'PRIMER_LEFT_{i}'][0],
                        "L_Len": result[f'PRIMER_LEFT_{i}'][1],
                        "R_Pos": result[f'PRIMER_RIGHT_{i}'][0],
                        "R_Len": result[f'PRIMER_RIGHT_{i}'][1],
                    })
        st.session_state["primer_results"] = (seq_key, pd.DataFrame(rows))
        if not rows:
            st.warning("No primer pairs were found for the current settings.")

    # Reuse cached results only when they belong to the sequence on screen.
    cached = st.session_state.get("primer_results")
    if cached is not None and cached[0] == seq_key:
        df = cached[1]
    else:
        df = pd.DataFrame()

    if not df.empty:
        st.subheader("Designed Primer Pairs")
        # The position/length columns are only needed for the plot.
        display_df = df.drop(columns=["L_Pos", "L_Len", "R_Pos", "R_Len"])
        st.dataframe(display_df)

        csv = display_df.to_csv(index=False).encode("utf-8")
        st.download_button("Download Primer Table", csv, "primers.csv", "text/csv")

        selected_pair = st.selectbox("Select Primer Pair to Visualize", df["Pair"])
        row = df[df["Pair"] == selected_pair].iloc[0]
|
| 155 |
|
| 156 |
fig = plot_primer_binding(
|
| 157 |
len(sequence),
|
|
|
|
| 161 |
)
|
| 162 |
st.pyplot(fig)
|
| 163 |
|
| 164 |
+
# Export the figure in the format chosen by the user.
fmt = st.radio("Export format", ["PNG", "SVG"])
plot_ext = fmt.lower()
# "image/svg" is not a registered media type; SVG is image/svg+xml.
mime_types = {"png": "image/png", "svg": "image/svg+xml"}
buf = BytesIO()
fig.savefig(buf, format=plot_ext, dpi=300)
# The figure has been rendered and serialized; close it so figures do not
# accumulate in matplotlib across Streamlit reruns.
plt.close(fig)
st.download_button(f"Download Plot ({fmt})", buf.getvalue(), f"primer_plot.{plot_ext}", mime_types[plot_ext])
|
|
|
|
| 168 |
|
| 169 |
# Footer
|
| 170 |
st.markdown("""
|