Spaces:
Sleeping
Sleeping
Update app.py
Browse files
✅ GenBank and FASTA support
✅ Dynamic product size slider
✅ Primer binding site visualization (publication-ready)
✅ Primer pair selection dropdown
✅ Export as PNG and SVG
app.py
CHANGED
|
@@ -2,59 +2,46 @@ import streamlit as st
|
|
| 2 |
import pandas as pd
|
| 3 |
import primer3
|
| 4 |
from Bio import SeqIO
|
|
|
|
|
|
|
| 5 |
import os
|
| 6 |
-
from io import StringIO
|
| 7 |
|
| 8 |
# Ensure temp directory exists
|
| 9 |
temp_dir = "temp"
|
| 10 |
os.makedirs(temp_dir, exist_ok=True)
|
| 11 |
|
| 12 |
-
#
|
| 13 |
st.set_page_config(page_title="PCR Primer Design", page_icon="🧬", layout="wide")
|
| 14 |
-
|
| 15 |
-
# User Documentation
|
| 16 |
st.sidebar.header("User Guide")
|
| 17 |
-
st.sidebar.info(
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
)
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
# Helper functions
|
| 36 |
-
def extract_features_from_genbank(genbank_content, feature_types=('CDS', 'tRNA', 'gene')):
    """Parse GenBank data and group the record's features by type.

    Args:
        genbank_content: GenBank data. ``bytes`` are decoded as UTF-8 and
            wrapped in a text stream; any other value is handed to
            ``SeqIO.read`` unchanged.
        feature_types: Names of the feature types to collect. The default is
            an immutable tuple so it cannot be shared and mutated across calls.

    Returns:
        A ``(features, record)`` tuple. ``features`` maps every requested
        type to the list of matching features (empty when none match);
        ``record`` is the object returned by ``SeqIO.read``.
    """
    if isinstance(genbank_content, bytes):
        text_stream = StringIO(genbank_content.decode("utf-8"))
    else:
        # NOTE(review): a plain ``str`` is passed through untouched; Biopython
        # documents that a string argument is interpreted as a file name, so
        # callers holding file *contents* as text should wrap them in StringIO.
        text_stream = genbank_content

    record = SeqIO.read(text_stream, "genbank")

    features = {ftype: [] for ftype in feature_types}
    for feature in record.features:
        # Dict membership is equivalent to checking feature_types, but O(1).
        if feature.type in features:
            features[feature.type].append(feature)
    return features, record
|
| 44 |
|
| 45 |
-
def parse_fasta(
|
| 46 |
-
|
| 47 |
-
record = SeqIO.read(text_stream, "fasta")
|
| 48 |
-
return record
|
| 49 |
|
| 50 |
-
def
|
| 51 |
-
size_min, size_max = map(int,
|
| 52 |
return primer3.bindings.designPrimers(
|
|
|
|
| 53 |
{
|
| 54 |
-
'
|
| 55 |
-
'PRIMER_PRODUCT_SIZE_RANGE': [[size_min, size_max]]
|
| 56 |
-
},
|
| 57 |
-
{
|
| 58 |
'PRIMER_OPT_SIZE': 20,
|
| 59 |
'PRIMER_MIN_SIZE': 18,
|
| 60 |
'PRIMER_MAX_SIZE': 23,
|
|
@@ -63,99 +50,105 @@ def design_primers_for_region(sequence, product_size_range, num_to_return=10):
|
|
| 63 |
'PRIMER_MAX_TM': 63.0,
|
| 64 |
'PRIMER_MIN_GC': 20.0,
|
| 65 |
'PRIMER_MAX_GC': 80.0,
|
| 66 |
-
'PRIMER_NUM_RETURN':
|
| 67 |
}
|
| 68 |
)
|
| 69 |
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
)
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
|
|
|
| 101 |
record = parse_fasta(uploaded_file.getvalue())
|
| 102 |
sequence = record.seq
|
| 103 |
-
st.
|
| 104 |
-
st.code(str(sequence), language="text")
|
| 105 |
-
else:
|
| 106 |
-
st.error("Unsupported file format.")
|
| 107 |
-
st.stop()
|
| 108 |
-
|
| 109 |
-
st.write("## Primer Design Parameters")
|
| 110 |
-
|
| 111 |
-
product_size_range = st.slider(
|
| 112 |
-
"Select PCR product size range:",
|
| 113 |
-
min_value=100,
|
| 114 |
-
max_value=len(sequence),
|
| 115 |
-
value=(100, min(500, len(sequence))),
|
| 116 |
-
step=10,
|
| 117 |
-
help="Range of the desired PCR product size."
|
| 118 |
-
)
|
| 119 |
-
|
| 120 |
-
min_num_primers = st.number_input(
|
| 121 |
-
"Number of primer pairs to return:",
|
| 122 |
-
min_value=1, value=5, step=1,
|
| 123 |
-
help="How many primer pairs to return."
|
| 124 |
-
)
|
| 125 |
|
| 126 |
-
if
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
)
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
'Left Sequence': left,
|
| 142 |
-
'Right Sequence': right,
|
| 143 |
-
'Left TM (°C)': primers.get(f'PRIMER_LEFT_{i}_TM', 'N/A'),
|
| 144 |
-
'Right TM (°C)': primers.get(f'PRIMER_RIGHT_{i}_TM', 'N/A'),
|
| 145 |
-
'Left Length': len(left),
|
| 146 |
-
'Right Length': len(right),
|
| 147 |
-
'PCR Product Size (bp)': primers.get(f'PRIMER_PAIR_{i}_PRODUCT_SIZE', 'N/A')
|
| 148 |
-
})
|
| 149 |
-
|
| 150 |
-
if primer_data:
|
| 151 |
-
st.subheader('Designed Primers')
|
| 152 |
-
df = pd.DataFrame(primer_data)
|
| 153 |
-
st.table(df)
|
| 154 |
-
|
| 155 |
-
csv = df.to_csv(index=False).encode('utf-8')
|
| 156 |
-
st.download_button("Download Primers as CSV", csv, "primers.csv", "text/csv")
|
| 157 |
-
else:
|
| 158 |
-
st.error("No primers found. Try adjusting your parameters.")
|
| 159 |
|
| 160 |
# Footer
|
| 161 |
st.markdown("""
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
import primer3
|
| 4 |
from Bio import SeqIO
|
| 5 |
+
import matplotlib.pyplot as plt
|
| 6 |
+
from io import StringIO, BytesIO
|
| 7 |
import os
|
|
|
|
| 8 |
|
| 9 |
# Scratch directory: created next to the working directory if it is missing.
temp_dir = "temp"
os.makedirs(temp_dir, exist_ok=True)

# Page chrome. The page configuration call is kept ahead of every other
# Streamlit call in this script.
st.set_page_config(
    page_title="PCR Primer Design",
    page_icon="🧬",
    layout="wide",
)

# Sidebar: numbered walkthrough of the workflow.
_user_guide = """
1. Upload a GenBank or FASTA file.
2. If GenBank: select a feature to design primers.
3. Adjust PCR product size range and number of pairs.
4. Generate primers and select a pair to visualize.
5. Download primer table and plot (PNG or SVG).
"""
st.sidebar.header("User Guide")
st.sidebar.info(_user_guide)

# Input file: GenBank (.gb/.gbk) or FASTA (.fa/.fasta).
uploaded_file = st.file_uploader(
    "Upload GenBank or FASTA file",
    type=['gb', 'gbk', 'fa', 'fasta'],
)
|
| 26 |
+
|
| 27 |
+
def extract_features_from_genbank(content, feature_types=('CDS', 'tRNA', 'gene')):
    """Parse GenBank data and group the record's features by type.

    Args:
        content: GenBank file contents, either as ``bytes`` (decoded as
            UTF-8) or as already-decoded ``str`` text.
        feature_types: Names of the feature types to collect. Defaults to the
            three types the UI offers; every requested type is present in the
            result even when no feature matches.

    Returns:
        A ``(features, record)`` tuple: ``features`` maps each requested type
        to a list of matching features, and ``record`` is the object returned
        by ``SeqIO.read``.
    """
    # Accept text as well as raw bytes so a str payload does not fail on
    # the missing ``.decode`` attribute.
    text = content.decode("utf-8") if isinstance(content, bytes) else content
    record = SeqIO.read(StringIO(text), "genbank")

    features = {ftype: [] for ftype in feature_types}
    for f in record.features:
        if f.type in features:
            features[f.type].append(f)
    return features, record
|
| 35 |
|
| 36 |
+
def parse_fasta(content):
    """Decode FASTA bytes as UTF-8 and parse them with ``SeqIO.read``.

    Args:
        content: Raw FASTA file contents as ``bytes``.

    Returns:
        The record object produced by ``SeqIO.read`` for the "fasta" format.
    """
    fasta_text = content.decode("utf-8")
    handle = StringIO(fasta_text)
    return SeqIO.read(handle, "fasta")
|
|
|
|
|
|
|
| 38 |
|
| 39 |
+
def design_primers(seq, size_range, count):
|
| 40 |
+
size_min, size_max = map(int, size_range.split('-'))
|
| 41 |
return primer3.bindings.designPrimers(
|
| 42 |
+
{'SEQUENCE_TEMPLATE': str(seq)},
|
| 43 |
{
|
| 44 |
+
'PRIMER_PRODUCT_SIZE_RANGE': [[size_min, size_max]],
|
|
|
|
|
|
|
|
|
|
| 45 |
'PRIMER_OPT_SIZE': 20,
|
| 46 |
'PRIMER_MIN_SIZE': 18,
|
| 47 |
'PRIMER_MAX_SIZE': 23,
|
|
|
|
| 50 |
'PRIMER_MAX_TM': 63.0,
|
| 51 |
'PRIMER_MIN_GC': 20.0,
|
| 52 |
'PRIMER_MAX_GC': 80.0,
|
| 53 |
+
'PRIMER_NUM_RETURN': count,
|
| 54 |
}
|
| 55 |
)
|
| 56 |
|
| 57 |
+
def plot_primer_binding(seq_len, left_start, left_len, right_start, right_len, product_size):
    """Draw a schematic of one primer pair on a template of length ``seq_len``.

    The template is a black baseline. The left primer is a blue bar drawn
    above it from ``left_start`` to ``left_start + left_len``; the right
    primer is a red bar drawn below it from ``right_start - right_len`` to
    ``right_start``. A dashed gray line spans the gap between the two bars
    and is labelled with ``product_size``.

    Returns:
        The matplotlib figure holding the drawing.
    """
    # Bar end points, computed once and reused for bars, labels and the span.
    left_end = left_start + left_len
    right_begin = right_start - right_len
    label_x = (left_start + right_start) // 2

    fig, ax = plt.subplots(figsize=(12, 2))

    # Template baseline.
    ax.hlines(0, 0, seq_len, color='black', linewidth=2)

    # Left primer bar and caption (above the baseline).
    ax.plot([left_start, left_end], [0.2, 0.2], color='blue', linewidth=4)
    ax.text(left_start, 0.35, 'Left Primer', color='blue', fontsize=12)

    # Right primer bar and caption (below the baseline).
    ax.plot([right_begin, right_start], [-0.2, -0.2], color='red', linewidth=4)
    ax.text(right_begin, -0.4, 'Right Primer', color='red', fontsize=12)

    # Region between the primers, annotated with the product size.
    ax.hlines(0.05, left_end, right_begin, color='gray', linestyle='--', linewidth=2)
    ax.text(label_x, 0.1, f'{product_size} bp', fontsize=12, ha='center', color='gray')

    # Fixed vertical range, small horizontal margin, no axes decoration.
    ax.set_ylim(-1, 1)
    ax.set_xlim(-20, seq_len + 20)
    ax.axis('off')
    ax.set_title("Primer Binding Sites and PCR Product", fontsize=14)
    return fig
|
| 71 |
+
|
| 72 |
+
if uploaded_file:
    # Main page flow: load the uploaded sequence, collect design parameters,
    # run primer design on demand, then show the table, plot and downloads.
    ext = os.path.splitext(uploaded_file.name)[1].lower()
    sequence = None
    record = None

    if ext in ('.gb', '.gbk'):
        features, record = extract_features_from_genbank(uploaded_file.getvalue())
        st.subheader("Feature Selection")
        ftype = st.selectbox("Select feature type", ['CDS', 'tRNA', 'gene'])
        candidates = features[ftype]
        if candidates:
            options = [
                f"{f.qualifiers.get('gene', [''])[0]} ({f.location.start}:{f.location.end}) [length: {len(f.location)} bp]"
                for f in candidates
            ]
            selected = st.selectbox("Select feature", range(len(options)), format_func=lambda x: options[x])
            sequence = candidates[selected].extract(record.seq)
        else:
            # With no options the selectbox yields None, which cannot be used
            # to index the feature list; tell the user instead of crashing.
            st.warning(f"No {ftype} features found in this GenBank file.")
    elif ext in ('.fa', '.fasta'):
        record = parse_fasta(uploaded_file.getvalue())
        sequence = record.seq
        st.success(f"FASTA file loaded: {len(sequence)} bp sequence")

    # Smallest product size offered by the slider below.
    min_product_size = 100

    if sequence is not None and len(sequence) < min_product_size:
        # The slider needs max_value >= min_value, so shorter templates
        # cannot be offered a valid product-size range.
        st.error(
            f"Sequence is only {len(sequence)} bp; at least "
            f"{min_product_size} bp is required for primer design."
        )
    elif sequence is not None:
        st.code(str(sequence), language="text")
        st.subheader("Primer Design Parameters")
        product_range = st.slider(
            "PCR product size range",
            min_value=min_product_size,
            max_value=len(sequence),
            value=(min_product_size, min(500, len(sequence))),
            step=10
        )
        primer_count = st.number_input("Number of primer pairs", min_value=1, value=5, step=1)

        if st.button("Design Primers"):
            with st.spinner("Designing primers..."):
                primers = design_primers(sequence, f"{product_range[0]}-{product_range[1]}", primer_count)

            data = []
            for i in range(primer_count):
                if f'PRIMER_LEFT_{i}_SEQUENCE' not in primers:
                    continue
                data.append({
                    "Pair": i + 1,
                    "Left Seq": primers[f'PRIMER_LEFT_{i}_SEQUENCE'],
                    "Right Seq": primers[f'PRIMER_RIGHT_{i}_SEQUENCE'],
                    "Left TM": round(primers[f'PRIMER_LEFT_{i}_TM'], 2),
                    "Right TM": round(primers[f'PRIMER_RIGHT_{i}_TM'], 2),
                    "Product Size": primers[f'PRIMER_PAIR_{i}_PRODUCT_SIZE'],
                    "L_Pos": primers[f'PRIMER_LEFT_{i}'][0],
                    "L_Len": primers[f'PRIMER_LEFT_{i}'][1],
                    "R_Pos": primers[f'PRIMER_RIGHT_{i}'][0],
                    "R_Len": primers[f'PRIMER_RIGHT_{i}'][1],
                })

            # st.button is True only on the rerun caused by the click itself.
            # The widgets below (pair selector, format radio, downloads) each
            # trigger their own rerun, so the results are kept in session
            # state to stay visible afterwards. The template is stored too so
            # results are hidden once a different sequence is selected.
            st.session_state["primer_results"] = {
                "template": str(sequence),
                "table": pd.DataFrame(data),
            }

        results = st.session_state.get("primer_results")
        if results is not None and results["template"] == str(sequence):
            df = results["table"]

            if df.empty:
                # An empty frame has no columns, so dropping the helper
                # columns below would raise; report the outcome instead.
                st.error("No primers found. Try adjusting your parameters.")
            else:
                # Position/length columns are only needed for plotting.
                plot_only_columns = ["L_Pos", "L_Len", "R_Pos", "R_Len"]
                visible = df.drop(columns=plot_only_columns)

                st.subheader("Primer Table")
                st.dataframe(visible)

                csv = visible.to_csv(index=False).encode("utf-8")
                st.download_button("Download Primer Table", csv, "primers.csv", "text/csv")

                # Primer pair selection
                selected_pair = st.selectbox("Select Primer Pair to Visualize", df["Pair"])
                selected_row = df[df["Pair"] == selected_pair].iloc[0]

                fig = plot_primer_binding(
                    seq_len=len(sequence),
                    left_start=selected_row["L_Pos"],
                    left_len=selected_row["L_Len"],
                    right_start=selected_row["R_Pos"],
                    right_len=selected_row["R_Len"],
                    product_size=selected_row["Product Size"]
                )
                st.pyplot(fig)

                # Export options
                export_format = st.radio("Download format", ["PNG", "SVG"])
                if export_format == "PNG":
                    fmt, mime = "png", "image/png"
                else:
                    # The registered media type for SVG is image/svg+xml.
                    fmt, mime = "svg", "image/svg+xml"
                buf = BytesIO()
                fig.savefig(buf, format=fmt, dpi=300)
                # Release the figure so reruns do not accumulate open figures.
                plt.close(fig)
                st.download_button(f"Download Plot ({export_format})", buf.getvalue(), f"primer_plot.{fmt}", mime)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
|
| 153 |
# Footer
|
| 154 |
st.markdown("""
|