# OpenPrimer / app.py
# yashm's picture
# Update app.py
# 16e20b2 verified
# raw
# history blame
# 6.17 kB
import streamlit as st
import pandas as pd
import primer3
from Bio import SeqIO
import os
from io import StringIO
import matplotlib.pyplot as plt
# Scratch directory for temporary files; created at startup when missing.
temp_dir = "temp"
os.makedirs(temp_dir, exist_ok=True)

# Page configuration first, then the GenBank upload widget.
st.set_page_config(
    page_title="PCR Primer Design",
    page_icon="🧬",
    layout="wide",
)
uploaded_file = st.file_uploader(
    "Upload a GenBank file",
    type=['gb', 'gbk'],
)
def extract_features_from_genbank(genbank_content, feature_types=('CDS', 'tRNA', 'gene')):
    """Read a single GenBank record and group its features by type.

    Args:
        genbank_content: UTF-8 encoded ``bytes`` holding the GenBank text, or
            any object ``SeqIO.read`` accepts directly (for example an open
            text handle such as ``StringIO``).
        feature_types: Feature type names to collect. Every requested type
            gets an entry in the result, even when no feature matches.

    Returns:
        A ``(features, record)`` tuple where ``features`` maps each requested
        type to the list of matching features (in record order) and
        ``record`` is the object returned by ``SeqIO.read``.
    """
    # Only bytes are decoded and wrapped; anything else is handed to SeqIO as-is.
    if isinstance(genbank_content, bytes):
        text_stream = StringIO(genbank_content.decode("utf-8"))
    else:
        text_stream = genbank_content

    record = SeqIO.read(text_stream, "genbank")

    features = {ftype: [] for ftype in feature_types}
    for feature in record.features:
        bucket = features.get(feature.type)
        if bucket is not None:
            bucket.append(feature)
    return features, record
def _parse_product_size_range(product_size_range):
    """Parse a ``"min-max"`` string into a ``(min, max)`` pair of positive ints.

    Raises:
        ValueError: If the text is not two integers separated by one hyphen,
            if either bound is not positive, or if min is greater than max.
    """
    parts = product_size_range.split('-')
    if len(parts) != 2:
        raise ValueError(
            f"Product size range must look like '150-500', got {product_size_range!r}"
        )
    try:
        size_min, size_max = (int(part) for part in parts)
    except ValueError:
        raise ValueError(
            f"Product size range bounds must be integers, got {product_size_range!r}"
        ) from None
    if size_min <= 0 or size_max < size_min:
        raise ValueError(
            f"Product size range must satisfy 0 < min <= max, got {product_size_range!r}"
        )
    return size_min, size_max


def design_primers_for_region(sequence, product_size_range, num_to_return=5):
    """Design PCR primer pairs for a template sequence with primer3.

    Args:
        sequence: Template sequence; converted with ``str()`` before use.
        product_size_range: Desired amplicon size as a ``"min-max"`` string,
            e.g. ``"150-500"``.
        num_to_return: Number of primer pairs requested from primer3.

    Returns:
        The result of ``primer3.bindings.designPrimers`` for the template.

    Raises:
        ValueError: If ``product_size_range`` is malformed, non-positive or
            reversed. This is checked before primer3 is invoked.
    """
    # Validate up front so bad user input fails with a readable message
    # instead of an unpacking error or an opaque primer3 failure.
    size_min, size_max = _parse_product_size_range(product_size_range)

    seq_args = {
        'SEQUENCE_TEMPLATE': str(sequence),
        'PRIMER_PRODUCT_SIZE_RANGE': [[size_min, size_max]],
    }
    global_args = {
        'PRIMER_OPT_SIZE': 20,
        'PRIMER_MIN_SIZE': 18,
        'PRIMER_MAX_SIZE': 23,
        'PRIMER_OPT_TM': 60.0,
        'PRIMER_MIN_TM': 57.0,
        'PRIMER_MAX_TM': 63.0,
        'PRIMER_MIN_GC': 20.0,
        'PRIMER_MAX_GC': 80.0,
        'PRIMER_NUM_RETURN': num_to_return,
    }
    # NOTE(review): newer primer3-py releases appear to prefer the snake_case
    # name ``design_primers``; the original call is kept unchanged - confirm
    # against the installed primer3-py version.
    return primer3.bindings.designPrimers(seq_args, global_args)
def _primer_start(primers, key):
    """Return the start coordinate recorded for primer *key*, or None if absent.

    primer3 reports each primer under ``PRIMER_LEFT_<i>`` / ``PRIMER_RIGHT_<i>``
    as a ``(start, length)`` pair. A ``<key>_POSITION`` entry, which the
    previous implementation looked for, is still honoured when present.
    """
    legacy_key = f'{key}_POSITION'
    if legacy_key in primers:
        return primers[legacy_key]
    location = primers.get(key)
    if location is None:
        return None
    return location[0]


def plot_pcr_product(sequence, primers, feature_length, num_pairs=5):
    """Plot the selected feature as a line and mark each primer pair on it.

    Args:
        sequence: Template sequence. Not used for drawing; kept so existing
            callers keep working.
        primers: Result mapping from the primer3 design call.
        feature_length: Length of the feature in bp; sets the extent of the
            blue feature line on the x-axis.
        num_pairs: Maximum number of primer pairs (indices 0..num_pairs-1) to
            look for in ``primers``.

    The figure is rendered into the Streamlit page and then closed.
    """
    fig, ax = plt.subplots(figsize=(10, 3))
    # Blue horizontal line represents the selected feature.
    ax.plot([0, feature_length], [1, 1], 'b-', label='Selected Feature')

    drawn = 0
    for i in range(num_pairs):
        left_pos = _primer_start(primers, f'PRIMER_LEFT_{i}')
        right_pos = _primer_start(primers, f'PRIMER_RIGHT_{i}')
        if left_pos is None or right_pos is None:
            continue

        # Left primers are ticked above the feature line (red), right primers
        # below it (green). Only the first drawn pair contributes legend
        # entries so the legend is not repeated per pair.
        ax.plot([left_pos, left_pos], [1.1, 1.3], 'r-',
                label='Left Primer' if drawn == 0 else "")
        ax.plot([right_pos, right_pos], [0.7, 0.9], 'g-',
                label='Right Primer' if drawn == 0 else "")
        ax.text(left_pos, 1.4, f'L{i+1}', ha='center')
        ax.text(right_pos, 0.6, f'R{i+1}', ha='center')
        drawn += 1

    ax.set_ylim(0, 2)
    ax.set_title('Feature and Primer Positions')
    ax.set_xlabel('Nucleotide position')
    ax.legend()
    ax.grid(True)

    # Pass the explicit figure (not the pyplot module) and close it afterwards
    # so figures do not accumulate across Streamlit reruns.
    st.pyplot(fig)
    plt.close(fig)
def _collect_primer_rows(primers, num_pairs):
    """Return one table row (dict) per complete primer pair in a primer3 result.

    Pairs missing either the left or the right sequence are skipped. Other
    missing fields are shown as 'N/A'.
    """
    rows = []
    for i in range(num_pairs):
        left_seq = primers.get(f'PRIMER_LEFT_{i}_SEQUENCE')
        right_seq = primers.get(f'PRIMER_RIGHT_{i}_SEQUENCE')
        if left_seq is None or right_seq is None:
            continue
        rows.append({
            'Primer Pair': i + 1,
            'Left Sequence': left_seq,
            'Right Sequence': right_seq,
            'Left TM (°C)': primers.get(f'PRIMER_LEFT_{i}_TM', 'N/A'),
            'Right TM (°C)': primers.get(f'PRIMER_RIGHT_{i}_TM', 'N/A'),
            'Left Length': primers.get(f'PRIMER_LEFT_{i}_SIZE', 'N/A'),
            'Right Length': primers.get(f'PRIMER_RIGHT_{i}_SIZE', 'N/A'),
            'PCR Product Size (bp)': primers.get(f'PRIMER_PAIR_{i}_PRODUCT_SIZE', 'N/A'),
        })
    return rows


# Main page flow: runs once a GenBank file has been uploaded.
if uploaded_file is not None:
    # Number of primer pairs requested from primer3 and shown in table and plot.
    num_pairs = 5

    try:
        # UnicodeDecodeError (non-UTF-8 upload) is a ValueError subclass, and
        # SeqIO.read raises ValueError unless the file holds exactly one record.
        genbank_content = StringIO(uploaded_file.getvalue().decode("utf-8"))
        features, record = extract_features_from_genbank(genbank_content)
    except ValueError as exc:
        st.error(f'Could not read the uploaded GenBank file: {exc}')
        features = record = None

    if features is not None:
        feature_type = st.selectbox('Select feature type:', ['CDS', 'tRNA', 'gene'])
        candidates = features[feature_type]

        if not candidates:
            # With no options the selectbox yields no index, so indexing the
            # feature list would fail; tell the user instead.
            st.warning(f'No {feature_type} features were found in the uploaded record.')
        else:
            feature_options = [
                f"{feature.qualifiers.get('gene', [''])[0]} ({feature.location})"
                for feature in candidates
            ]
            selected_index = st.selectbox(
                f'Select a {feature_type}:',
                options=range(len(feature_options)),
                format_func=lambda x: feature_options[x],
            )
            selected_feature = candidates[selected_index]
            feature_sequence = selected_feature.extract(record.seq)

            st.write(f"Selected {feature_type} sequence (length: {len(feature_sequence)} bp):")
            st.text(str(feature_sequence))

            product_size_range = st.text_input(
                "Enter desired PCR product size range (e.g., 150-500):",
                value="150-500",
            )

            # NOTE(review): widgets nested under st.button vanish on the next
            # rerun (e.g. after clicking the download button); consider keeping
            # results in st.session_state - confirm desired UX.
            if st.button(f'Design Primers for selected {feature_type}'):
                try:
                    primers = design_primers_for_region(
                        feature_sequence, product_size_range, num_to_return=num_pairs
                    )
                except (ValueError, OSError) as exc:
                    # ValueError: malformed size range. OSError: presumably how
                    # primer3-py reports primer3 input errors - verify.
                    st.error(f'Primer design failed: {exc}')
                else:
                    primer_data = _collect_primer_rows(primers, num_pairs)
                    if primer_data:
                        primer_df = pd.DataFrame(primer_data)
                        st.write('### Designed Primers')
                        st.dataframe(primer_df)

                        csv = primer_df.to_csv(index=False).encode('utf-8')
                        st.download_button(
                            "Download Primers as CSV",
                            csv,
                            "primers.csv",
                            "text/csv",
                            key='download-csv'
                        )

                        # Plotting PCR products
                        st.write("### Visualization of PCR Products")
                        feature_length = len(feature_sequence)
                        plot_pcr_product(feature_sequence, primers, feature_length, num_pairs=num_pairs)
                    else:
                        st.error('No primers were found. Please adjust your parameters and try again.')