"""Streamlit app that designs PCR primers for a feature picked from a GenBank file.

The user uploads a single-record GenBank file, selects a CDS/tRNA/gene
feature, and Primer3 (through primer3-py) proposes primer pairs for the
extracted feature sequence. Results are shown as a table, offered as a CSV
download, and drawn on a simple position plot.
"""

import os
from io import StringIO

import matplotlib.pyplot as plt
import pandas as pd
import primer3
import streamlit as st
from Bio import SeqIO

# Ensure the 'temp' directory exists for saving temporary files
temp_dir = "temp"
os.makedirs(temp_dir, exist_ok=True)

# Feature types offered in the UI and number of primer pairs requested/shown.
FEATURE_TYPES = ('CDS', 'tRNA', 'gene')
NUM_PRIMER_PAIRS = 5


def extract_features_from_genbank(genbank_content, feature_types=('CDS', 'tRNA', 'gene')):
    """Extract the requested feature types from a single-record GenBank source.

    Args:
        genbank_content: Raw ``bytes`` of a GenBank file (decoded as UTF-8),
            or anything ``Bio.SeqIO.read`` accepts directly (an open text
            handle such as ``StringIO``, or a file path).
        feature_types: Iterable of feature type names to collect.

    Returns:
        A ``(features, record)`` tuple where ``features`` maps each requested
        type to the list of matching ``SeqFeature`` objects (possibly empty)
        and ``record`` is the parsed ``SeqRecord``.

    Raises:
        ValueError: If the content cannot be decoded/parsed, or does not
            contain exactly one record (raised by ``SeqIO.read``).
    """
    if isinstance(genbank_content, bytes):
        text_stream = StringIO(genbank_content.decode("utf-8"))
    else:
        text_stream = genbank_content

    record = SeqIO.read(text_stream, "genbank")

    features = {ftype: [] for ftype in feature_types}
    for feature in record.features:
        if feature.type in features:
            features[feature.type].append(feature)
    return features, record


def _parse_product_size_range(product_size_range):
    """Parse a ``'MIN-MAX'`` string into a validated ``(min, max)`` int pair."""
    message = (
        f"Invalid product size range {product_size_range!r}; "
        "expected two positive integers as MIN-MAX (e.g. 150-500)."
    )
    parts = str(product_size_range).split('-')
    if len(parts) != 2:
        raise ValueError(message)
    try:
        size_min, size_max = int(parts[0]), int(parts[1])
    except ValueError:
        raise ValueError(message) from None
    if size_min <= 0 or size_min > size_max:
        raise ValueError(message)
    return size_min, size_max


def design_primers_for_region(sequence, product_size_range, num_to_return=5):
    """Design primer pairs for ``sequence`` with Primer3.

    Args:
        sequence: Template sequence (``str`` or anything whose ``str()`` is
            the nucleotide sequence, e.g. a Biopython ``Seq``).
        product_size_range: Desired amplicon size as a ``'MIN-MAX'`` string.
        num_to_return: Maximum number of primer pairs to request.

    Returns:
        The flat result dictionary produced by primer3-py.

    Raises:
        ValueError: If ``product_size_range`` is not a valid ``MIN-MAX`` pair.
    """
    size_min, size_max = _parse_product_size_range(product_size_range)

    seq_args = {'SEQUENCE_TEMPLATE': str(sequence)}
    global_args = {
        'PRIMER_OPT_SIZE': 20,
        'PRIMER_MIN_SIZE': 18,
        'PRIMER_MAX_SIZE': 23,
        'PRIMER_OPT_TM': 60.0,
        'PRIMER_MIN_TM': 57.0,
        'PRIMER_MAX_TM': 63.0,
        'PRIMER_MIN_GC': 20.0,
        'PRIMER_MAX_GC': 80.0,
        # PRIMER_PRODUCT_SIZE_RANGE is a global Primer3 tag; it must be in
        # the global arguments or the requested range may be ignored.
        'PRIMER_PRODUCT_SIZE_RANGE': [[size_min, size_max]],
        'PRIMER_NUM_RETURN': num_to_return,
    }

    # Prefer the snake_case API of newer primer3-py releases; fall back to
    # the legacy camelCase name on older installations.
    design = getattr(primer3.bindings, 'design_primers', None)
    if design is None:
        design = primer3.bindings.designPrimers
    return design(seq_args, global_args)


def _primer_start(primers, side, index):
    """Return the template position of a primer, or ``None`` if absent.

    Primer3 reports each primer as ``PRIMER_<side>_<index>`` holding a
    ``(start, length)`` pair. The ``..._POSITION`` key is still honoured as
    a fallback for callers that supply positions under that name.
    """
    location = primers.get(f'PRIMER_{side}_{index}')
    if location is not None:
        return location[0]
    return primers.get(f'PRIMER_{side}_{index}_POSITION')


def plot_pcr_product(sequence, primers, feature_length, num_pairs=5):
    """Draw the selected feature and the primer positions in Streamlit.

    Args:
        sequence: The feature sequence (unused; kept for interface
            compatibility).
        primers: Result dictionary returned by ``design_primers_for_region``.
        feature_length: Length of the selected feature in nucleotides.
        num_pairs: Maximum number of primer pairs to draw.
    """
    fig, ax = plt.subplots(figsize=(10, 3))
    # Blue line represents the selected feature.
    ax.plot([0, feature_length], [1, 1], 'b-', label='Selected Feature')

    legend_labelled = False
    for i in range(num_pairs):
        left_pos = _primer_start(primers, 'LEFT', i)
        right_pos = _primer_start(primers, 'RIGHT', i)
        if left_pos is None or right_pos is None:
            continue

        # Left primers are drawn above the feature line (red), right primers
        # below it (green); only the first drawn pair gets legend entries.
        ax.plot([left_pos, left_pos], [1.1, 1.3], 'r-',
                label="" if legend_labelled else 'Left Primer')
        ax.plot([right_pos, right_pos], [0.7, 0.9], 'g-',
                label="" if legend_labelled else 'Right Primer')
        ax.text(left_pos, 1.4, f'L{i+1}', ha='center')
        ax.text(right_pos, 0.6, f'R{i+1}', ha='center')
        legend_labelled = True

    ax.set_ylim(0, 2)
    ax.set_title('Feature and Primer Positions')
    ax.set_xlabel('Nucleotide position')
    ax.legend()
    ax.grid(True)

    # Pass the Figure itself (not the pyplot module) and close it afterwards
    # so figures do not accumulate across Streamlit reruns.
    st.pyplot(fig)
    plt.close(fig)


def _collect_primer_rows(primers, num_pairs):
    """Build one table row per primer pair present in the Primer3 result."""
    rows = []
    for i in range(num_pairs):
        left_seq = primers.get(f'PRIMER_LEFT_{i}_SEQUENCE')
        right_seq = primers.get(f'PRIMER_RIGHT_{i}_SEQUENCE')
        if left_seq is None or right_seq is None:
            continue
        rows.append({
            'Primer Pair': i + 1,
            'Left Sequence': left_seq,
            'Right Sequence': right_seq,
            'Left TM (°C)': primers.get(f'PRIMER_LEFT_{i}_TM', 'N/A'),
            'Right TM (°C)': primers.get(f'PRIMER_RIGHT_{i}_TM', 'N/A'),
            # Primer3 emits no ..._SIZE key; the length is that of the
            # returned primer sequence.
            'Left Length': len(left_seq),
            'Right Length': len(right_seq),
            'PCR Product Size (bp)': primers.get(f'PRIMER_PAIR_{i}_PRODUCT_SIZE', 'N/A'),
        })
    return rows


# Streamlit UI setup
st.set_page_config(page_title="PCR Primer Design", page_icon="🧬", layout="wide")
uploaded_file = st.file_uploader("Upload a GenBank file", type=['gb', 'gbk'])

if uploaded_file is not None:
    try:
        features, record = extract_features_from_genbank(uploaded_file.getvalue())
    except ValueError as err:
        # Covers undecodable bytes, malformed files, and files that do not
        # hold exactly one record.
        st.error(f'Could not read the GenBank file: {err}')
        st.stop()

    feature_type = st.selectbox('Select feature type:', list(FEATURE_TYPES))
    candidates = features[feature_type]
    if not candidates:
        # Without this guard the selectbox below returns None and indexing fails.
        st.warning(f'No {feature_type} features were found in this file.')
        st.stop()

    feature_options = [
        f"{feature.qualifiers.get('gene', [''])[0]} ({feature.location})"
        for feature in candidates
    ]
    selected_index = st.selectbox(
        f'Select a {feature_type}:',
        options=range(len(feature_options)),
        format_func=lambda x: feature_options[x],
    )
    selected_feature = candidates[selected_index]
    feature_sequence = selected_feature.extract(record.seq)

    st.write(f"Selected {feature_type} sequence (length: {len(feature_sequence)} bp):")
    st.text(str(feature_sequence))

    product_size_range = st.text_input(
        "Enter desired PCR product size range (e.g., 150-500):", value="150-500"
    )

    if st.button(f'Design Primers for selected {feature_type}'):
        try:
            primers = design_primers_for_region(
                feature_sequence, product_size_range, num_to_return=NUM_PRIMER_PAIRS
            )
        except (ValueError, OSError) as err:
            # ValueError: bad size range; OSError: error reported by Primer3.
            st.error(f'Primer design failed: {err}')
            st.stop()

        primer_data = _collect_primer_rows(primers, NUM_PRIMER_PAIRS)

        if primer_data:
            primer_df = pd.DataFrame(primer_data)
            st.write('### Designed Primers')
            st.dataframe(primer_df)

            csv = primer_df.to_csv(index=False).encode('utf-8')
            st.download_button(
                "Download Primers as CSV",
                csv,
                "primers.csv",
                "text/csv",
                key='download-csv'
            )

            # Plotting PCR products
            st.write("### Visualization of PCR Products")
            feature_length = len(feature_sequence)  # Length of the selected feature
            plot_pcr_product(feature_sequence, primers, feature_length, num_pairs=NUM_PRIMER_PAIRS)
        else:
            st.error('No primers were found. Please adjust your parameters and try again.')