File size: 6,173 Bytes
b0f8787
784e4aa
 
1a77f6d
784e4aa
d3184b9
691da78
b0f8787
784e4aa
 
 
b0f8787
784e4aa
 
 
b0f8787
1a77f6d
d3184b9
c748014
d3184b9
1a77f6d
 
 
 
 
626ac91
691da78
c748014
691da78
 
1a77f6d
784e4aa
1a77f6d
691da78
784e4aa
 
626ac91
 
 
784e4aa
626ac91
784e4aa
 
 
 
626ac91
784e4aa
 
16e20b2
 
e7f5415
 
16e20b2
 
 
e7f5415
691da78
e7f5415
 
 
 
 
 
16e20b2
 
 
 
 
 
e7f5415
16e20b2
 
691da78
16e20b2
e7f5415
79db332
16e20b2
79db332
691da78
e7f5415
784e4aa
d3184b9
 
1a77f6d
 
0c253b2
c748014
 
1a77f6d
691da78
c748014
691da78
784e4aa
1a77f6d
784e4aa
c748014
691da78
0c253b2
691da78
 
 
 
 
 
 
 
 
 
 
 
 
 
b0f8787
691da78
 
d0651b7
 
 
 
 
 
 
 
 
 
691da78
 
16e20b2
 
d0651b7
691da78
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import streamlit as st
import pandas as pd
import primer3
from Bio import SeqIO
import os
from io import StringIO
import matplotlib.pyplot as plt

# Scratch directory for temporary files; created if it is not already there
temp_dir = "temp"
os.makedirs(name=temp_dir, exist_ok=True)

# Page-level Streamlit configuration, then the GenBank upload widget
st.set_page_config(
    layout="wide",
    page_icon="🧬",
    page_title="PCR Primer Design",
)
uploaded_file = st.file_uploader(
    label="Upload a GenBank file",
    type=['gb', 'gbk'],
)

def extract_features_from_genbank(genbank_content, feature_types=('CDS', 'tRNA', 'gene')):
    """Parse a single-record GenBank input and group its features by type.

    Args:
        genbank_content: The GenBank data. ``bytes``/``bytearray`` are decoded
            as UTF-8 and ``str`` is treated as the file's text (not as a
            path); both are wrapped in a ``StringIO``. An object with a
            ``read`` method or an ``os.PathLike`` path is handed to
            ``SeqIO.read`` unchanged.
        feature_types: Feature type name(s) to collect. A single string is
            accepted as shorthand for a one-element collection.

    Returns:
        A ``(features, record)`` tuple. ``features`` maps every requested
        type to the list of matching features in record order (an empty list
        when nothing matches); ``record`` is whatever ``SeqIO.read`` returned.

    Raises:
        TypeError: If ``genbank_content`` is none of the supported kinds.
        ValueError: Propagated from ``SeqIO.read``; Biopython documents this
            for inputs that do not hold exactly one record.
    """
    if isinstance(genbank_content, (bytes, bytearray)):
        source = StringIO(bytes(genbank_content).decode("utf-8"))
    elif isinstance(genbank_content, str):
        # Without this wrap SeqIO.read would interpret the text as a filename.
        source = StringIO(genbank_content)
    elif hasattr(genbank_content, "read") or isinstance(genbank_content, os.PathLike):
        source = genbank_content
    else:
        raise TypeError(
            "genbank_content must be bytes, str, a file-like object or a path, "
            f"not {type(genbank_content).__name__}"
        )

    # A bare string would otherwise be iterated character by character.
    if isinstance(feature_types, str):
        feature_types = (feature_types,)
    features = {ftype: [] for ftype in feature_types}

    record = SeqIO.read(source, "genbank")
    for feature in record.features:
        bucket = features.get(feature.type)
        if bucket is not None:
            bucket.append(feature)
    return features, record

def _parse_product_size_range(product_size_range):
    """Turn a ``"MIN-MAX"`` string into a validated ``(min, max)`` int pair."""
    parts = str(product_size_range).split('-')
    if len(parts) != 2:
        raise ValueError(
            f"Product size range must look like 'MIN-MAX' (e.g. 150-500), got {product_size_range!r}"
        )
    try:
        size_min, size_max = (int(part) for part in parts)
    except ValueError:
        raise ValueError(
            f"Product size range bounds must be whole numbers, got {product_size_range!r}"
        ) from None
    if size_min < 1 or size_max < size_min:
        raise ValueError(
            f"Product size range must satisfy 1 <= MIN <= MAX, got {product_size_range!r}"
        )
    return size_min, size_max


def design_primers_for_region(sequence, product_size_range, num_to_return=5):
    """Design PCR primer pairs for a template sequence with primer3.

    Args:
        sequence: Template to design against; converted with ``str()``.
        product_size_range: Allowed amplicon size as a ``"MIN-MAX"`` string,
            e.g. ``"150-500"``.
        num_to_return: Maximum number of primer pairs requested from primer3.

    Returns:
        The result dictionary produced by primer3-py.

    Raises:
        ValueError: If ``product_size_range`` is not two whole numbers
            separated by a dash, or if the bounds are non-positive or
            reversed. The check runs before primer3 is invoked.
    """
    size_min, size_max = _parse_product_size_range(product_size_range)

    seq_args = {
        'SEQUENCE_TEMPLATE': str(sequence),
        'PRIMER_PRODUCT_SIZE_RANGE': [[size_min, size_max]],
    }
    global_args = {
        'PRIMER_OPT_SIZE': 20,
        'PRIMER_MIN_SIZE': 18,
        'PRIMER_MAX_SIZE': 23,
        'PRIMER_OPT_TM': 60.0,
        'PRIMER_MIN_TM': 57.0,
        'PRIMER_MAX_TM': 63.0,
        'PRIMER_MIN_GC': 20.0,
        'PRIMER_MAX_GC': 80.0,
        'PRIMER_NUM_RETURN': num_to_return,
    }

    # Newer primer3-py releases expose the snake_case ``design_primers`` and
    # deprecate the camelCase spelling; older releases only have the latter.
    design = getattr(primer3.bindings, 'design_primers', None)
    if design is None:
        design = primer3.bindings.designPrimers
    return design(seq_args, global_args)

def _primer_start(primers, side, index):
    """Return the start coordinate of one primer, or ``None`` if absent.

    primer3 reports each primer under ``PRIMER_<SIDE>_<i>`` as a
    ``[start, length]`` pair. The ``PRIMER_<SIDE>_<i>_POSITION`` key is still
    consulted as a fallback for result dictionaries that carry it.
    """
    entry = primers.get(f'PRIMER_{side}_{index}')
    if entry is not None:
        try:
            return entry[0]
        except (TypeError, IndexError):
            pass
    return primers.get(f'PRIMER_{side}_{index}_POSITION')


def plot_pcr_product(sequence, primers, feature_length, num_pairs=5):
    """Draw the selected feature and the primer positions in Streamlit.

    Args:
        sequence: Unused; kept so existing callers keep working.
        primers: primer3 result dictionary.
        feature_length: Length of the feature, used as the extent of the
            horizontal feature line.
        num_pairs: Number of primer-pair indices (0-based) to look up.

    Pairs for which either primer position is missing are skipped. The plot
    is rendered with ``st.pyplot`` and the figure is closed afterwards.
    """
    # A dedicated figure avoids drawing on matplotlib's shared global state.
    fig, ax = plt.subplots(figsize=(10, 3))
    ax.plot([0, feature_length], [1, 1], 'b-', label='Selected Feature')  # Blue line: the selected feature

    legend_done = False  # only the first drawn pair contributes legend entries
    for i in range(num_pairs):
        left_pos = _primer_start(primers, 'LEFT', i)
        right_pos = _primer_start(primers, 'RIGHT', i)
        if left_pos is None or right_pos is None:
            continue

        # Left primer tick above the feature line (red), right primer tick below (green)
        ax.plot([left_pos, left_pos], [1.1, 1.3], 'r-',
                label=None if legend_done else 'Left Primer')
        ax.plot([right_pos, right_pos], [0.7, 0.9], 'g-',
                label=None if legend_done else 'Right Primer')
        ax.text(left_pos, 1.4, f'L{i+1}', ha='center')
        ax.text(right_pos, 0.6, f'R{i+1}', ha='center')
        legend_done = True

    ax.set_ylim(0, 2)  # Fixed Y range keeps ticks and labels inside the axes
    ax.set_title('Feature and Primer Positions')
    ax.set_xlabel('Nucleotide position')
    ax.legend()
    ax.grid(True)

    st.pyplot(fig)
    plt.close(fig)  # release the figure so reruns do not accumulate open figures


if uploaded_file is not None:
    # Number of primer pairs requested from primer3 and shown in table and plot
    num_primer_pairs = 5

    # --- Parse the upload -------------------------------------------------
    try:
        genbank_content = StringIO(uploaded_file.getvalue().decode("utf-8"))
        features, record = extract_features_from_genbank(genbank_content)
    except ValueError as exc:
        # UnicodeDecodeError is a ValueError subclass, so this also covers
        # uploads that are not valid UTF-8 as well as unparsable files.
        st.error(f"Could not read the uploaded GenBank file: {exc}")
        st.stop()

    # --- Choose a feature -------------------------------------------------
    feature_type = st.selectbox('Select feature type:', ['CDS', 'tRNA', 'gene'])
    candidates = features[feature_type]
    if not candidates:
        # With no options the selectbox yields no index and the lookup below would fail.
        st.warning(f"No {feature_type} features were found in this file.")
        st.stop()

    # Label each feature by the first available naming qualifier plus its location
    feature_options = []
    for feature in candidates:
        name = ''
        for qualifier in ('gene', 'locus_tag', 'product'):
            values = feature.qualifiers.get(qualifier)
            if values:
                name = values[0]
                break
        feature_options.append(f"{name} ({feature.location})")

    selected_index = st.selectbox(f'Select a {feature_type}:', options=range(len(feature_options)), format_func=lambda x: feature_options[x])
    selected_feature = candidates[selected_index]

    feature_sequence = selected_feature.extract(record.seq)
    st.write(f"Selected {feature_type} sequence (length: {len(feature_sequence)} bp):")
    st.text(str(feature_sequence))

    product_size_range = st.text_input("Enter desired PCR product size range (e.g., 150-500):", value="150-500")

    # --- Design and report primers ---------------------------------------
    if st.button(f'Design Primers for selected {feature_type}'):
        try:
            primers = design_primers_for_region(feature_sequence, product_size_range, num_to_return=num_primer_pairs)
        except ValueError as exc:
            st.error(f"Invalid product size range. Please enter it as MIN-MAX (e.g., 150-500). Details: {exc}")
            st.stop()

        primer_data = []
        for i in range(num_primer_pairs):
            left_seq = primers.get(f'PRIMER_LEFT_{i}_SEQUENCE')
            right_seq = primers.get(f'PRIMER_RIGHT_{i}_SEQUENCE')
            if left_seq is None or right_seq is None:
                continue  # primer3 returned fewer pairs than requested
            primer_data.append({
                'Primer Pair': i + 1,
                'Left Sequence': left_seq,
                'Right Sequence': right_seq,
                'Left TM (°C)': primers.get(f'PRIMER_LEFT_{i}_TM', 'N/A'),
                'Right TM (°C)': primers.get(f'PRIMER_RIGHT_{i}_TM', 'N/A'),
                # Fall back to the sequence length when no explicit size key is present
                'Left Length': primers.get(f'PRIMER_LEFT_{i}_SIZE', len(left_seq)),
                'Right Length': primers.get(f'PRIMER_RIGHT_{i}_SIZE', len(right_seq)),
                'PCR Product Size (bp)': primers.get(f'PRIMER_PAIR_{i}_PRODUCT_SIZE', 'N/A'),
            })

        if primer_data:
            primer_df = pd.DataFrame(primer_data)
            st.write('### Designed Primers')
            st.dataframe(primer_df)
            csv = primer_df.to_csv(index=False).encode('utf-8')
            # NOTE(review): this button lives inside the st.button branch; a click
            # presumably reruns the script with the design button unset, hiding
            # the results. Consider st.session_state if that matters; confirm.
            st.download_button(
                "Download Primers as CSV",
                csv,
                "primers.csv",
                "text/csv",
                key='download-csv'
            )
            # Plotting PCR products
            st.write("### Visualization of PCR Products")
            feature_length = len(feature_sequence)  # Length of the selected feature
            plot_pcr_product(feature_sequence, primers, feature_length, num_pairs=num_primer_pairs)
        else:
            st.error('No primers were found. Please adjust your parameters and try again.')