yashm committed on
Commit
2612da0
·
verified ·
1 Parent(s): d5326d7

Update app.py

Browse files

✅ GenBank and FASTA support
✅ Dynamic product size slider
✅ Primer binding site visualization (publication-ready)
✅ Primer pair selection dropdown
✅ Export as PNG and SVG

Files changed (1) hide show
  1. app.py +119 -126
app.py CHANGED
@@ -2,59 +2,46 @@ import streamlit as st
2
  import pandas as pd
3
  import primer3
4
  from Bio import SeqIO
 
 
5
  import os
6
- from io import StringIO
7
 
8
  # Ensure temp directory exists
9
  temp_dir = "temp"
10
  os.makedirs(temp_dir, exist_ok=True)
11
 
12
- # Streamlit UI setup
13
  st.set_page_config(page_title="PCR Primer Design", page_icon="🧬", layout="wide")
14
-
15
- # User Documentation
16
  st.sidebar.header("User Guide")
17
- st.sidebar.info(
18
- """
19
- This app allows you to design PCR primers from GenBank or FASTA files.
20
-
21
- 1. Upload a GenBank or FASTA file.
22
- 2. If GenBank: choose a feature (CDS, gene, tRNA).
23
- 3. Adjust product size range and primer count.
24
- 4. Click to generate primers.
25
- """
26
- )
27
-
28
- # File uploader
29
- uploaded_file = st.file_uploader(
30
- "Upload a GenBank or FASTA file",
31
- type=['gb', 'gbk', 'fa', 'fasta'],
32
- help="Supported formats: .gb, .gbk (GenBank); .fa, .fasta (FASTA)"
33
- )
34
-
35
- # Helper functions
36
- def extract_features_from_genbank(genbank_content, feature_types=['CDS', 'tRNA', 'gene']):
37
- text_stream = StringIO(genbank_content.decode("utf-8")) if isinstance(genbank_content, bytes) else genbank_content
38
- record = SeqIO.read(text_stream, "genbank")
39
- features = {ftype: [] for ftype in feature_types}
40
- for feature in record.features:
41
- if feature.type in feature_types:
42
- features[feature.type].append(feature)
43
  return features, record
44
 
45
- def parse_fasta(fasta_content):
46
- text_stream = StringIO(fasta_content.decode("utf-8"))
47
- record = SeqIO.read(text_stream, "fasta")
48
- return record
49
 
50
- def design_primers_for_region(sequence, product_size_range, num_to_return=10):
51
- size_min, size_max = map(int, product_size_range.split('-'))
52
  return primer3.bindings.designPrimers(
 
53
  {
54
- 'SEQUENCE_TEMPLATE': str(sequence),
55
- 'PRIMER_PRODUCT_SIZE_RANGE': [[size_min, size_max]]
56
- },
57
- {
58
  'PRIMER_OPT_SIZE': 20,
59
  'PRIMER_MIN_SIZE': 18,
60
  'PRIMER_MAX_SIZE': 23,
@@ -63,99 +50,105 @@ def design_primers_for_region(sequence, product_size_range, num_to_return=10):
63
  'PRIMER_MAX_TM': 63.0,
64
  'PRIMER_MIN_GC': 20.0,
65
  'PRIMER_MAX_GC': 80.0,
66
- 'PRIMER_NUM_RETURN': num_to_return,
67
  }
68
  )
69
 
70
- # File processing logic
71
- if uploaded_file is not None:
72
- file_ext = os.path.splitext(uploaded_file.name)[-1].lower()
73
-
74
- if file_ext in ['.gb', '.gbk']:
75
- genbank_content = StringIO(uploaded_file.getvalue().decode("utf-8"))
76
- features, record = extract_features_from_genbank(genbank_content)
77
-
78
- st.write("## Feature Selection")
79
- feature_type = st.selectbox(
80
- 'Select feature type:',
81
- ['CDS', 'tRNA', 'gene'],
82
- help="Choose the feature type for primer design."
83
- )
84
-
85
- if features[feature_type]:
86
- feature_options = [
87
- f"{f.qualifiers.get('gene', [''])[0]} ({f.location}) [length: {len(f.location)} bp]"
88
- for f in features[feature_type]
89
- ]
90
- selected_index = st.selectbox(
91
- f"Select a {feature_type}:",
92
- options=range(len(feature_options)),
93
- format_func=lambda x: feature_options[x],
94
- help="Choose a specific feature."
95
- )
96
- selected_feature = features[feature_type][selected_index]
97
- sequence = selected_feature.extract(record.seq)
98
- st.code(str(sequence), language="text")
99
-
100
- elif file_ext in ['.fa', '.fasta']:
 
101
  record = parse_fasta(uploaded_file.getvalue())
102
  sequence = record.seq
103
- st.write(f"FASTA sequence loaded (length: {len(sequence)} bp):")
104
- st.code(str(sequence), language="text")
105
- else:
106
- st.error("Unsupported file format.")
107
- st.stop()
108
-
109
- st.write("## Primer Design Parameters")
110
-
111
- product_size_range = st.slider(
112
- "Select PCR product size range:",
113
- min_value=100,
114
- max_value=len(sequence),
115
- value=(100, min(500, len(sequence))),
116
- step=10,
117
- help="Range of the desired PCR product size."
118
- )
119
-
120
- min_num_primers = st.number_input(
121
- "Number of primer pairs to return:",
122
- min_value=1, value=5, step=1,
123
- help="How many primer pairs to return."
124
- )
125
 
126
- if st.button("Design Primers"):
127
- with st.spinner("Designing primers..."):
128
- primers = design_primers_for_region(
129
- sequence,
130
- f"{product_size_range[0]}-{product_size_range[1]}",
131
- num_to_return=min_num_primers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  )
133
-
134
- primer_data = []
135
- for i in range(min_num_primers):
136
- left = primers.get(f'PRIMER_LEFT_{i}_SEQUENCE', 'N/A')
137
- right = primers.get(f'PRIMER_RIGHT_{i}_SEQUENCE', 'N/A')
138
- if left != 'N/A' and right != 'N/A':
139
- primer_data.append({
140
- 'Primer Pair': i + 1,
141
- 'Left Sequence': left,
142
- 'Right Sequence': right,
143
- 'Left TM (°C)': primers.get(f'PRIMER_LEFT_{i}_TM', 'N/A'),
144
- 'Right TM (°C)': primers.get(f'PRIMER_RIGHT_{i}_TM', 'N/A'),
145
- 'Left Length': len(left),
146
- 'Right Length': len(right),
147
- 'PCR Product Size (bp)': primers.get(f'PRIMER_PAIR_{i}_PRODUCT_SIZE', 'N/A')
148
- })
149
-
150
- if primer_data:
151
- st.subheader('Designed Primers')
152
- df = pd.DataFrame(primer_data)
153
- st.table(df)
154
-
155
- csv = df.to_csv(index=False).encode('utf-8')
156
- st.download_button("Download Primers as CSV", csv, "primers.csv", "text/csv")
157
- else:
158
- st.error("No primers found. Try adjusting your parameters.")
159
 
160
  # Footer
161
  st.markdown("""
 
2
  import pandas as pd
3
  import primer3
4
  from Bio import SeqIO
5
+ import matplotlib.pyplot as plt
6
+ from io import StringIO, BytesIO
7
  import os
 
8
 
9
  # Ensure temp directory exists
10
  temp_dir = "temp"
11
  os.makedirs(temp_dir, exist_ok=True)
12
 
13
+ # UI setup
14
  st.set_page_config(page_title="PCR Primer Design", page_icon="🧬", layout="wide")
 
 
15
  st.sidebar.header("User Guide")
16
+ st.sidebar.info("""
17
+ 1. Upload a GenBank or FASTA file.
18
+ 2. If GenBank: select a feature to design primers.
19
+ 3. Adjust PCR product size range and number of pairs.
20
+ 4. Generate primers and select a pair to visualize.
21
+ 5. Download primer table and plot (PNG or SVG).
22
+ """)
23
+
24
+ # Upload input
25
+ uploaded_file = st.file_uploader("Upload GenBank or FASTA file", type=['gb', 'gbk', 'fa', 'fasta'])
26
+
27
+ def extract_features_from_genbank(content):
28
+ stream = StringIO(content.decode("utf-8"))
29
+ record = SeqIO.read(stream, "genbank")
30
+ features = {'CDS': [], 'tRNA': [], 'gene': []}
31
+ for f in record.features:
32
+ if f.type in features:
33
+ features[f.type].append(f)
 
 
 
 
 
 
 
 
34
  return features, record
35
 
36
+ def parse_fasta(content):
37
+ return SeqIO.read(StringIO(content.decode("utf-8")), "fasta")
 
 
38
 
39
+ def design_primers(seq, size_range, count):
40
+ size_min, size_max = map(int, size_range.split('-'))
41
  return primer3.bindings.designPrimers(
42
+ {'SEQUENCE_TEMPLATE': str(seq)},
43
  {
44
+ 'PRIMER_PRODUCT_SIZE_RANGE': [[size_min, size_max]],
 
 
 
45
  'PRIMER_OPT_SIZE': 20,
46
  'PRIMER_MIN_SIZE': 18,
47
  'PRIMER_MAX_SIZE': 23,
 
50
  'PRIMER_MAX_TM': 63.0,
51
  'PRIMER_MIN_GC': 20.0,
52
  'PRIMER_MAX_GC': 80.0,
53
+ 'PRIMER_NUM_RETURN': count,
54
  }
55
  )
56
 
57
+ def plot_primer_binding(seq_len, left_start, left_len, right_start, right_len, product_size):
58
+ fig, ax = plt.subplots(figsize=(12, 2))
59
+ ax.hlines(0, 0, seq_len, color='black', linewidth=2)
60
+ ax.plot([left_start, left_start + left_len], [0.2, 0.2], color='blue', linewidth=4)
61
+ ax.text(left_start, 0.35, 'Left Primer', color='blue', fontsize=12)
62
+ ax.plot([right_start - right_len, right_start], [-0.2, -0.2], color='red', linewidth=4)
63
+ ax.text(right_start - right_len, -0.4, 'Right Primer', color='red', fontsize=12)
64
+ ax.hlines(0.05, left_start + left_len, right_start - right_len, color='gray', linestyle='--', linewidth=2)
65
+ ax.text((left_start + right_start) // 2, 0.1, f'{product_size} bp', fontsize=12, ha='center', color='gray')
66
+ ax.set_ylim(-1, 1)
67
+ ax.set_xlim(-20, seq_len + 20)
68
+ ax.axis('off')
69
+ ax.set_title("Primer Binding Sites and PCR Product", fontsize=14)
70
+ return fig
71
+
72
+ if uploaded_file:
73
+ ext = os.path.splitext(uploaded_file.name)[1].lower()
74
+ sequence = None
75
+ record = None
76
+
77
+ if ext in ['.gb', '.gbk']:
78
+ features, record = extract_features_from_genbank(uploaded_file.getvalue())
79
+ st.subheader("Feature Selection")
80
+ ftype = st.selectbox("Select feature type", ['CDS', 'tRNA', 'gene'])
81
+ options = [
82
+ f"{f.qualifiers.get('gene', [''])[0]} ({f.location.start}:{f.location.end}) [length: {len(f.location)} bp]"
83
+ for f in features[ftype]
84
+ ]
85
+ selected = st.selectbox("Select feature", range(len(options)), format_func=lambda x: options[x])
86
+ selected_feature = features[ftype][selected]
87
+ sequence = selected_feature.extract(record.seq)
88
+ elif ext in ['.fa', '.fasta']:
89
  record = parse_fasta(uploaded_file.getvalue())
90
  sequence = record.seq
91
+ st.success(f"FASTA file loaded: {len(sequence)} bp sequence")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
+ if sequence:
94
+ st.code(str(sequence), language="text")
95
+ st.subheader("Primer Design Parameters")
96
+ product_range = st.slider(
97
+ "PCR product size range",
98
+ min_value=100,
99
+ max_value=len(sequence),
100
+ value=(100, min(500, len(sequence))),
101
+ step=10
102
+ )
103
+ primer_count = st.number_input("Number of primer pairs", min_value=1, value=5, step=1)
104
+
105
+ if st.button("Design Primers"):
106
+ with st.spinner("Designing primers..."):
107
+ primers = design_primers(sequence, f"{product_range[0]}-{product_range[1]}", primer_count)
108
+
109
+ data = []
110
+ for i in range(primer_count):
111
+ if f'PRIMER_LEFT_{i}_SEQUENCE' in primers:
112
+ data.append({
113
+ "Pair": i + 1,
114
+ "Left Seq": primers[f'PRIMER_LEFT_{i}_SEQUENCE'],
115
+ "Right Seq": primers[f'PRIMER_RIGHT_{i}_SEQUENCE'],
116
+ "Left TM": round(primers[f'PRIMER_LEFT_{i}_TM'], 2),
117
+ "Right TM": round(primers[f'PRIMER_RIGHT_{i}_TM'], 2),
118
+ "Product Size": primers[f'PRIMER_PAIR_{i}_PRODUCT_SIZE'],
119
+ "L_Pos": primers[f'PRIMER_LEFT_{i}'][0],
120
+ "L_Len": primers[f'PRIMER_LEFT_{i}'][1],
121
+ "R_Pos": primers[f'PRIMER_RIGHT_{i}'][0],
122
+ "R_Len": primers[f'PRIMER_RIGHT_{i}'][1],
123
+ })
124
+
125
+ df = pd.DataFrame(data)
126
+ st.subheader("Primer Table")
127
+ st.dataframe(df.drop(columns=["L_Pos", "L_Len", "R_Pos", "R_Len"]))
128
+
129
+ csv = df.drop(columns=["L_Pos", "L_Len", "R_Pos", "R_Len"]).to_csv(index=False).encode("utf-8")
130
+ st.download_button("Download Primer Table", csv, "primers.csv", "text/csv")
131
+
132
+ # Primer pair selection
133
+ selected_pair = st.selectbox("Select Primer Pair to Visualize", df["Pair"])
134
+ selected_row = df[df["Pair"] == selected_pair].iloc[0]
135
+
136
+ fig = plot_primer_binding(
137
+ seq_len=len(sequence),
138
+ left_start=selected_row["L_Pos"],
139
+ left_len=selected_row["L_Len"],
140
+ right_start=selected_row["R_Pos"],
141
+ right_len=selected_row["R_Len"],
142
+ product_size=selected_row["Product Size"]
143
  )
144
+ st.pyplot(fig)
145
+
146
+ # Export options
147
+ format = st.radio("Download format", ["PNG", "SVG"])
148
+ buf = BytesIO()
149
+ fmt = "png" if format == "PNG" else "svg"
150
+ fig.savefig(buf, format=fmt, dpi=300)
151
+ st.download_button(f"Download Plot ({format})", buf.getvalue(), f"primer_plot.{fmt}", f"image/{fmt}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
  # Footer
154
  st.markdown("""