yashm committed on
Commit
d0aa318
·
verified ·
1 Parent(s): 5357bde

Update app.py

Browse files

✅ GenBank and FASTA upload
✅ Full GenBank record fetching from NCBI by accession ID
✅ Dynamic product size slider
✅ Primer binding visualization with pair selector
✅ Plot export as PNG or SVG

Files changed (1) hide show
  1. app.py +85 -57
app.py CHANGED
@@ -1,25 +1,27 @@
1
  import streamlit as st
2
  import pandas as pd
3
  import primer3
4
- from Bio import SeqIO
5
  import matplotlib.pyplot as plt
6
  from io import StringIO, BytesIO
7
  import os
8
 
9
- st.set_page_config(page_title="PCR Primer Design", page_icon="🧬", layout="wide")
 
 
 
 
10
  st.sidebar.header("User Guide")
11
  st.sidebar.info("""
12
- 1. Upload a GenBank or FASTA file.
13
- 2. Select feature (if GenBank).
14
- 3. Set product size range and number of primers.
15
- 4. Design primers.
16
- 5. Select a pair to visualize and download plot.
17
  """)
18
 
19
- uploaded_file = st.file_uploader("Upload GenBank or FASTA file", type=['gb', 'gbk', 'fa', 'fasta'])
20
-
21
  def extract_features_from_genbank(content):
22
- record = SeqIO.read(StringIO(content.decode("utf-8")), "genbank")
23
  features = {'CDS': [], 'tRNA': [], 'gene': []}
24
  for f in record.features:
25
  if f.type in features:
@@ -29,6 +31,10 @@ def extract_features_from_genbank(content):
29
  def parse_fasta(content):
30
  return SeqIO.read(StringIO(content.decode("utf-8")), "fasta")
31
 
 
 
 
 
32
  def design_primers(seq, size_range, count):
33
  min_size, max_size = map(int, size_range.split('-'))
34
  return primer3.bindings.designPrimers(
@@ -62,67 +68,90 @@ def plot_primer_binding(seq_len, left_start, left_len, right_start, right_len, p
62
  ax.set_title("Primer Binding Sites and PCR Product", fontsize=14)
63
  return fig
64
 
65
- primer_df = None
66
- sequence = None
67
- df_for_plot = None
 
 
 
68
 
69
- if uploaded_file:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  ext = os.path.splitext(uploaded_file.name)[1].lower()
71
- if ext in ['.gb', '.gbk']:
72
- features, record = extract_features_from_genbank(uploaded_file.getvalue())
73
- st.subheader("Feature Selection")
74
- ftype = st.selectbox("Select feature type", ['CDS', 'tRNA', 'gene'])
75
- options = [
76
- f"{f.qualifiers.get('gene', [''])[0]} ({f.location.start}:{f.location.end}) [length: {len(f.location)} bp]"
77
- for f in features[ftype]
78
- ]
79
- selected = st.selectbox("Select feature", range(len(options)), format_func=lambda x: options[x])
80
- selected_feature = features[ftype][selected]
81
- sequence = selected_feature.extract(record.seq)
82
- elif ext in ['.fa', '.fasta']:
83
  record = parse_fasta(uploaded_file.getvalue())
84
  sequence = record.seq
85
- st.success(f"FASTA loaded (length: {len(sequence)} bp)")
 
 
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  if sequence:
88
  st.code(str(sequence), language="text")
89
- st.subheader("Primer Design Parameters")
90
- product_range = st.slider("PCR product size range", 100, len(sequence), (100, min(500, len(sequence))), step=10)
91
- primer_count = st.number_input("Number of primer pairs", 1, 20, 5)
92
-
93
- if "primers" not in st.session_state:
94
- st.session_state.primers = []
95
- st.session_state.df = pd.DataFrame()
96
 
97
  if st.button("Design Primers"):
98
  with st.spinner("Designing primers..."):
99
- primers = design_primers(sequence, f"{product_range[0]}-{product_range[1]}", primer_count)
100
  rows = []
101
- for i in range(primer_count):
102
- if f'PRIMER_LEFT_{i}_SEQUENCE' in primers:
103
  rows.append({
104
  "Pair": i + 1,
105
- "Left Seq": primers[f'PRIMER_LEFT_{i}_SEQUENCE'],
106
- "Right Seq": primers[f'PRIMER_RIGHT_{i}_SEQUENCE'],
107
- "Left TM": round(primers[f'PRIMER_LEFT_{i}_TM'], 2),
108
- "Right TM": round(primers[f'PRIMER_RIGHT_{i}_TM'], 2),
109
- "Product Size": primers[f'PRIMER_PAIR_{i}_PRODUCT_SIZE'],
110
- "L_Pos": primers[f'PRIMER_LEFT_{i}'][0],
111
- "L_Len": primers[f'PRIMER_LEFT_{i}'][1],
112
- "R_Pos": primers[f'PRIMER_RIGHT_{i}'][0],
113
- "R_Len": primers[f'PRIMER_RIGHT_{i}'][1],
114
  })
115
- st.session_state.df = pd.DataFrame(rows)
116
 
117
- if not st.session_state.df.empty:
118
- st.subheader("Designed Primers")
119
- st.dataframe(st.session_state.df.drop(columns=["L_Pos", "L_Len", "R_Pos", "R_Len"]))
120
 
121
- csv = st.session_state.df.drop(columns=["L_Pos", "L_Len", "R_Pos", "R_Len"]).to_csv(index=False).encode("utf-8")
122
  st.download_button("Download Primer Table", csv, "primers.csv", "text/csv")
123
 
124
- selected_pair = st.selectbox("Select Primer Pair to Visualize", st.session_state.df["Pair"])
125
- row = st.session_state.df[st.session_state.df["Pair"] == selected_pair].iloc[0]
126
 
127
  fig = plot_primer_binding(
128
  len(sequence),
@@ -132,11 +161,10 @@ if sequence:
132
  )
133
  st.pyplot(fig)
134
 
135
- export_fmt = st.radio("Download format", ["PNG", "SVG"])
136
  buf = BytesIO()
137
- fmt = "png" if export_fmt == "PNG" else "svg"
138
- fig.savefig(buf, format=fmt, dpi=300)
139
- st.download_button(f"Download Plot ({export_fmt})", buf.getvalue(), f"primer_plot.{fmt}", f"image/{fmt}")
140
 
141
  # Footer
142
  st.markdown("""
 
1
  import streamlit as st
2
  import pandas as pd
3
  import primer3
4
+ from Bio import SeqIO, Entrez
5
  import matplotlib.pyplot as plt
6
  from io import StringIO, BytesIO
7
  import os
8
 
9
+ # Required by NCBI
10
+ Entrez.email = "your.email@example.com"
11
+
12
+ st.set_page_config(page_title="PCR Primer Designer", layout="wide")
13
+
14
  st.sidebar.header("User Guide")
15
  st.sidebar.info("""
16
+ 1. Upload a GenBank/FASTA file or enter an NCBI accession ID.
17
+ 2. For GenBank: select a feature for primer design.
18
+ 3. Set PCR product size and number of primers.
19
+ 4. View, select, and download primer designs and visualizations.
 
20
  """)
21
 
22
+ # Helper functions
 
23
  def extract_features_from_genbank(content):
24
+ record = SeqIO.read(StringIO(content.decode("utf-8") if isinstance(content, bytes) else content), "genbank")
25
  features = {'CDS': [], 'tRNA': [], 'gene': []}
26
  for f in record.features:
27
  if f.type in features:
 
31
  def parse_fasta(content):
32
  return SeqIO.read(StringIO(content.decode("utf-8")), "fasta")
33
 
34
+ def fetch_genbank_from_ncbi(accession):
35
+ with Entrez.efetch(db="nucleotide", id=accession, rettype="gb", retmode="text") as handle:
36
+ return handle.read()
37
+
38
  def design_primers(seq, size_range, count):
39
  min_size, max_size = map(int, size_range.split('-'))
40
  return primer3.bindings.designPrimers(
 
68
  ax.set_title("Primer Binding Sites and PCR Product", fontsize=14)
69
  return fig
70
 
71
+ # UI: NCBI or file
72
+ st.subheader("Input Sequence")
73
+ col1, col2 = st.columns(2)
74
+ with col1:
75
+ accession_input = st.text_input("Enter NCBI accession (e.g., NM_000546)")
76
+ fetch_btn = st.button("Fetch GenBank Record")
77
 
78
+ with col2:
79
+ uploaded_file = st.file_uploader("Or upload GenBank/FASTA", type=["gb", "gbk", "fa", "fasta"])
80
+
81
+ sequence = None
82
+ record = None
83
+ df = pd.DataFrame()
84
+
85
+ # Handle input
86
+ if fetch_btn and accession_input:
87
+ try:
88
+ gb_text = fetch_genbank_from_ncbi(accession_input.strip())
89
+ record = SeqIO.read(StringIO(gb_text), "genbank")
90
+ ext = ".gb"
91
+ st.success(f"Fetched GenBank record for {accession_input}")
92
+ except Exception as e:
93
+ st.error(f"Error fetching GenBank record: {e}")
94
+
95
+ elif uploaded_file:
96
  ext = os.path.splitext(uploaded_file.name)[1].lower()
97
+ if ext in ['.fa', '.fasta']:
 
 
 
 
 
 
 
 
 
 
 
98
  record = parse_fasta(uploaded_file.getvalue())
99
  sequence = record.seq
100
+ st.success(f"FASTA file loaded: {len(sequence)} bp")
101
+ else:
102
+ features, record = extract_features_from_genbank(uploaded_file.getvalue())
103
 
104
+ # Feature selection if GenBank
105
+ if record and hasattr(record, "features"):
106
+ features, record = extract_features_from_genbank(record.format("genbank"))
107
+ st.subheader("Feature Selection")
108
+ ftype = st.selectbox("Feature type", ['CDS', 'tRNA', 'gene'])
109
+ options = [
110
+ f"{f.qualifiers.get('gene', [''])[0]} ({f.location.start}:{f.location.end}) [length: {len(f.location)} bp]"
111
+ for f in features[ftype]
112
+ ]
113
+ selected = st.selectbox("Select feature", range(len(options)), format_func=lambda x: options[x])
114
+ selected_feature = features[ftype][selected]
115
+ sequence = selected_feature.extract(record.seq)
116
+ elif record and hasattr(record, "seq"):
117
+ sequence = record.seq
118
+
119
+ # Primer design UI
120
  if sequence:
121
  st.code(str(sequence), language="text")
122
+ st.subheader("Primer Design Settings")
123
+ size_range = st.slider("PCR product size (bp)", 100, len(sequence), (100, min(500, len(sequence))), step=10)
124
+ pair_count = st.number_input("Number of primer pairs", 1, 20, 5)
 
 
 
 
125
 
126
  if st.button("Design Primers"):
127
  with st.spinner("Designing primers..."):
128
+ result = design_primers(sequence, f"{size_range[0]}-{size_range[1]}", pair_count)
129
  rows = []
130
+ for i in range(pair_count):
131
+ if f'PRIMER_LEFT_{i}_SEQUENCE' in result:
132
  rows.append({
133
  "Pair": i + 1,
134
+ "Left Seq": result[f'PRIMER_LEFT_{i}_SEQUENCE'],
135
+ "Right Seq": result[f'PRIMER_RIGHT_{i}_SEQUENCE'],
136
+ "Left TM": round(result[f'PRIMER_LEFT_{i}_TM'], 2),
137
+ "Right TM": round(result[f'PRIMER_RIGHT_{i}_TM'], 2),
138
+ "Product Size": result[f'PRIMER_PAIR_{i}_PRODUCT_SIZE'],
139
+ "L_Pos": result[f'PRIMER_LEFT_{i}'][0],
140
+ "L_Len": result[f'PRIMER_LEFT_{i}'][1],
141
+ "R_Pos": result[f'PRIMER_RIGHT_{i}'][0],
142
+ "R_Len": result[f'PRIMER_RIGHT_{i}'][1],
143
  })
144
+ df = pd.DataFrame(rows)
145
 
146
+ if not df.empty:
147
+ st.subheader("Designed Primer Pairs")
148
+ st.dataframe(df.drop(columns=["L_Pos", "L_Len", "R_Pos", "R_Len"]))
149
 
150
+ csv = df.drop(columns=["L_Pos", "L_Len", "R_Pos", "R_Len"]).to_csv(index=False).encode("utf-8")
151
  st.download_button("Download Primer Table", csv, "primers.csv", "text/csv")
152
 
153
+ selected_pair = st.selectbox("Select Primer Pair to Visualize", df["Pair"])
154
+ row = df[df["Pair"] == selected_pair].iloc[0]
155
 
156
  fig = plot_primer_binding(
157
  len(sequence),
 
161
  )
162
  st.pyplot(fig)
163
 
164
+ fmt = st.radio("Export format", ["PNG", "SVG"])
165
  buf = BytesIO()
166
+ fig.savefig(buf, format=fmt.lower(), dpi=300)
167
+ st.download_button(f"Download Plot ({fmt})", buf.getvalue(), f"primer_plot.{fmt.lower()}", f"image/{fmt.lower()}")
 
168
 
169
  # Footer
170
  st.markdown("""