alidenewade committed on
Commit
49aea16
·
verified ·
1 Parent(s): 8b0eb26

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +370 -0
app.py ADDED
@@ -0,0 +1,370 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import matplotlib.pyplot as plt
3
+ from Bio import SeqIO
4
+ from Bio.Seq import Seq # Though not directly used in final logic, good for context
5
+ from Bio.Restriction import RestrictionBatch, AllEnzymes, Analysis
6
+ import os # For getting filename
7
+
8
+ # Ensure matplotlib uses a non-interactive backend for Gradio
9
+ import matplotlib
10
+ matplotlib.use('Agg')
11
+
12
+ # --- Core BioPython and Plotting Functions ---
13
+
14
def _show_gel_error(fig, ax, message, title):
    """Render *message* in red, centred on otherwise empty axes, and return *fig*."""
    # Axes coordinates keep the text centred whatever the data limits/scale are.
    ax.text(0.5, 0.5, message, ha='center', va='center', wrap=True, color='red',
            transform=ax.transAxes)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(title, fontsize=10)
    fig.tight_layout()
    return fig


def simulate_digest_and_plot_gradio(plasmid_seq_record, enzyme_name, plasmid_label):
    """
    Simulate a restriction digest and plot the fragments as a one-lane virtual gel.

    The sequence is digested as a circular molecule (``linear=False``) so the
    fragment pattern agrees with the circular analysis that produced the list
    of selectable enzymes.

    Args:
        plasmid_seq_record: Record whose ``.seq`` is digested.
        enzyme_name: Enzyme name, looked up in Biopython's ``AllEnzymes``.
        plasmid_label: Human-readable plasmid label used in titles and messages.

    Returns:
        A Matplotlib figure. Lookup or digestion failures are rendered as a red
        message on the figure instead of being raised.
    """
    fig, ax = plt.subplots(figsize=(6, 8))
    error_title = f"Virtual Gel: {plasmid_label} - Error"

    try:
        # enzyme_name may arrive as a non-str UI value; normalise before lookup.
        enzyme = AllEnzymes.get(str(enzyme_name))
        if not enzyme:
            raise ValueError(f"Enzyme '{enzyme_name}' not found in Biopython's AllEnzymes.")
    except Exception as e:
        return _show_gel_error(
            fig, ax, f"Error: Could not load enzyme '{enzyme_name}'.\n{e}", error_title)

    sequence = plasmid_seq_record.seq
    try:
        cut_sites = enzyme.search(sequence, linear=False)
        if cut_sites:
            fragments = enzyme.catalyse(sequence, linear=False)
            lengths = sorted((len(fragment) for fragment in fragments), reverse=True)
        else:
            # No recognition site: the plasmid runs as a single full-length band.
            lengths = [len(sequence)]
    except Exception as e:
        # Some enzymes cannot be simulated by Biopython; report rather than crash.
        return _show_gel_error(
            fig, ax,
            f"Error: Could not digest {plasmid_label} with {enzyme_name}.\n{e}",
            error_title)

    is_uncut = not cut_sites

    ax.set_yscale("log")
    min_display_size = 10  # bp
    # Factor 2 leaves headroom above the largest possible band for the well.
    max_display_size = max(len(sequence), min_display_size * 10) * 2
    # Axis is NOT inverted: large fragments sit near the top (the well) and
    # small fragments towards the bottom, as on a real gel.
    ax.set_ylim(min_display_size, max_display_size)

    band_width = 0.6
    lane_center = 0.5

    for i, size in enumerate(lengths):
        if size < min_display_size:
            # lengths is sorted descending, so everything from here on is too small.
            ax.text(lane_center, min_display_size * 1.1,
                    f"(+ {len(lengths) - i} small fragments < {min_display_size}bp not shown)",
                    ha='center', va='bottom', fontsize=7, color='gray')
            break
        ax.plot([lane_center - band_width / 2, lane_center + band_width / 2], [size, size],
                linewidth=6, color='royalblue', solid_capstyle='butt')
        ax.text(lane_center + band_width / 2 + 0.05, size, f"{size} bp",
                va='center', ha='left', fontsize=8)

    if is_uncut:
        ax.text(0.5, 0.5, f"Enzyme {enzyme_name} does not cut {plasmid_label}",
                ha='center', va='center', wrap=True, transform=ax.transAxes)
        ax.set_title(f"Virtual Gel: {plasmid_label} + {enzyme_name} (No Sites)", fontsize=10)
    else:
        ax.set_title(f"Virtual Gel: {plasmid_label} digested with {enzyme_name}", fontsize=10)

    ax.set_ylabel("Fragment Size (bp)", fontsize=9)
    ax.set_xlabel("Lane 1", fontsize=9)  # Indicate it's one lane
    ax.set_xticks([])  # No x-axis ticks for a single lane view
    ax.tick_params(axis='y', labelsize=8)

    # Draw the well at the top of the lane. x is in data units (aligned with the
    # bands); y is in axes fractions so the well stays visible on the log scale.
    well_left = lane_center - band_width / 1.5
    well_right = lane_center + band_width / 1.5
    ax.plot([well_left, well_left, well_right, well_right],
            [0.97, 0.995, 0.995, 0.97],
            linewidth=1.5, color='black', transform=ax.get_xaxis_transform())

    fig.tight_layout(pad=1.5)
    return fig
103
+
104
def analyze_plasmids_gradio(file1_path, file2_path, current_plasmid_choice_for_plot):
    """
    Compare two plasmid files and report the enzymes that cut only one of them.

    Args:
        file1_path: Path to the first plasmid file, or None if not uploaded.
        file2_path: Path to the second plasmid file, or None if not uploaded.
        current_plasmid_choice_for_plot: "Plasmid 1" or "Plasmid 2"; decides
            which enzyme list seeds the dropdown update.

    Returns:
        An 8-tuple: status text, message for plasmid 1, message for plasmid 2,
        record 1, record 2, enzyme names unique to 1, enzyme names unique to 2,
        and a ``gr.update`` for the enzyme dropdown. On failure the records are
        None, the lists are empty and the dropdown is reset.
    """
    initial_enzyme_dd_update = gr.update(
        choices=["Analyze plasmids first"],
        value="Analyze plasmids first",
        interactive=False,
    )

    def failure(status_text):
        # Same shape as the success tuple, with nothing analysed.
        return status_text, "", "", None, None, [], [], initial_enzyme_dd_update

    if file1_path is None or file2_path is None:
        return failure("Error: Please upload both plasmid files.")

    def read_plasmid(filepath, filename_for_error):
        # GenBank is tried first; any parsing failure falls back to FASTA.
        try:
            return SeqIO.read(filepath, "genbank")
        except Exception:
            pass
        try:
            return SeqIO.read(filepath, "fasta")
        except Exception as e_fasta:
            raise ValueError(f"Could not parse '{filename_for_error}'. Ensure it's a valid GenBank or FASTA file. Last error: {e_fasta}")

    try:
        # Base names of the uploaded paths are used in user-facing messages.
        p1_orig_filename = os.path.basename(file1_path)
        p2_orig_filename = os.path.basename(file2_path)
        plasmid1_seq_rec = read_plasmid(file1_path, p1_orig_filename)
        plasmid2_seq_rec = read_plasmid(file2_path, p2_orig_filename)
    except Exception as e:
        return failure(str(e))

    def is_usable(candidate):
        # Keep entries that define a recognition site; if an entry exposes
        # is_restriction(), it must also report True.
        if not (candidate and hasattr(candidate, 'site') and candidate.site is not None):
            return False
        return not hasattr(candidate, 'is_restriction') or candidate.is_restriction()

    valid_enzyme_objects = [
        candidate
        for candidate in map(AllEnzymes.get, AllEnzymes.elements())
        if is_usable(candidate)
    ]
    if not valid_enzyme_objects:
        return failure("Error: Could not load any restriction enzymes from Biopython.")

    enzymes_batch = RestrictionBatch(valid_enzyme_objects)

    # Both sequences are analysed as circular molecules.
    analysis1 = Analysis(enzymes_batch, plasmid1_seq_rec.seq, linear=False)
    analysis2 = Analysis(enzymes_batch, plasmid2_seq_rec.seq, linear=False)
    cutters1 = set(analysis1.with_sites())
    cutters2 = set(analysis2.with_sites())

    unique_to_1_names = sorted(str(enz) for enz in cutters1 - cutters2)
    unique_to_2_names = sorted(str(enz) for enz in cutters2 - cutters1)

    p1_display_label = f"Plasmid 1 ({p1_orig_filename})"
    p2_display_label = f"Plasmid 2 ({p2_orig_filename})"

    def summarize(display_label, names):
        if names:
            return f"Enzymes cutting only {display_label} ({len(names)}):\n" + ", ".join(names)
        return f"No unique enzymes found for {display_label}."

    msg1 = summarize(p1_display_label, unique_to_1_names)
    msg2 = summarize(p2_display_label, unique_to_2_names)

    status = "Analysis complete."
    if not (unique_to_1_names or unique_to_2_names):
        status += " No enzymes found that uniquely cut only one of the plasmids."

    # Seed the dropdown from the currently selected plasmid; any value other
    # than "Plasmid 1" falls through to plasmid 2's list.
    if current_plasmid_choice_for_plot == "Plasmid 1":
        selected_names, selected_label = unique_to_1_names, p1_display_label
    else:
        selected_names, selected_label = unique_to_2_names, p2_display_label
    dd_choices = selected_names if selected_names else [f"No unique enzymes for {selected_label}"]

    known_choice = (current_plasmid_choice_for_plot == "Plasmid 1"
                    or current_plasmid_choice_for_plot == "Plasmid 2")
    if known_choice and selected_names:
        enzyme_dd_update = gr.update(
            choices=["Select an enzyme"] + dd_choices,
            value="Select an enzyme",
            interactive=True,
        )
    else:
        enzyme_dd_update = gr.update(
            choices=dd_choices,
            value=dd_choices[0],
            interactive=bool(dd_choices) and "No unique" not in dd_choices[0],
        )

    return (status, msg1, msg2, plasmid1_seq_rec, plasmid2_seq_rec,
            unique_to_1_names, unique_to_2_names, enzyme_dd_update)
193
+
194
def _placeholder_figure(message, color=None):
    """Return a blank figure showing *message* centred; *color* overrides the text colour."""
    fig, ax = plt.subplots(figsize=(6, 8))
    text_style = {} if color is None else {"color": color}
    ax.text(0.5, 0.5, message, ha='center', va='center', wrap=True, **text_style)
    ax.set_xticks([])
    ax.set_yticks([])
    fig.tight_layout()
    return fig


def plot_selected_digest_controller(plasmid_choice_label, enzyme_name, p1_data, p2_data):
    """
    Pick the record for the chosen plasmid and delegate to the gel-plotting function.

    A figure is only created for the path actually taken, so no unused
    placeholder figure is left registered with pyplot on the success path.

    Args:
        plasmid_choice_label: "Plasmid 1" or "Plasmid 2".
        enzyme_name: Dropdown value; placeholder entries are rejected.
        p1_data: Stored record for plasmid 1, or None if not analysed yet.
        p2_data: Stored record for plasmid 2, or None if not analysed yet.

    Returns:
        The virtual-gel figure, or a message figure when the selection is
        incomplete or invalid.
    """
    # The dropdown also carries prompt/notice entries that are not enzymes.
    if (not enzyme_name
            or enzyme_name == "Select an enzyme"
            or "No unique enzymes" in enzyme_name
            or "Analyze plasmids first" in enzyme_name):
        return _placeholder_figure("Please select a valid plasmid and enzyme after analysis.")

    if plasmid_choice_label == "Plasmid 1":
        target_plasmid_rec = p1_data
    elif plasmid_choice_label == "Plasmid 2":
        target_plasmid_rec = p2_data
    else:  # Should not happen
        return _placeholder_figure("Invalid plasmid selection.", color='red')

    if target_plasmid_rec is None:
        return _placeholder_figure(
            f"{plasmid_choice_label} data not loaded. Please re-analyze.", color='red')

    # Prefer the record's name, then its id, to qualify the label.
    target_label = plasmid_choice_label
    record_tag = (getattr(target_plasmid_rec, 'name', None)
                  or getattr(target_plasmid_rec, 'id', None))
    if record_tag:
        target_label += f" ({record_tag})"

    return simulate_digest_and_plot_gradio(target_plasmid_rec, enzyme_name, target_label)
246
+
247
def update_enzyme_dropdown_choices_on_radio_change(plasmid_choice_label, p1_enzyme_names, p2_enzyme_names):
    """
    Build the enzyme-dropdown update that matches the newly selected plasmid.

    Returns a ``gr.update``: a selectable list headed by a prompt entry when the
    plasmid has unique enzymes, a single disabled notice when it has none, or an
    empty disabled dropdown for an unrecognised selection.
    """
    per_plasmid = (
        ("Plasmid 1", p1_enzyme_names, "No unique enzymes for P1"),
        ("Plasmid 2", p2_enzyme_names, "No unique enzymes for P2"),
    )
    for label, enzyme_names, empty_notice in per_plasmid:
        if plasmid_choice_label != label:
            continue
        if enzyme_names:
            # Real enzymes available: prepend the prompt and enable the control.
            return gr.update(choices=["Select an enzyme"] + enzyme_names,
                             value="Select an enzyme", interactive=True)
        # Nothing unique for this plasmid: show the notice, keep it disabled.
        return gr.update(choices=[empty_notice], value=empty_notice, interactive=False)

    # Fallback, should not be reached
    return gr.update(choices=[], value=None, interactive=False)
264
+
265
+
266
+ # --- Gradio Interface Definition ---
267
with gr.Blocks(theme=gr.themes.Default()) as demo:
    gr.Markdown("# Plasmid Restriction Digest Analyzer & Virtual Gel")
    gr.Markdown(
        "**Instructions:**\n"
        "1. Upload two plasmid sequence files (GenBank `.gb`/`.gbk` or FASTA `.fasta`/`.fna`/`.fa` format).\n"
        "2. Click `Analyze Plasmids`. Results will show enzymes that uniquely cut one plasmid but not the other.\n"
        "3. Select which plasmid's unique enzymes you want to consider for plotting.\n"
        "4. Choose a specific enzyme from the dropdown list.\n"
        "5. Click `Generate Gel Plot` to visualize the digestion pattern."
    )

    # Per-session state: the two parsed plasmid records and, for each plasmid,
    # the names of the enzymes that cut only that plasmid.
    plasmid1_data_state = gr.State()
    plasmid2_data_state = gr.State()
    p1_unique_enzymes_list_state = gr.State([])
    p2_unique_enzymes_list_state = gr.State([])

    # Extensions accepted by both upload widgets.
    plasmid_file_types = [".gb", ".gbk", ".fasta", ".fna", ".fa"]

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 1. Upload Plasmids & Analyze")
            file_p1 = gr.File(label="Plasmid 1 File", type="filepath", file_types=plasmid_file_types)
            file_p2 = gr.File(label="Plasmid 2 File", type="filepath", file_types=plasmid_file_types)

            analyze_btn = gr.Button("Analyze Plasmids", variant="primary", elem_id="analyze_button")

        with gr.Column(scale=2):
            gr.Markdown("### Analysis Results")
            status_message_txt = gr.Textbox(label="Status", interactive=False, lines=1, max_lines=2)
            unique_enzymes_p1_txt = gr.Textbox(label="Enzymes cutting only Plasmid 1", interactive=False, lines=3, max_lines=6)
            unique_enzymes_p2_txt = gr.Textbox(label="Enzymes cutting only Plasmid 2", interactive=False, lines=3, max_lines=6)

    gr.Markdown("---")
    gr.Markdown("### 2. Visualize Digestion on Virtual Gel")

    with gr.Row():
        with gr.Column(scale=1):
            plasmid_to_plot_choice_radio = gr.Radio(
                choices=["Plasmid 1", "Plasmid 2"],
                label="Select Plasmid for Gel Visualization",
                value="Plasmid 1",  # Default choice
                interactive=True
            )

            enzyme_for_plot_dropdown = gr.Dropdown(
                label="Select Unique Enzyme",
                choices=["Analyze plasmids first"],
                value="Analyze plasmids first",
                interactive=False  # Disabled until an analysis has populated it
            )
            plot_btn = gr.Button("Generate Gel Plot", variant="primary", elem_id="plot_button")

        with gr.Column(scale=2):
            gel_plot_output = gr.Plot(label="Virtual Agarose Gel")

    gr.Markdown("---")
    gr.Markdown("Developed using Biopython, Matplotlib, and Gradio.")
    gr.Markdown("Note: Large plasmid files or complex analyses might take a few moments.")

    # --- Event Handlers ---

    # Analyze: parse both files, fill the result boxes and state, and seed the
    # enzyme dropdown. The radio is read directly as an input, so its current
    # value is used without a mirrored hidden component.
    analyze_btn.click(
        fn=analyze_plasmids_gradio,
        inputs=[file_p1, file_p2, plasmid_to_plot_choice_radio],
        outputs=[
            status_message_txt,
            unique_enzymes_p1_txt,
            unique_enzymes_p2_txt,
            plasmid1_data_state,
            plasmid2_data_state,
            p1_unique_enzymes_list_state,
            p2_unique_enzymes_list_state,
            enzyme_for_plot_dropdown
        ]
    )

    # Switching plasmid after analysis swaps the dropdown to that plasmid's enzymes.
    plasmid_to_plot_choice_radio.change(
        fn=update_enzyme_dropdown_choices_on_radio_change,
        inputs=[plasmid_to_plot_choice_radio, p1_unique_enzymes_list_state, p2_unique_enzymes_list_state],
        outputs=[enzyme_for_plot_dropdown]
    )

    # Plot: render the virtual gel for the selected plasmid/enzyme pair.
    plot_btn.click(
        fn=plot_selected_digest_controller,
        inputs=[plasmid_to_plot_choice_radio, enzyme_for_plot_dropdown, plasmid1_data_state, plasmid2_data_state],
        outputs=[gel_plot_output]
    )

if __name__ == '__main__':
    demo.launch()