re-type committed on
Commit
e8a3f8e
·
verified ·
1 Parent(s): bbbbe97

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +437 -11
app.py CHANGED
@@ -1,17 +1,443 @@
1
  import gradio as gr
 
 
 
 
 
 
2
 
3
- def test_function(text):
4
- return f"You entered: {text}"
 
5
 
6
- # Simple test interface
7
- with gr.Blocks(title="Test App") as demo:
8
- gr.Markdown("# Test Interface")
9
- with gr.Row():
10
- input_box = gr.Textbox(label="Input")
11
- output_box = gr.Textbox(label="Output")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- btn = gr.Button("Process")
14
- btn.click(test_function, inputs=input_box, outputs=output_box)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  if __name__ == "__main__":
17
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import torch
3
+ import numpy as np
4
+ import os
5
+ import traceback
6
+ import logging
7
+ import sys
8
 
9
# Logging setup: INFO and above, formatted as "<time> - <LEVEL> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Startup diagnostics, printed once when the module is imported/executed.
# They show where the process is running and which library versions are in use.
print("=== Gene Prediction App Starting ===")
print(f"Working directory: {os.getcwd()}")
print(f"Available files: {os.listdir('.')}")
print(f"PyTorch version: {torch.__version__}")
print(f"Gradio version: {gr.__version__}")
print(f"Python path: {sys.path}")

# Module-level state: written by initialize_model(), read by predict_genes()
# and create_interface().
#   predictor     -- the loaded GenePredictor instance, or None
#   model_loaded  -- True once the predictor has been constructed
#   error_message -- human-readable reason for the last initialization failure
predictor, model_loaded, error_message = None, False, ""
24
+
25
def initialize_model():
    """Import ``GenePredictor``, locate the model weights and build the predictor.

    The class is first imported as a regular module (``from predictor import
    GenePredictor``); if that fails, ``predictor.py`` in the current working
    directory is loaded directly by file path.  The weights file is then
    searched for in a fixed list of candidate paths.

    Side effects:
        Updates the module globals ``predictor``, ``model_loaded`` and
        ``error_message``.  On success ``error_message`` is cleared so a
        message from an earlier failed attempt cannot linger.

    Returns:
        bool: True when the predictor was constructed, False otherwise.  All
        failures are caught, printed (with traceback) and reported through
        ``error_message``; nothing is raised to the caller.
    """
    global predictor, model_loaded, error_message

    try:
        print("Attempting to import predictor...")

        # Try different import approaches
        try:
            from predictor import GenePredictor
            print("✅ Imported from predictor module")
        except ImportError:
            try:
                # If predictor.py is in the same directory
                import importlib.util
                spec = importlib.util.spec_from_file_location("predictor", "predictor.py")
                # spec_from_file_location may return None (or a spec without a
                # loader); fail with a readable message instead of an
                # AttributeError on the lines below.
                if spec is None or spec.loader is None:
                    raise ImportError("no import spec could be created for predictor.py")
                predictor_module = importlib.util.module_from_spec(spec)
                spec.loader.exec_module(predictor_module)
                GenePredictor = predictor_module.GenePredictor
                print("✅ Imported predictor.py directly")
            except Exception as e:
                print(f"Failed to import predictor: {e}")
                # Chain explicitly so the root cause stays in the traceback.
                raise ImportError(f"Could not import GenePredictor: {e}") from e

        # Look for model file
        possible_model_paths = [
            'best_boundary_aware_model.pth',
            'model/best_boundary_aware_model.pth',
            './best_boundary_aware_model.pth',
        ]
        model_path = next(
            (candidate for candidate in possible_model_paths if os.path.exists(candidate)),
            None,
        )

        if model_path is None:
            # List whatever .pth files do exist to make the failure actionable.
            available_models = [f for f in os.listdir('.') if f.endswith('.pth')]
            if os.path.exists('model'):
                available_models.extend(
                    f"model/{f}" for f in os.listdir('model') if f.endswith('.pth')
                )

            error_message = f"❌ Model file not found. Searched: {possible_model_paths}. Available: {available_models}"
            print(error_message)
            return False

        print(f"Found model file: {model_path}")
        print(f"Model file size: {os.path.getsize(model_path)} bytes")

        # Initialize predictor
        predictor = GenePredictor(model_path=model_path)
        model_loaded = True
        error_message = ""  # drop any message left by a previous failed attempt
        print("✅ Model initialized successfully")
        return True

    except Exception as e:
        # Top-level boundary: report the failure through module state instead
        # of crashing the app at startup.
        error_message = f"❌ Model initialization failed: {str(e)}"
        print(error_message)
        print("Full traceback:")
        traceback.print_exc()
        return False
86
+
87
def _normalize_sequence(raw):
    """Return *raw* upper-cased, with FASTA header lines and all whitespace removed."""
    body_lines = [ln for ln in raw.splitlines() if not ln.lstrip().startswith('>')]
    return "".join("".join(body_lines).split()).upper()


def _format_region(index, region):
    """Render one detected region dict as the tree-style text block shown to the user."""
    in_frame = 'Yes' if region.get('in_frame', False) else 'No'
    parts = [
        f"**🧬 Gene Region {index}:**\n",
        f"├─ Position: {region['start']:,} - {region['end']:,} bp\n",
        f"├─ Length: {region['length']:,} bp\n",
        f"├─ In-frame: {in_frame}\n",
        f"├─ Start codon: {region.get('start_codon') or 'Not detected'}\n",
        f"├─ Stop codon: {region.get('stop_codon') or 'Not detected'}\n",
    ]

    # Sequence preview: long regions are shortened to their first and last 60 bases.
    seq = region.get('sequence', '')
    if seq:
        preview = seq if len(seq) <= 120 else seq[:60] + '...' + seq[-60:]
        parts.append(f"└─ Sequence: `{preview}`\n")

    parts.append("\n")
    return "".join(parts)


def predict_genes(sequence):
    """Validate a DNA sequence, run the predictor and return a text report.

    Args:
        sequence: Raw user input.  FASTA header lines (starting with ``>``)
            and all whitespace are discarded before validation, matching the
            "FASTA headers will be ignored" promise in the UI instructions.

    Returns:
        str: The formatted analysis report, or a user-facing message when the
        model is not loaded, the input is empty or invalid (characters other
        than A/T/C/G/N, shorter than 3 or longer than 10,000 nucleotides), or
        prediction fails.  No exception escapes this function.
    """
    try:
        # Check if model is loaded
        if not model_loaded or predictor is None:
            return f"🚫 **Model Error**\n\n{error_message}\n\nPlease check that:\n1. predictor.py is in the same directory\n2. Model file (.pth) exists\n3. All dependencies are installed"

        # Input validation; input consisting only of headers/whitespace counts as empty.
        sequence = _normalize_sequence(sequence) if sequence else ""
        if not sequence:
            return "⚠️ **Input Error**\n\nPlease enter a DNA sequence."

        # Character validation
        invalid_chars = set(sequence) - set('ATCGN')
        if invalid_chars:
            return f"⚠️ **Invalid Characters**\n\nFound invalid characters: {', '.join(sorted(invalid_chars))}\n\nPlease use only: A, T, C, G, N"

        # Length validation
        if len(sequence) < 3:
            return f"⚠️ **Sequence Too Short**\n\nMinimum length: 3 nucleotides\nYour sequence: {len(sequence)} nucleotides"

        if len(sequence) > 10000:
            return f"⚠️ **Sequence Too Long**\n\nMaximum length: 10,000 nucleotides\nYour sequence: {len(sequence)} nucleotides\n\nFor longer sequences, consider splitting them into smaller chunks."

        print(f"Processing sequence of length: {len(sequence)}")

        # Make prediction (probs_dict is returned by the predictor but not used here)
        predictions, probs_dict, confidence = predictor.predict(sequence)
        regions = predictor.extract_gene_regions(predictions, sequence)

        # Format results; pieces are collected in a list and joined once.
        report = [
            "🧬 **Gene Prediction Results**\n\n",
            "📊 **Analysis Summary:**\n",
            f"• Sequence length: {len(sequence):,} bp\n",
            f"• Gene regions found: {len(regions)}\n",
            f"• Overall confidence: {confidence:.3f}\n",
            "• Analysis completed successfully ✅\n\n",
        ]

        if not regions:
            report += [
                "🔍 **No Gene Regions Detected**\n\n",
                "The model did not detect any gene regions meeting the minimum criteria in this sequence.\n",
                "This could mean:\n",
                "• The sequence may not contain protein-coding genes\n",
                "• Genes may be partial or fragmented\n",
                "• The sequence may be non-coding DNA\n",
            ]
            return "".join(report)

        report.append("📍 **Detected Gene Regions:**\n\n")
        report.extend(_format_region(i, region) for i, region in enumerate(regions, 1))

        # Summary statistics (regions is non-empty here, so the divisions are safe)
        total_gene_length = sum(region['length'] for region in regions)
        report += [
            "📈 **Statistics:**\n",
            f"• Total gene content: {total_gene_length:,} bp ({total_gene_length/len(sequence)*100:.1f}% of sequence)\n",
            f"• Average gene length: {total_gene_length//len(regions):,} bp\n",
            f"• Gene density: {len(regions)/(len(sequence)/1000):.2f} genes per kb\n",
        ]

        return "".join(report)

    except Exception as e:
        # UI boundary: turn any failure into a readable message and log the traceback.
        error_msg = (
            "🚫 **Prediction Error**\n\n"
            "An error occurred during prediction:\n\n"
            f"```\n{str(e)}\n```\n\n"
            "**Troubleshooting:**\n"
            "• Check that predictor.py is in the same directory\n"
            "• Verify model file exists and is not corrupted\n"
            "• Ensure sequence contains only valid DNA characters\n"
        )

        print(f"Prediction error: {e}")
        traceback.print_exc()
        return error_msg
193
+
194
def get_sequence_stats(sequence):
    """Return a one-line summary (length, GC content, N count) of a DNA sequence.

    FASTA header lines (starting with ``>``) and every whitespace character,
    including carriage returns from Windows line endings, are discarded before
    counting so the reported length reflects nucleotides only.

    Args:
        sequence: Raw user input; may be empty or None.

    Returns:
        str: ``"**Sequence Info:** <n> bp"`` followed by ``" | GC: <p>%"`` when
        at least one A/T/C/G is present and ``" | N's: <k>"`` when N occurs;
        an empty string when nothing remains after cleaning.
    """
    if not sequence:
        return ""

    body_lines = [ln for ln in sequence.splitlines() if not ln.lstrip().startswith('>')]
    cleaned = "".join("".join(body_lines).split()).upper()
    if not cleaned:
        return ""

    stats = f"**Sequence Info:** {len(cleaned)} bp"

    # Base composition
    counts = {base: cleaned.count(base) for base in "ATCGN"}

    # GC% is relative to unambiguous bases only; N and any other symbols are excluded.
    total_valid = counts['A'] + counts['T'] + counts['C'] + counts['G']
    if total_valid > 0:
        gc_content = (counts['C'] + counts['G']) / total_valid * 100
        stats += f" | GC: {gc_content:.1f}%"

    if counts['N'] > 0:
        stats += f" | N's: {counts['N']}"

    return stats
222
+
223
def create_interface():
    """Build the Gradio Blocks UI for the gene prediction app.

    Calls ``initialize_model()`` first, so the status banner reflects whether
    the predictor could be loaded (module globals ``model_loaded`` and
    ``error_message``).  Wires the sequence textbox to ``get_sequence_stats``
    (on change) and ``predict_genes`` (on button click / textbox submit), and
    adds clear and example buttons.

    Returns:
        gr.Blocks: The assembled, not yet launched, interface.
    """
    import html  # local import: only needed to escape the error text below

    # Initialize model on startup; the outcome is read from the module globals.
    print("Initializing model...")
    initialize_model()

    # Create custom CSS for better styling
    custom_css = """
    .gene-app {
        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    }

    .status-ready {
        background: linear-gradient(135deg, #d4edda 0%, #c3e6cb 100%);
        border: 2px solid #28a745;
        border-radius: 10px;
        padding: 15px;
        color: #155724;
        font-weight: bold;
        box-shadow: 0 2px 10px rgba(40, 167, 69, 0.2);
    }

    .status-error {
        background: linear-gradient(135deg, #f8d7da 0%, #f5c6cb 100%);
        border: 2px solid #dc3545;
        border-radius: 10px;
        padding: 15px;
        color: #721c24;
        font-weight: bold;
        box-shadow: 0 2px 10px rgba(220, 53, 69, 0.2);
    }

    .main-title {
        text-align: center;
        background: linear-gradient(135deg, #2E8B57 0%, #20B2AA 100%);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        background-clip: text;
        font-size: 2.5rem;
        font-weight: bold;
        margin-bottom: 1rem;
    }

    .instructions {
        background: #f8f9fa;
        border-radius: 10px;
        padding: 20px;
        border-left: 4px solid #2E8B57;
        margin: 1rem 0;
    }

    .sequence-stats {
        font-size: 0.9rem;
        color: #6c757d;
        font-style: italic;
        margin-top: 5px;
    }
    """

    print("Creating Gradio interface...")

    # Determine status message and styling
    if model_loaded:
        status_html = '''
        <div class="status-ready">
            <strong>✅ Model Status:</strong> Ready for gene prediction!<br>
            <small>🔬 Boundary-aware deep learning model loaded successfully</small>
        </div>
        '''
    else:
        # error_message carries raw exception text / file listings; escape it so
        # characters such as "<" or "&" cannot break or inject markup.
        status_html = f'''
        <div class="status-error">
            <strong>❌ Model Status:</strong> Model initialization failed<br>
            <small>📋 Details: {html.escape(error_message)}</small>
        </div>
        '''

    # Example sequences
    examples = [
        # Short example with clear gene
        ["ATGAAACGCATTAGCACCACCATTACCACCACCATCACCATTACCACAGGTAACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAGCCCGCACCTGACAGTGCGGGCTTTTTTTTTCGACCAAAGGTAACGAGGTAACAACCATGCGAGTGTTGAAGTTCGGCGGTACATCAGTGGCAAATGCAGAACGTTTTCTGCGTAA"],
        # Longer example
        ["ATGAAACGCATTAGCACCACCATTACCACCACCATCACCATTACCACAGGTAACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAGCCCGCACCTGACAGTGCGGGCTTTTTTTTTCGACCAAAGGTAACGAGGTAACAACCATGCGAGTGTTGAAGTTCGGCGGTACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCCAGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCACCTGGTGGCGATGATTGAAAAAACCATTAGCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAACGTATTTTTGCCGAACTTTTGACGGGACTCGCCGCCGCCCAGCCGGGGTTCCCGCTGGCGCAATTGAAAACTTTCGTCGATCAGGAATTTGCCCAATAG"],
    ]

    # Create the interface with custom theme
    with gr.Blocks(
        title="🧬 Gene Prediction Tool",
        theme=gr.themes.Soft(primary_hue="emerald", secondary_hue="teal"),
        css=custom_css
    ) as interface:

        gr.HTML('<h1 class="main-title">🧬 Advanced Gene Prediction Tool</h1>')
        gr.HTML('<p style="text-align: center; font-size: 1.1rem; color: #6c757d; margin-bottom: 2rem;">AI-powered boundary-aware gene detection system</p>')

        gr.HTML(status_html)

        with gr.Row():
            gr.HTML('''
            <div class="instructions">
                <h3>🔬 How to Use:</h3>
                <ol>
                    <li><strong>Enter DNA sequence:</strong> Paste your sequence using A, T, C, G, N characters</li>
                    <li><strong>Click Analyze:</strong> The AI model will predict gene regions</li>
                    <li><strong>Review results:</strong> View detected genes with positions, codons, and confidence</li>
                </ol>

                <h4>📏 Requirements:</h4>
                <ul>
                    <li>Characters: Only A, T, C, G, N allowed</li>
                    <li>Length: 3 - 10,000 nucleotides</li>
                    <li>Format: Raw sequence (FASTA headers will be ignored)</li>
                </ul>
            </div>
            ''')

        with gr.Row():
            with gr.Column(scale=1):
                sequence_input = gr.Textbox(
                    label="🧬 DNA Sequence Input",
                    placeholder="Enter or paste your DNA sequence here...\nExample: ATGAAACGCATTAGCACC...",
                    lines=10,
                    max_lines=20,
                    show_copy_button=True,
                    container=True
                )

                # Real-time sequence stats
                sequence_stats = gr.HTML(value="", elem_classes=["sequence-stats"])

                with gr.Row():
                    submit_btn = gr.Button("🔬 Analyze Sequence", variant="primary", size="lg", scale=2)
                    clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="lg", scale=1)

                # Example buttons
                gr.Markdown("### 📝 Quick Examples:")
                with gr.Row():
                    example1_btn = gr.Button("Short Gene", variant="secondary", size="sm")
                    example2_btn = gr.Button("Longer Sequence", variant="secondary", size="sm")

            with gr.Column(scale=2):
                output = gr.Textbox(
                    label="🔬 Analysis Results",
                    lines=25,
                    max_lines=35,
                    show_copy_button=True,
                    container=True,
                    placeholder="Results will appear here after analysis..."
                )

        # Footer
        gr.HTML('''
        <div style="text-align: center; margin-top: 2rem; padding: 1rem; border-top: 1px solid #dee2e6; color: #6c757d;">
            <small>🧬 Powered by boundary-aware deep learning | Built with PyTorch & Gradio</small>
        </div>
        ''')

        # Event handlers

        # Real-time sequence stats update
        sequence_input.change(
            fn=get_sequence_stats,
            inputs=sequence_input,
            outputs=sequence_stats
        )

        # Main prediction
        submit_btn.click(
            fn=predict_genes,
            inputs=sequence_input,
            outputs=output
        )

        # Clear functionality: reset the input, the results and the stats line
        clear_btn.click(
            fn=lambda: ("", "", ""),
            outputs=[sequence_input, output, sequence_stats]
        )

        # Example buttons: load a canned sequence into the input box
        example1_btn.click(
            fn=lambda: examples[0][0],
            outputs=sequence_input
        )

        example2_btn.click(
            fn=lambda: examples[1][0],
            outputs=sequence_input
        )

        # Keyboard submit from the textbox.
        # NOTE(review): with a multi-line textbox (lines=10) Gradio appears to
        # fire this on Shift+Enter rather than plain Enter -- confirm for the
        # installed Gradio version.
        sequence_input.submit(
            fn=predict_genes,
            inputs=sequence_input,
            outputs=output
        )

    return interface
423
+
424
+
425
# Script entry point: build the UI, report the model state, then serve it.
if __name__ == "__main__":
    print("🚀 Launching Gene Prediction App...")

    # Building the interface also triggers model initialization.
    demo = create_interface()

    print(f"Model loaded: {model_loaded}")
    print("Open your browser to see the interface")

    # Hugging Face Spaces compatible settings: listen on all interfaces, port 7860.
    launch_options = dict(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        debug=False,
        show_error=True,
        quiet=False,
    )
    demo.launch(**launch_options)