re-type committed on
Commit
f045dd1
·
verified ·
1 Parent(s): ef3895b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -433
app.py CHANGED
@@ -1,443 +1,18 @@
1
  import gradio as gr
2
- import torch
3
- import numpy as np
4
- import os
5
- import traceback
6
- import logging
7
- import sys
8
 
9
- # Configure logging
10
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
11
- logger = logging.getLogger(__name__)
12
 
13
- print("=== Gene Prediction App Starting ===")
14
- print(f"Working directory: {os.getcwd()}")
15
- print(f"Available files: {os.listdir('.')}")
16
- print(f"PyTorch version: {torch.__version__}")
17
- print(f"Gradio version: {gr.__version__}")
18
- print(f"Python path: {sys.path}")
19
 
20
- # Global variables
21
- predictor = None
22
- model_loaded = False
23
- error_message = ""
24
 
25
- def initialize_model():
26
- """Initialize the model with proper error handling"""
27
- global predictor, model_loaded, error_message
28
-
29
- try:
30
- print("Attempting to import predictor...")
31
-
32
- # Try different import approaches
33
- try:
34
- from predictor import GenePredictor
35
- print("✅ Imported from predictor module")
36
- except ImportError:
37
- try:
38
- # If predictor.py is in the same directory
39
- import importlib.util
40
- spec = importlib.util.spec_from_file_location("predictor", "predictor.py")
41
- predictor_module = importlib.util.module_from_spec(spec)
42
- spec.loader.exec_module(predictor_module)
43
- GenePredictor = predictor_module.GenePredictor
44
- print("✅ Imported predictor.py directly")
45
- except Exception as e:
46
- print(f"Failed to import predictor: {e}")
47
- raise ImportError(f"Could not import GenePredictor: {e}")
48
-
49
- # Look for model file
50
- possible_model_paths = [
51
- 'best_boundary_aware_model.pth',
52
- 'model/best_boundary_aware_model.pth',
53
- './best_boundary_aware_model.pth'
54
- ]
55
-
56
- model_path = None
57
- for path in possible_model_paths:
58
- if os.path.exists(path):
59
- model_path = path
60
- break
61
-
62
- if not model_path:
63
- available_models = [f for f in os.listdir('.') if f.endswith('.pth')]
64
- if os.path.exists('model'):
65
- available_models.extend([f"model/{f}" for f in os.listdir('model') if f.endswith('.pth')])
66
-
67
- error_message = f"❌ Model file not found. Searched: {possible_model_paths}. Available: {available_models}"
68
- print(error_message)
69
- return False
70
-
71
- print(f"Found model file: {model_path}")
72
- print(f"Model file size: {os.path.getsize(model_path)} bytes")
73
-
74
- # Initialize predictor
75
- predictor = GenePredictor(model_path=model_path)
76
- model_loaded = True
77
- print("✅ Model initialized successfully")
78
- return True
79
-
80
- except Exception as e:
81
- error_message = f"❌ Model initialization failed: {str(e)}"
82
- print(error_message)
83
- print("Full traceback:")
84
- traceback.print_exc()
85
- return False
86
 
87
- def predict_genes(sequence):
88
- """Gene prediction function with comprehensive error handling"""
89
- try:
90
- # Check if model is loaded
91
- if not model_loaded or predictor is None:
92
- return f"🚫 **Model Error**\n\n{error_message}\n\nPlease check that:\n1. predictor.py is in the same directory\n2. Model file (.pth) exists\n3. All dependencies are installed"
93
-
94
- # Input validation
95
- if not sequence or not sequence.strip():
96
- return "⚠️ **Input Error**\n\nPlease enter a DNA sequence."
97
-
98
- # Clean sequence
99
- sequence = sequence.strip().upper()
100
- sequence = sequence.replace(' ', '').replace('\n', '').replace('\t', '').replace('\r', '')
101
-
102
- # Character validation
103
- valid_chars = set('ATCGN')
104
- invalid_chars = set(sequence) - valid_chars
105
- if invalid_chars:
106
- return f"⚠️ **Invalid Characters**\n\nFound invalid characters: {', '.join(sorted(invalid_chars))}\n\nPlease use only: A, T, C, G, N"
107
-
108
- # Length validation
109
- if len(sequence) < 3:
110
- return f"⚠️ **Sequence Too Short**\n\nMinimum length: 3 nucleotides\nYour sequence: {len(sequence)} nucleotides"
111
-
112
- if len(sequence) > 10000:
113
- return f"⚠️ **Sequence Too Long**\n\nMaximum length: 10,000 nucleotides\nYour sequence: {len(sequence)} nucleotides\n\nFor longer sequences, consider splitting them into smaller chunks."
114
-
115
- print(f"Processing sequence of length: {len(sequence)}")
116
-
117
- # Make prediction
118
- predictions, probs_dict, confidence = predictor.predict(sequence)
119
- regions = predictor.extract_gene_regions(predictions, sequence)
120
-
121
- # Format results
122
- result = f"🧬 **Gene Prediction Results**\n\n"
123
- result += f"📊 **Analysis Summary:**\n"
124
- result += f"• Sequence length: {len(sequence):,} bp\n"
125
- result += f"• Gene regions found: {len(regions)}\n"
126
- result += f"• Overall confidence: {confidence:.3f}\n"
127
- result += f"• Analysis completed successfully ✅\n\n"
128
-
129
- if not regions:
130
- result += f"🔍 **No Gene Regions Detected**\n\n"
131
- result += f"The model did not detect any gene regions meeting the minimum criteria in this sequence.\n"
132
- result += f"This could mean:\n"
133
- result += f"• The sequence may not contain protein-coding genes\n"
134
- result += f"• Genes may be partial or fragmented\n"
135
- result += f"• The sequence may be non-coding DNA\n"
136
- return result
137
-
138
- result += f"📍 **Detected Gene Regions:**\n\n"
139
-
140
- total_gene_length = 0
141
- for i, region in enumerate(regions, 1):
142
- result += f"**🧬 Gene Region {i}:**\n"
143
- result += f"├─ Position: {region['start']:,} - {region['end']:,} bp\n"
144
- result += f"├─ Length: {region['length']:,} bp\n"
145
- result += f"├─ In-frame: {'Yes' if region.get('in_frame', False) else 'No'}\n"
146
-
147
- # Start codon info
148
- start_codon = region.get('start_codon')
149
- if start_codon:
150
- result += f"├─ Start codon: {start_codon}\n"
151
- else:
152
- result += f"├─ Start codon: Not detected\n"
153
-
154
- # Stop codon info
155
- stop_codon = region.get('stop_codon')
156
- if stop_codon:
157
- result += f"├─ Stop codon: {stop_codon}\n"
158
- else:
159
- result += f"├─ Stop codon: Not detected\n"
160
-
161
- # Sequence preview
162
- seq = region.get('sequence', '')
163
- if seq:
164
- if len(seq) <= 120:
165
- result += f"└─ Sequence: `{seq}`\n"
166
- else:
167
- preview = seq[:60] + '...' + seq[-60:]
168
- result += f"└─ Sequence: `{preview}`\n"
169
-
170
- total_gene_length += region['length']
171
- result += "\n"
172
-
173
- # Summary statistics
174
- result += f"📈 **Statistics:**\n"
175
- result += f"• Total gene content: {total_gene_length:,} bp ({total_gene_length/len(sequence)*100:.1f}% of sequence)\n"
176
- result += f"• Average gene length: {total_gene_length//len(regions):,} bp\n"
177
- result += f"• Gene density: {len(regions)/(len(sequence)/1000):.2f} genes per kb\n"
178
-
179
- return result
180
-
181
- except Exception as e:
182
- error_msg = f"🚫 **Prediction Error**\n\n"
183
- error_msg += f"An error occurred during prediction:\n\n"
184
- error_msg += f"```\n{str(e)}\n```\n\n"
185
- error_msg += f"**Troubleshooting:**\n"
186
- error_msg += f"• Check that predictor.py is in the same directory\n"
187
- error_msg += f"• Verify model file exists and is not corrupted\n"
188
- error_msg += f"• Ensure sequence contains only valid DNA characters\n"
189
-
190
- print(f"Prediction error: {e}")
191
- traceback.print_exc()
192
- return error_msg
193
 
194
- def get_sequence_stats(sequence):
195
- """Get basic statistics about the input sequence"""
196
- if not sequence or not sequence.strip():
197
- return ""
198
-
199
- sequence = sequence.strip().upper().replace(' ', '').replace('\n', '').replace('\t', '')
200
-
201
- if not sequence:
202
- return ""
203
-
204
- stats = f"**Sequence Info:** {len(sequence)} bp"
205
-
206
- # Base composition
207
- a_count = sequence.count('A')
208
- t_count = sequence.count('T')
209
- c_count = sequence.count('C')
210
- g_count = sequence.count('G')
211
- n_count = sequence.count('N')
212
-
213
- total_valid = a_count + t_count + c_count + g_count
214
- if total_valid > 0:
215
- gc_content = (c_count + g_count) / total_valid * 100
216
- stats += f" | GC: {gc_content:.1f}%"
217
-
218
- if n_count > 0:
219
- stats += f" | N's: {n_count}"
220
-
221
- return stats
222
 
223
- def create_interface():
224
- """Create the Gradio interface"""
225
- # Initialize model on startup
226
- print("Initializing model...")
227
- model_status = initialize_model()
228
 
229
- # Create custom CSS for better styling
230
- custom_css = """
231
- .gene-app {
232
- font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
233
- }
234
 
235
- .status-ready {
236
- background: linear-gradient(135deg, #d4edda 0%, #c3e6cb 100%);
237
- border: 2px solid #28a745;
238
- border-radius: 10px;
239
- padding: 15px;
240
- color: #155724;
241
- font-weight: bold;
242
- box-shadow: 0 2px 10px rgba(40, 167, 69, 0.2);
243
- }
244
-
245
- .status-error {
246
- background: linear-gradient(135deg, #f8d7da 0%, #f5c6cb 100%);
247
- border: 2px solid #dc3545;
248
- border-radius: 10px;
249
- padding: 15px;
250
- color: #721c24;
251
- font-weight: bold;
252
- box-shadow: 0 2px 10px rgba(220, 53, 69, 0.2);
253
- }
254
-
255
- .main-title {
256
- text-align: center;
257
- background: linear-gradient(135deg, #2E8B57 0%, #20B2AA 100%);
258
- -webkit-background-clip: text;
259
- -webkit-text-fill-color: transparent;
260
- background-clip: text;
261
- font-size: 2.5rem;
262
- font-weight: bold;
263
- margin-bottom: 1rem;
264
- }
265
-
266
- .instructions {
267
- background: #f8f9fa;
268
- border-radius: 10px;
269
- padding: 20px;
270
- border-left: 4px solid #2E8B57;
271
- margin: 1rem 0;
272
- }
273
-
274
- .sequence-stats {
275
- font-size: 0.9rem;
276
- color: #6c757d;
277
- font-style: italic;
278
- margin-top: 5px;
279
- }
280
- """
281
-
282
- print("Creating Gradio interface...")
283
-
284
- # Determine status message and styling
285
- if model_loaded:
286
- status_html = '''
287
- <div class="status-ready">
288
- <strong>✅ Model Status:</strong> Ready for gene prediction!<br>
289
- <small>🔬 Boundary-aware deep learning model loaded successfully</small>
290
- </div>
291
- '''
292
- else:
293
- status_html = f'''
294
- <div class="status-error">
295
- <strong>❌ Model Status:</strong> Model initialization failed<br>
296
- <small>📋 Details: {error_message}</small>
297
- </div>
298
- '''
299
-
300
- # Example sequences
301
- examples = [
302
- # Short example with clear gene
303
- ["ATGAAACGCATTAGCACCACCATTACCACCACCATCACCATTACCACAGGTAACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAGCCCGCACCTGACAGTGCGGGCTTTTTTTTTCGACCAAAGGTAACGAGGTAACAACCATGCGAGTGTTGAAGTTCGGCGGTACATCAGTGGCAAATGCAGAACGTTTTCTGCGTAA"],
304
- # Longer example
305
- ["ATGAAACGCATTAGCACCACCATTACCACCACCATCACCATTACCACAGGTAACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAGCCCGCACCTGACAGTGCGGGCTTTTTTTTTCGACCAAAGGTAACGAGGTAACAACCATGCGAGTGTTGAAGTTCGGCGGTACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCCAGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCACCTGGTGGCGATGATTGAAAAAACCATTAGCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAACGTATTTTTGCCGAACTTTTGACGGGACTCGCCGCCGCCCAGCCGGGGTTCCCGCTGGCGCAATTGAAAACTTTCGTCGATCAGGAATTTGCCCAATAG"],
306
- ]
307
-
308
- # Create the interface with custom theme
309
- with gr.Blocks(
310
- title="🧬 Gene Prediction Tool",
311
- theme=gr.themes.Soft(primary_hue="emerald", secondary_hue="teal"),
312
- css=custom_css
313
- ) as interface:
314
-
315
- gr.HTML('<h1 class="main-title">🧬 Advanced Gene Prediction Tool</h1>')
316
- gr.HTML('<p style="text-align: center; font-size: 1.1rem; color: #6c757d; margin-bottom: 2rem;">AI-powered boundary-aware gene detection system</p>')
317
-
318
- gr.HTML(status_html)
319
-
320
- with gr.Row():
321
- gr.HTML('''
322
- <div class="instructions">
323
- <h3>🔬 How to Use:</h3>
324
- <ol>
325
- <li><strong>Enter DNA sequence:</strong> Paste your sequence using A, T, C, G, N characters</li>
326
- <li><strong>Click Analyze:</strong> The AI model will predict gene regions</li>
327
- <li><strong>Review results:</strong> View detected genes with positions, codons, and confidence</li>
328
- </ol>
329
-
330
- <h4>📏 Requirements:</h4>
331
- <ul>
332
- <li>Characters: Only A, T, C, G, N allowed</li>
333
- <li>Length: 3 - 10,000 nucleotides</li>
334
- <li>Format: Raw sequence (FASTA headers will be ignored)</li>
335
- </ul>
336
- </div>
337
- ''')
338
-
339
- with gr.Row():
340
- with gr.Column(scale=1):
341
- sequence_input = gr.Textbox(
342
- label="🧬 DNA Sequence Input",
343
- placeholder="Enter or paste your DNA sequence here...\nExample: ATGAAACGCATTAGCACC...",
344
- lines=10,
345
- max_lines=20,
346
- show_copy_button=True,
347
- container=True
348
- )
349
-
350
- # Real-time sequence stats
351
- sequence_stats = gr.HTML(value="", elem_classes=["sequence-stats"])
352
-
353
- with gr.Row():
354
- submit_btn = gr.Button("🔬 Analyze Sequence", variant="primary", size="lg", scale=2)
355
- clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="lg", scale=1)
356
-
357
- # Example buttons
358
- gr.Markdown("### 📝 Quick Examples:")
359
- with gr.Row():
360
- example1_btn = gr.Button("Short Gene", variant="secondary", size="sm")
361
- example2_btn = gr.Button("Longer Sequence", variant="secondary", size="sm")
362
-
363
- with gr.Column(scale=2):
364
- output = gr.Textbox(
365
- label="🔬 Analysis Results",
366
- lines=25,
367
- max_lines=35,
368
- show_copy_button=True,
369
- container=True,
370
- placeholder="Results will appear here after analysis..."
371
- )
372
-
373
- # Footer
374
- gr.HTML('''
375
- <div style="text-align: center; margin-top: 2rem; padding: 1rem; border-top: 1px solid #dee2e6; color: #6c757d;">
376
- <small>🧬 Powered by boundary-aware deep learning | Built with PyTorch & Gradio</small>
377
- </div>
378
- ''')
379
-
380
- # Event handlers
381
- def update_stats(sequence):
382
- return get_sequence_stats(sequence)
383
-
384
- # Real-time sequence stats update
385
- sequence_input.change(
386
- fn=update_stats,
387
- inputs=sequence_input,
388
- outputs=sequence_stats
389
- )
390
-
391
- # Main prediction
392
- submit_btn.click(
393
- fn=predict_genes,
394
- inputs=sequence_input,
395
- outputs=output
396
- )
397
-
398
- # Clear functionality
399
- clear_btn.click(
400
- fn=lambda: ("", "", ""),
401
- outputs=[sequence_input, output, sequence_stats]
402
- )
403
-
404
- # Example buttons
405
- example1_btn.click(
406
- fn=lambda: examples[0][0],
407
- outputs=sequence_input
408
- )
409
-
410
- example2_btn.click(
411
- fn=lambda: examples[1][0],
412
- outputs=sequence_input
413
- )
414
-
415
- # Allow Enter key to submit
416
- sequence_input.submit(
417
- fn=predict_genes,
418
- inputs=sequence_input,
419
- outputs=output
420
- )
421
-
422
- return interface
423
-
424
-
425
- # Create and launch the interface
426
- if __name__ == "__main__":
427
- print("🚀 Launching Gene Prediction App...")
428
-
429
- # Create the interface
430
- demo = create_interface()
431
-
432
- print(f"Model loaded: {model_loaded}")
433
- print(f"Open your browser to see the interface")
434
-
435
- # Launch with Hugging Face Spaces compatible settings
436
- demo.launch(
437
- server_name="0.0.0.0",
438
- server_port=7860,
439
- share=False,
440
- debug=False,
441
- show_error=True,
442
- quiet=False
443
- )
 
1
  import gradio as gr
 
 
 
 
 
 
2
 
 
 
 
3
 
 
 
 
 
 
 
4
 
 
 
 
 
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
+ def test_function(text):
10
+ return f"You entered: {text}"
 
 
 
11
 
 
 
 
 
 
12
 
13
+ # Simple test interface
14
+ with gr.Blocks(title="Test App") as demo:
15
+ gr.Markdown("# Test Interface")
16
+ with gr.Row():
17
+ input_box = gr.Textbox(label="Input")
18
+ output_box = gr.Textbox(label="Output")