Spaces:

GGproject10
/

simplified_tree_AI

No application file

App Files Files Community

re-type committed on Jun 9, 2025

Commit

b5a86a2

verified ·

1 Parent(s): 57d61f8

Update app.py

Browse files

Files changed (1) hide show

app.py +377 -256

app.py CHANGED Viewed

@@ -712,383 +712,504 @@ def predict_with_keras(sequence):
         input_arr = np.array([indices])
         prediction = keras_model.predict(input_arr, verbose=0)[0]
-        # Format prediction as probabilities/scores (not a sequence)
-        result = ''.join([str(round(p, 3)) for p in prediction])
         return result
     except Exception as e:
-        logging.error(f"Keras prediction failed: {e}")
         return f"Keras prediction failed: {str(e)}"
-# --- Boundary Model Prediction ---
-def predict_with_boundary_model(sequence):
     try:
         if not boundary_model:
             return f"Boundary model not available. Input sequence: {sequence[:100]}..."
         predictions, probabilities, confidence = boundary_model.predict(sequence)
         # Extract gene regions
         regions = boundary_model.extract_gene_regions(predictions, sequence)
         if regions:
-            result = f"Confidence: {confidence:.3f}\n"
-            result += f"Regions found: {len(regions)}\n"
             for i, region in enumerate(regions[:3]):  # Show first 3 regions
-                result += f"Region {i+1}: {region['start']}-{region['end']} ({len(region['sequence'])} bp)\n"
-            if len(regions) > 3:
-                result += f"... and {len(regions) - 3} more regions\n"
-            # Return the longest region's sequence
-            longest_region = max(regions, key=lambda x: len(x['sequence']))
-            return longest_region['sequence']
-        else:
-            return f"No gene regions found. Confidence: {confidence:.3f}"
     except Exception as e:
-        logging.error(f"Boundary model prediction failed: {e}")
-        return f"Boundary model prediction failed: {str(e)}"
-# --- Combined Prediction ---
-def predict_f_gene(sequence):
-    """Main prediction function that combines all models"""
     try:
-        # Clean sequence
-        sequence = sequence.upper().strip()
-        sequence = re.sub(r'[^ATCG]', '', sequence)
         if len(sequence) < 10:
-            return "Error: Sequence too short (minimum 10 nucleotides required)."
         results = []
         results.append(f"Input sequence length: {len(sequence)} bp\n")
-        # Try boundary model first
         if boundary_model:
-            results.append("🔍 BOUNDARY MODEL PREDICTION:")
-            boundary_result = predict_with_boundary_model(sequence)
             results.append(boundary_result)
-            results.append("")
-        # Try Keras model
-        if keras_model and kmer_to_index:
-            results.append("🧠 KERAS MODEL PREDICTION:")
             keras_result = predict_with_keras(sequence)
             results.append(keras_result)
-            results.append("")
-        # Run verification pipeline
-        verification_results = run_verification_pipeline(sequence)
-        if verification_results:
-            results.append(format_verification_results(verification_results))
-        # If no models available
-        if not boundary_model and not keras_model:
-            results.append("⚠️ No prediction models available.")
-            results.append("Models are being loaded from Hugging Face Hub...")
         return "\n".join(results)
     except Exception as e:
-        logging.error(f"F-gene prediction failed: {e}")
-        return f"Prediction failed: {str(e)}"
 # --- File Processing Functions ---
-def process_fasta_file(file_path):
     """Process uploaded FASTA file"""
     try:
         sequences = {}
         current_seq = ""
         current_name = ""
-        with open(file_path, 'r') as f:
-            for line in f:
-                line = line.strip()
-                if line.startswith('>'):
-                    if current_name and current_seq:
-                        sequences[current_name] = current_seq
-                    current_name = line[1:]
-                    current_seq = ""
-                else:
-                    current_seq += line.upper()
-            # Add last sequence
-            if current_name and current_seq:
-                sequences[current_name] = current_seq
-        return sequences
-    except Exception as e:
-        logging.error(f"FASTA processing failed: {e}")
-        return None
-def batch_predict_fasta(file_path):
-    """Batch prediction for FASTA file"""
-    try:
-        sequences = process_fasta_file(file_path)
         if not sequences:
-            return "Error: Could not process FASTA file."
         results = []
-        results.append(f"Processing {len(sequences)} sequences from FASTA file:\n")
-        for i, (name, sequence) in enumerate(sequences.items()):
-            if i >= 10:  # Limit to first 10 sequences
-                results.append(f"... and {len(sequences) - 10} more sequences (showing first 10)")
                 break
-            results.append(f"📄 SEQUENCE: {name}")
-            results.append(f"Length: {len(sequence)} bp")
-            # Predict
-            prediction = predict_f_gene(sequence)
-            results.append(prediction)
-            results.append("-" * 50)
         return "\n".join(results)
     except Exception as e:
-        logging.error(f"Batch FASTA prediction failed: {e}")
-        return f"Batch prediction failed: {str(e)}"
-# --- Gradio Interface Functions ---
-def predict_sequence(sequence, file_upload=None):
-    """Main interface function for sequence prediction"""
     try:
-        # Handle file upload
-        if file_upload is not None:
-            return batch_predict_fasta(file_upload.name)
-        # Handle text input
-        if not sequence or len(sequence.strip()) < 10:
-            return "Please enter a DNA sequence (minimum 10 nucleotides) or upload a FASTA file."
-        return predict_f_gene(sequence)
     except Exception as e:
-        logging.error(f"Interface prediction failed: {e}")
-        return f"Prediction interface error: {str(e)}"
-def build_tree_interface(sequence, matching_percentage=85):
-    """Interface function for phylogenetic tree building"""
     try:
-        if not sequence or len(sequence.strip()) < 10:
-            return "Please enter a DNA sequence (minimum 10 nucleotides).", "", ""
-        # Try ML tree first
-        ml_success, ml_message, aligned_file, tree_file = build_maximum_likelihood_tree(sequence)
-        if ml_success and tree_file:
-            # ML tree successful
-            tree_info = f"🌳 Maximum Likelihood Tree Built Successfully!\n\n{ml_message}"
-            # Also try simplified tree analysis
-            html_content, html_file, simple_message = analyze_sequence_for_tree(sequence, matching_percentage)
-            if html_content:
-                return tree_info, html_content, f"{tree_info}\n\n{simple_message}"
-            else:
-                return tree_info, "", tree_info
         else:
-            # ML failed, try simplified tree
-            html_content, html_file, simple_message = analyze_sequence_for_tree(sequence, matching_percentage)
-            if html_content:
-                fallback_msg = f"⚠️ ML Tree Construction Issues:\n{ml_message}\n\n"
-                fallback_msg += "📊 Simplified Tree Analysis:\n"
-                return fallback_msg, html_content, f"{fallback_msg}{simple_message}"
-            else:
-                return f"❌ Tree construction failed:\n{ml_message}\n\nSimplified analysis: {simple_message}", "", ""
-    except Exception as e:
-        error_msg = f"Tree building interface error: {str(e)}"
-        logging.error(error_msg)
-        return error_msg, "", ""
-def get_system_status():
-    """Get system status for debugging"""
-    try:
-        status = []
-        status.append("🔧 SYSTEM STATUS:")
-        status.append("")
-        # Model status
-        status.append("📊 MODELS:")
-        status.append(f"  - Boundary Model: {'✅ Loaded' if boundary_model else '❌ Not loaded'}")
-        status.append(f"  - Keras Model: {'✅ Loaded' if keras_model else '❌ Not loaded'}")
-        status.append(f"  - Tree Analyzer: {'✅ Loaded' if analyzer else '❌ Not loaded'}")
-        status.append(f"  - Verification Models: {len(verification_models)} loaded")
-        status.append("")
-        # Tool availability
-        mafft_available, iqtree_available, mafft_cmd, iqtree_cmd = check_tool_availability()
-        status.append("🛠️ PHYLOGENETIC TOOLS:")
-        status.append(f"  - MAFFT: {'✅ Available' if mafft_available else '❌ Not available'}")
-        if mafft_available:
-            status.append(f"    Path: {mafft_cmd}")
-        status.append(f"  - IQ-TREE: {'✅ Available' if iqtree_available else '❌ Not available'}")
-        if iqtree_available:
-            status.append(f"    Path: {iqtree_cmd}")
-        status.append("")
-        # File system
-        status.append("📁 FILES:")
-        status.append(f"  - CSV Data: {'✅ Found' if os.path.exists(csv_path) else '❌ Not found'}")
-        status.append(f"  - Models Directory: {'✅ Found' if os.path.exists('models') else '❌ Not found'}")
-        status.append(f"  - Output Directory: {'✅ Created' if os.path.exists('output') else '📝 Will be created'}")
-        return "\n".join(status)
     except Exception as e:
-        return f"Status check failed: {str(e)}"
 # --- Gradio Interface ---
 def create_gradio_interface():
-    """Create the main Gradio interface"""
-    # Custom CSS
     css = """
     .gradio-container {
-        max-width: 1200px;
-        margin: auto;
-    }
-    .tab-nav button {
-        font-size: 16px;
-        font-weight: bold;
     }
     .output-text {
         font-family: 'Courier New', monospace;
-        font-size: 14px;
     }
     """
-    with gr.Blocks(css=css, title="F-Gene Prediction & Phylogenetic Analysis") as demo:
-        gr.Markdown("# 🧬 F-Gene Prediction & Phylogenetic Analysis Platform")
-        gr.Markdown("Advanced bioinformatics pipeline for F-gene prediction and phylogenetic tree construction")
         with gr.Tabs():
             # Tab 1: Gene Prediction
-            with gr.TabItem("🔬 Gene Prediction"):
-                gr.Markdown("## F-Gene Sequence Prediction")
-                gr.Markdown("Enter a DNA sequence or upload a FASTA file for F-gene prediction using multiple ML models.")
                 with gr.Row():
                     with gr.Column(scale=2):
-                        sequence_input = gr.Textbox(
                             label="DNA Sequence",
-                            placeholder="Enter DNA sequence (ATCG format)...",
                             lines=5,
                             max_lines=10
                         )
-                        file_input = gr.File(
-                            label="Or Upload FASTA File",
-                            file_types=[".fasta", ".fa", ".fas", ".fna"]
-                        )
-                        predict_btn = gr.Button("🔍 Predict F-Gene", variant="primary")
                     with gr.Column(scale=3):
                         prediction_output = gr.Textbox(
-                            label="Prediction Results",
                             lines=20,
                             max_lines=30,
                             elem_classes=["output-text"]
                         )
                 predict_btn.click(
-                    fn=predict_sequence,
-                    inputs=[sequence_input, file_input],
-                    outputs=prediction_output
                 )
-            # Tab 2: Phylogenetic Analysis
-            with gr.TabItem("🌳 Phylogenetic Tree"):
-                gr.Markdown("## Phylogenetic Tree Construction")
-                gr.Markdown("Build maximum likelihood phylogenetic trees and perform sequence similarity analysis.")
                 with gr.Row():
                     with gr.Column(scale=1):
-                        tree_sequence_input = gr.Textbox(
-                            label="DNA Sequence for Tree Analysis",
-                            placeholder="Enter DNA sequence...",
-                            lines=5
-                        )
-                        similarity_slider = gr.Slider(
-                            minimum=70,
-                            maximum=99,
-                            value=85,
-                            step=1,
-                            label="Similarity Threshold (%)"
-                        )
-                        tree_btn = gr.Button("🌳 Build Tree", variant="primary")
-                        tree_status = gr.Textbox(
-                            label="Tree Construction Status",
-                            lines=8,
-                            elem_classes=["output-text"]
                         )
                     with gr.Column(scale=2):
-                        tree_output = gr.HTML(
-                            label="Interactive Phylogenetic Tree",
-                            height=600
-                        )
-                        tree_info = gr.Textbox(
-                            label="Tree Information",
-                            lines=5,
                             elem_classes=["output-text"]
                         )
-                tree_btn.click(
-                    fn=build_tree_interface,
-                    inputs=[tree_sequence_input, similarity_slider],
-                    outputs=[tree_status, tree_output, tree_info]
                 )
-            # Tab 3: System Status
-            with gr.TabItem("⚙️ System Status"):
-                gr.Markdown("## System Status & Diagnostics")
-                status_btn = gr.Button("🔄 Refresh Status", variant="secondary")
-                status_output = gr.Textbox(
-                    label="System Status",
-                    lines=20,
-                    elem_classes=["output-text"]
-                )
-                status_btn.click(
-                    fn=get_system_status,
-                    outputs=status_output
-                )
-                # Load initial status
-                demo.load(
-                    fn=get_system_status,
-                    outputs=status_output
-                )
-        # Footer
-        gr.Markdown("---")
-        gr.Markdown("🔬 **Powered by**: PyTorch, TensorFlow, MAFFT, IQ-TREE, and Plotly | 🧬 **Bioinformatics Pipeline v2.0**")
-    return demo
 # --- Main Application ---
 if __name__ == "__main__":
     try:
-        logging.info("Starting F-Gene Prediction & Phylogenetic Analysis Platform...")
-        # Create Gradio interface
-        demo = create_gradio_interface()
-        # Launch the interface
-        demo.launch(
-            server_name="0.0.0.0",
             server_port=7860,
-            share=True,
-            debug=True,
-            show_error=True
         )
     except Exception as e:
-        logging.error(f"Failed to start application: {e}")
-        print(f"Error: {e}")
-        print("Please check the logs and ensure all dependencies are installed.")

         input_arr = np.array([indices])
         prediction = keras_model.predict(input_arr, verbose=0)[0]
+        # Format prediction as probabilities/scores
+        mean_score = np.mean(prediction)
+        max_score = np.max(prediction)
+        min_score = np.min(prediction)
+        result = f"Keras Model Prediction Results:\n"
+        result += f"- Mean Score: {mean_score:.4f}\n"
+        result += f"- Max Score: {max_score:.4f}\n"
+        result += f"- Min Score: {min_score:.4f}\n"
+        result += f"- Total K-mers: {len(kmers)}\n"
+        result += f"- Sequence Length: {len(sequence)} bp"
         return result
     except Exception as e:
+        logging.error(f"Keras prediction error: {e}")
         return f"Keras prediction failed: {str(e)}"
+# --- Boundary Prediction ---
+def predict_with_boundary(sequence):
     try:
         if not boundary_model:
             return f"Boundary model not available. Input sequence: {sequence[:100]}..."
+        # Get predictions from boundary model
         predictions, probabilities, confidence = boundary_model.predict(sequence)
         # Extract gene regions
         regions = boundary_model.extract_gene_regions(predictions, sequence)
+        result = f"Boundary Model Prediction Results:\n"
+        result += f"- Overall Confidence: {confidence:.4f}\n"
+        result += f"- Regions Detected: {len(regions) if regions else 0}\n"
         if regions:
             for i, region in enumerate(regions[:3]):  # Show first 3 regions
+                result += f"\nRegion {i+1}:\n"
+                result += f"  - Start: {region['start']}\n"
+                result += f"  - End: {region['end']}\n"
+                result += f"  - Length: {len(region['sequence'])} bp\n"
+                result += f"  - Confidence: {region.get('confidence', 'N/A'):.4f}\n"
+        return result
     except Exception as e:
+        logging.error(f"Boundary prediction error: {e}")
+        return f"Boundary prediction failed: {str(e)}"
+# --- Combined Prediction Function ---
+def predict_gene_sequence(sequence):
+    """Combined prediction using both models"""
     try:
+        if not sequence or len(sequence.strip()) == 0:
+            return "Please provide a DNA sequence."
+        # Clean and validate sequence
+        sequence = re.sub(r'[^ATCG]', '', sequence.upper())
         if len(sequence) < 10:
+            return "Sequence too short. Please provide at least 10 nucleotides."
         results = []
+        results.append(f"🧬 GENE SEQUENCE ANALYSIS\n")
         results.append(f"Input sequence length: {len(sequence)} bp\n")
+        results.append("=" * 50)
+        # Boundary model prediction
         if boundary_model:
+            results.append("\n🎯 BOUNDARY DETECTION:")
+            boundary_result = predict_with_boundary(sequence)
             results.append(boundary_result)
+        else:
+            results.append("\n❌ Boundary model not available")
+        # Keras model prediction
+        if keras_model:
+            results.append("\n🔍 KERAS MODEL ANALYSIS:")
             keras_result = predict_with_keras(sequence)
             results.append(keras_result)
+        else:
+            results.append("\n❌ Keras model not available")
+        # Verification models
+        if verification_models:
+            results.append("\n🔬 VERIFICATION ANALYSIS:")
+            verification_result = run_verification_pipeline(sequence)
+            formatted_verification = format_verification_results(verification_result)
+            results.append(formatted_verification)
         return "\n".join(results)
     except Exception as e:
+        logging.error(f"Gene prediction error: {e}")
+        return f"Gene prediction failed: {str(e)}"
 # --- File Processing Functions ---
+def process_fasta_file(file):
     """Process uploaded FASTA file"""
     try:
+        if file is None:
+            return "Please upload a FASTA file."
+        # Read file content
+        with open(file.name, 'r') as f:
+            content = f.read()
+        # Parse FASTA
         sequences = {}
         current_seq = ""
         current_name = ""
+        lines = content.strip().split('\n')
+        for line in lines:
+            line = line.strip()
+            if line.startswith('>'):
+                if current_name and current_seq:
+                    sequences[current_name] = current_seq
+                current_name = line[1:]  # Remove '>'
+                current_seq = ""
+            else:
+                current_seq += line.upper()
+        # Add last sequence
+        if current_name and current_seq:
+            sequences[current_name] = current_seq
         if not sequences:
+            return "No valid sequences found in FASTA file."
+        # Process each sequence
         results = []
+        results.append(f"📁 FASTA FILE ANALYSIS")
+        results.append(f"Found {len(sequences)} sequences\n")
+        results.append("=" * 60)
+        for i, (name, seq) in enumerate(sequences.items()):
+            if i >= 5:  # Limit to first 5 sequences
+                results.append(f"\n... and {len(sequences) - 5} more sequences")
                 break
+            results.append(f"\n🧬 Sequence: {name}")
+            results.append(f"Length: {len(seq)} bp")
+            # Clean sequence
+            clean_seq = re.sub(r'[^ATCG]', '', seq)
+            if len(clean_seq) >= 10:
+                # Run prediction on cleaned sequence
+                prediction = predict_gene_sequence(clean_seq)
+                results.append(prediction)
+            else:
+                results.append("❌ Sequence too short or invalid")
+            results.append("-" * 40)
         return "\n".join(results)
     except Exception as e:
+        logging.error(f"FASTA processing error: {e}")
+        return f"FASTA processing failed: {str(e)}"
+# --- Tree Building Interface Functions ---
+def build_tree_interface(sequence):
+    """Interface function for building phylogenetic trees"""
     try:
+        if not sequence or len(sequence.strip()) == 0:
+            return "Please provide a DNA sequence for tree construction."
+        # Clean sequence
+        clean_seq = re.sub(r'[^ATCG]', '', sequence.upper())
+        if len(clean_seq) < 50:
+            return "Sequence too short for phylogenetic analysis (minimum 50 bp required)."
+        # Try ML tree construction first
+        success, message, aligned_file, tree_file = build_maximum_likelihood_tree(clean_seq)
+        result = f"🌳 PHYLOGENETIC TREE CONSTRUCTION\n"
+        result += f"Input sequence length: {len(clean_seq)} bp\n"
+        result += "=" * 50 + "\n\n"
+        result += message
+        if success and tree_file:
+            # Try to read and display tree
+            try:
+                with open(tree_file, 'r') as f:
+                    tree_content = f.read().strip()
+                result += f"\n\n📄 Tree file content:\n"
+                result += f"File: {os.path.basename(tree_file)}\n"
+                result += f"Size: {len(tree_content)} characters\n"
+                # Show first part of tree if it's very long
+                if len(tree_content) > 500:
+                    result += f"Preview: {tree_content[:500]}...\n"
+                else:
+                    result += f"Content: {tree_content}\n"
+            except Exception as e:
+                result += f"\n⚠️  Could not read tree file: {e}"
+        return result
     except Exception as e:
+        logging.error(f"Tree building interface error: {e}")
+        return f"Tree construction failed: {str(e)}"
+def analyze_tree_interface(sequence, similarity_threshold):
+    """Interface function for tree analysis with similarity threshold"""
     try:
+        if not sequence or len(sequence.strip()) == 0:
+            return "Please provide a DNA sequence.", None
+        # Clean sequence
+        clean_seq = re.sub(r'[^ATCG]', '', sequence.upper())
+        if len(clean_seq) < 20:
+            return "Sequence too short for analysis (minimum 20 bp required).", None
+        # Validate similarity threshold
+        if not (1 <= similarity_threshold <= 99):
+            return "Similarity threshold must be between 1 and 99%.", None
+        # Run tree analysis
+        html_content, html_file, success_msg = analyze_sequence_for_tree(
+            clean_seq, similarity_threshold
+        )
+        if html_content:
+            result = f"🌳 PHYLOGENETIC TREE ANALYSIS\n"
+            result += f"Input sequence length: {len(clean_seq)} bp\n"
+            result += f"Similarity threshold: {similarity_threshold}%\n"
+            result += "=" * 50 + "\n\n"
+            result += success_msg
+            return result, html_file
         else:
+            return success_msg or "Tree analysis failed.", None
     except Exception as e:
+        logging.error(f"Tree analysis interface error: {e}")
+        return f"Tree analysis failed: {str(e)}", None
 # --- Gradio Interface ---
 def create_gradio_interface():
+    """Create the Gradio interface"""
+    # Custom CSS for better styling
     css = """
     .gradio-container {
+        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
     }
     .output-text {
         font-family: 'Courier New', monospace;
+        font-size: 12px;
+        line-height: 1.4;
+    }
+    .tab-nav {
+        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
     }
     """
+    with gr.Blocks(css=css, title="Gene Analysis Tool") as interface:
+        gr.Markdown("""
+        # 🧬 Advanced Gene Analysis Tool
+        This tool provides comprehensive gene sequence analysis including:
+        - **Gene Prediction**: Boundary detection and validation
+        - **Phylogenetic Analysis**: Tree construction and similarity analysis
+        - **File Processing**: Batch analysis of FASTA files
+        - **Model Verification**: Multi-model validation pipeline
+        """)
         with gr.Tabs():
             # Tab 1: Gene Prediction
+            with gr.Tab("🔬 Gene Prediction"):
+                gr.Markdown("### Predict gene sequences using trained models")
                 with gr.Row():
                     with gr.Column(scale=2):
+                        seq_input = gr.Textbox(
                             label="DNA Sequence",
+                            placeholder="Enter DNA sequence (A, T, C, G only)...",
                             lines=5,
                             max_lines=10
                         )
+                        predict_btn = gr.Button("🚀 Analyze Sequence", variant="primary")
                     with gr.Column(scale=3):
                         prediction_output = gr.Textbox(
+                            label="Analysis Results",
                             lines=20,
                             max_lines=30,
                             elem_classes=["output-text"]
                         )
                 predict_btn.click(
+                    fn=predict_gene_sequence,
+                    inputs=[seq_input],
+                    outputs=[prediction_output]
                 )
+            # Tab 2: File Processing
+            with gr.Tab("📁 File Processing"):
+                gr.Markdown("### Upload and analyze FASTA files")
                 with gr.Row():
                     with gr.Column(scale=1):
+                        file_input = gr.File(
+                            label="Upload FASTA File",
+                            file_types=[".fasta", ".fa", ".fas", ".txt"]
                         )
+                        process_btn = gr.Button("📊 Process File", variant="primary")
                     with gr.Column(scale=2):
+                        file_output = gr.Textbox(
+                            label="Processing Results",
+                            lines=25,
+                            max_lines=35,
                             elem_classes=["output-text"]
                         )
+                process_btn.click(
+                    fn=process_fasta_file,
+                    inputs=[file_input],
+                    outputs=[file_output]
                 )
+            # Tab 3: Phylogenetic Trees
+            with gr.Tab("🌳 Phylogenetic Trees"):
+                gr.Markdown("### Build and analyze phylogenetic trees")
+                with gr.Tabs():
+                    # Subtab: ML Tree Construction
+                    with gr.Tab("Maximum Likelihood Tree"):
+                        gr.Markdown("**Build ML tree using MAFFT + IQ-TREE**")
+                        with gr.Row():
+                            with gr.Column(scale=1):
+                                ml_seq_input = gr.Textbox(
+                                    label="DNA Sequence",
+                                    placeholder="Enter sequence for ML tree construction...",
+                                    lines=4
+                                )
+                                ml_tree_btn = gr.Button("🌳 Build ML Tree", variant="primary")
+                            with gr.Column(scale=2):
+                                ml_tree_output = gr.Textbox(
+                                    label="ML Tree Results",
+                                    lines=20,
+                                    elem_classes=["output-text"]
+                                )
+                        ml_tree_btn.click(
+                            fn=build_tree_interface,
+                            inputs=[ml_seq_input],
+                            outputs=[ml_tree_output]
+                        )
+                    # Subtab: Interactive Tree Analysis
+                    with gr.Tab("Interactive Analysis"):
+                        gr.Markdown("**Analyze sequence similarity with interactive tree**")
+                        with gr.Row():
+                            with gr.Column(scale=1):
+                                tree_seq_input = gr.Textbox(
+                                    label="Query Sequence",
+                                    placeholder="Enter sequence for tree analysis...",
+                                    lines=4
+                                )
+                                similarity_slider = gr.Slider(
+                                    minimum=1,
+                                    maximum=99,
+                                    value=80,
+                                    step=1,
+                                    label="Similarity Threshold (%)"
+                                )
+                                tree_analyze_btn = gr.Button("🔍 Analyze Tree", variant="primary")
+                            with gr.Column(scale=2):
+                                tree_analysis_output = gr.Textbox(
+                                    label="Tree Analysis Results",
+                                    lines=15,
+                                    elem_classes=["output-text"]
+                                )
+                                tree_file_output = gr.File(
+                                    label="Interactive Tree File (HTML)"
+                                )
+                        tree_analyze_btn.click(
+                            fn=analyze_tree_interface,
+                            inputs=[tree_seq_input, similarity_slider],
+                            outputs=[tree_analysis_output, tree_file_output]
+                        )
+            # Tab 4: Model Information
+            with gr.Tab("ℹ️ Model Information"):
+                gr.Markdown("""
+                ### Model Status and Information
+                **Available Models:**
+                """)
+                # Model status
+                model_status = []
+                if boundary_model:
+                    model_status.append("✅ Boundary Detection Model: Loaded")
+                else:
+                    model_status.append("❌ Boundary Detection Model: Not Available")
+                if keras_model:
+                    model_status.append("✅ Keras Validation Model: Loaded")
+                else:
+                    model_status.append("❌ Keras Validation Model: Not Available")
+                if verification_models:
+                    model_status.append(f"✅ Verification Models: {len(verification_models)} loaded")
+                    for model_name in verification_models.keys():
+                        model_status.append(f"   - {model_name}")
+                else:
+                    model_status.append("❌ Verification Models: None loaded")
+                if analyzer:
+                    model_status.append("✅ Tree Analyzer: Initialized")
+                else:
+                    model_status.append("❌ Tree Analyzer: Not Available")
+                # Check external tools
+                mafft_available, iqtree_available, mafft_cmd, iqtree_cmd = check_tool_availability()
+                if mafft_available:
+                    model_status.append(f"✅ MAFFT: Available ({mafft_cmd})")
+                else:
+                    model_status.append("❌ MAFFT: Not Available")
+                if iqtree_available:
+                    model_status.append(f"✅ IQ-TREE: Available ({iqtree_cmd})")
+                else:
+                    model_status.append("❌ IQ-TREE: Not Available")
+                gr.Markdown("\n".join(model_status))
+                gr.Markdown("""
+                ### Usage Guidelines:
+                1. **Gene Prediction**: Input DNA sequences containing only A, T, C, G characters
+                2. **File Processing**: Upload FASTA files with multiple sequences
+                3. **ML Trees**: Requires MAFFT and IQ-TREE installation
+                4. **Interactive Trees**: Uses simplified clustering for quick analysis
+                ### System Requirements:
+                - Python 3.8+
+                - TensorFlow/Keras for neural network models
+                - PyTorch for boundary detection
+                - MAFFT and IQ-TREE for phylogenetic analysis (optional)
+                """)
+        return interface
 # --- Main Application ---
 if __name__ == "__main__":
+    # Initialize logging
+    logging.basicConfig(
+        level=logging.INFO,
+        format='%(asctime)s - %(levelname)s - %(message)s',
+        handlers=[
+            logging.FileHandler('gene_analysis.log'),
+            logging.StreamHandler(sys.stdout)
+        ]
+    )
+    # Create output directories
+    os.makedirs("output", exist_ok=True)
+    os.makedirs("ml_tree_output", exist_ok=True)
+    # Log startup information
+    logging.info("Starting Gene Analysis Tool")
+    logging.info(f"Boundary model loaded: {boundary_model is not None}")
+    logging.info(f"Keras model loaded: {keras_model is not None}")
+    logging.info(f"Verification models loaded: {len(verification_models) if verification_models else 0}")
+    logging.info(f"Tree analyzer initialized: {analyzer is not None}")
+    # Check external tools
+    mafft_available, iqtree_available, mafft_cmd, iqtree_cmd = check_tool_availability()
+    logging.info(f"MAFFT available: {mafft_available}")
+    logging.info(f"IQ-TREE available: {iqtree_available}")
+    # Create and launch interface
     try:
+        interface = create_gradio_interface()
+        # Launch with appropriate settings
+        interface.launch(
+            share=False,  # Set to True if you want a public link
+            server_name="0.0.0.0",  # Allow external connections
             server_port=7860,
+            show_error=True,
+            debug=True
         )
     except Exception as e:
+        logging.error(f"Failed to launch interface: {e}")
+        import traceback
+        logging.error(f"Full traceback: {traceback.format_exc()}")
+        sys.exit(1)