Spaces:

GGproject10
/

simplified_tree_AI

No application file

App Files Files Community

re-type committed on Jun 9, 2025

Commit

740aa59

verified ·

1 Parent(s): 62b42ec

Update app.py

Browse files

Files changed (1) hide show

app.py +277 -39

app.py CHANGED Viewed

@@ -11,16 +11,18 @@ import numpy as np
 from predictor import GenePredictor
 from tensorflow.keras.models import load_model
 import ml_simplified_tree
 # --- Global Variables ---
-MAFFT_PATH = "mafft/mafftdir/bin/mafft"
 # --- Logging ---
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 # --- Paths ---
 from huggingface_hub import hf_hub_download
-import tempfile
 # Model repository and file paths
 model_repo = "GGproject10/best_boundary_aware_model"
@@ -95,6 +97,212 @@ except Exception as e:
     logging.error(f"Failed to initialize tree analyzer: {e}")
     analyzer = None
 # --- Tree Analysis Function (Based on old Gradio API) ---
 def analyze_sequence_for_tree(sequence: str, matching_percentage: float) -> str:
     """
@@ -205,23 +413,23 @@ def read_fasta_file(file_obj):
         return ""
 # --- Full Pipeline ---
-def run_pipeline_from_file(fasta_file_obj, similarity_score):
     try:
         dna_input = read_fasta_file(fasta_file_obj)
         if not dna_input:
-            return "Failed to read FASTA file", "", "", "", None, None, None, "No input sequence"
-        return run_pipeline(dna_input, similarity_score)
     except Exception as e:
         error_msg = f"Pipeline error: {str(e)}"
         logging.error(error_msg)
-        return error_msg, "", "", "", None, None, None, error_msg
-def run_pipeline(dna_input, similarity_score=95.0):
     try:
         # Clean input
         dna_input = dna_input.upper().strip()
         if not dna_input:
-            return "Empty input", "", "", "", None, None, None, "No input provided"
         # Sanitize DNA sequence
         if not re.match('^[ACTGN]+$', dna_input):
@@ -267,29 +475,47 @@ def run_pipeline(dna_input, similarity_score=95.0):
         else:
             keras_output = "Skipped: sequence too short for F gene validation"
-        # Step 3: MAFFT and IQ-TREE (skip due to configuration issues)
         aligned_file = None
         phy_file = None
-        # Skip MAFFT due to configuration issues in the container
-        logging.info("Skipping MAFFT/IQ-TREE due to container configuration issues")
-        # Step 4: ML Simplified Tree (using the new approach)
         html_file = None
         tree_html_content = "No tree generated"
-        ml_output = ""
         if analyzer and processed_sequence and len(processed_sequence) >= 10:
             try:
-                logging.info(f"Starting ML tree analysis with F gene sequence length: {len(processed_sequence)}")
-                # Use the new tree analysis function with user-specified similarity
                 tree_result = analyze_sequence_for_tree(processed_sequence, matching_percentage=similarity_score)
                 if tree_result and not tree_result.startswith("Error:"):
                     # Success - we have HTML content
                     tree_html_content = tree_result
-                    ml_output = "✅ Phylogenetic tree generated successfully!"
                     # Check if HTML file was created
                     output_dir = "output"
@@ -297,35 +523,36 @@ def run_pipeline(dna_input, similarity_score=95.0):
                         html_files = [f for f in os.listdir(output_dir) if f.endswith('.html')]
                         if html_files:
                             html_file = os.path.join(output_dir, html_files[-1])  # Get the latest
-                            ml_output += f"\n- Tree file: {html_files[-1]}"
                     # Count sequences analyzed
                     if analyzer.find_query_sequence(processed_sequence):
                         matched_ids, perc = analyzer.find_similar_sequences(similarity_score)
-                        ml_output += f"\n- {len(matched_ids)} sequences analyzed"
-                        ml_output += f"\n- Similarity threshold: {perc:.1f}%"
                 else:
                     # Error occurred
-                    ml_output = f"❌ Tree analysis failed: {tree_result}"
-                    logging.error(f"Tree analysis failed: {tree_result}")
             except Exception as e:
-                ml_output = f"❌ ML Tree analysis failed: {str(e)}"
-                logging.error(f"ML Tree failed: {e}")
                 import traceback
                 logging.error(f"Full traceback: {traceback.format_exc()}")
         elif not analyzer:
-            ml_output = "❌ Tree analyzer not initialized"
         elif not processed_sequence or len(processed_sequence) < 10:
-            ml_output = f"❌ F gene sequence too short for analysis (length: {len(processed_sequence) if processed_sequence else 0})"
         else:
-            ml_output = "❌ Skipped due to previous step errors"
         return (
             boundary_output,
             keras_output[:500] + "..." if len(keras_output) > 500 else keras_output,
             csv_path if os.path.exists(csv_path) else "CSV file not found",
-            ml_output,
             html_file,
             aligned_file if aligned_file and os.path.exists(aligned_file) else None,
             phy_file if phy_file and os.path.exists(phy_file) else None,
@@ -337,16 +564,16 @@ def run_pipeline(dna_input, similarity_score=95.0):
         logging.error(error_msg)
         import traceback
         logging.error(f"Full traceback: {traceback.format_exc()}")
-        return error_msg, "", "", "", None, None, None, error_msg
 # --- Gradio UI ---
 with gr.Blocks(title="Viral Gene Phylogenetic Pipeline", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🧬 Viral Gene Phylogenetic Inference Pipeline")
-    gr.Markdown("This pipeline processes DNA sequences through boundary detection, k-mer analysis, and phylogenetic tree construction.")
     with gr.Tab("📝 Paste DNA Sequence"):
         with gr.Row():
-            with gr.Column(scale=3):
                 inp = gr.Textbox(
                     label="DNA Input",
                     placeholder="Paste your DNA sequence here (ACTG format)",
@@ -361,11 +588,16 @@ with gr.Blocks(title="Viral Gene Phylogenetic Pipeline", theme=gr.themes.Soft())
                     label="Similarity Threshold (%)",
                     info="Higher values = more similar sequences"
                 )
         btn1 = gr.Button("🚀 Run Pipeline", variant="primary", size="lg")
     with gr.Tab("📁 Upload FASTA File"):
         with gr.Row():
-            with gr.Column(scale=3):
                 file_input = gr.File(
                     label="FASTA File",
                     file_types=['.fasta', '.fa', '.txt']
@@ -379,6 +611,11 @@ with gr.Blocks(title="Viral Gene Phylogenetic Pipeline", theme=gr.themes.Soft())
                     label="Similarity Threshold (%)",
                     info="Higher values = more similar sequences"
                 )
         btn2 = gr.Button("🚀 Run on FASTA", variant="primary", size="lg")
     # Outputs
@@ -388,14 +625,15 @@ with gr.Blocks(title="Viral Gene Phylogenetic Pipeline", theme=gr.themes.Soft())
         with gr.Column():
             out1 = gr.Textbox(label="🎯 Step 1: Extracted F Gene Sequence", lines=8)
             out2 = gr.Textbox(label="🔍 Step 2: F Gene Validation (Keras)", lines=3)
-        with gr.Column():
             out3 = gr.Textbox(label="📋 Dataset Used")
-            out4 = gr.Textbox(label="🌳 Step 3: Phylogenetic Tree Status", lines=5)
     with gr.Row():
-        html = gr.File(label="📥 Download Tree (HTML)")
         fasta = gr.File(label="📥 Download Aligned FASTA")
-        phy = gr.File(label="📥 Download IQ-TREE .phy File")
     with gr.Row():
         tree_html = gr.HTML(label="🌳 Interactive Tree Preview")
@@ -403,13 +641,13 @@ with gr.Blocks(title="Viral Gene Phylogenetic Pipeline", theme=gr.themes.Soft())
     # Event handlers
     btn1.click(
         fn=run_pipeline,
-        inputs=[inp, similarity_input],
-        outputs=[out1, out2, out3, out4, html, fasta, phy, tree_html]
     )
     btn2.click(
         fn=run_pipeline_from_file,
-        inputs=[file_input, similarity_input_file],
-        outputs=[out1, out2, out3, out4, html, fasta, phy, tree_html]
     )
 if __name__ == '__main__':

 from predictor import GenePredictor
 from tensorflow.keras.models import load_model
 import ml_simplified_tree
+import tempfile
+import shutil
 # --- Global Variables ---
+MAFFT_PATH = "mafft/mafftdir/bin/mafft"  # Update this path as needed
+IQTREE_PATH = "iqtree/bin/iqtree2"  # Update this path as needed
 # --- Logging ---
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 # --- Paths ---
 from huggingface_hub import hf_hub_download
 # Model repository and file paths
 model_repo = "GGproject10/best_boundary_aware_model"
     logging.error(f"Failed to initialize tree analyzer: {e}")
     analyzer = None
+# --- Helper Functions ---
+def check_tool_availability():
+    """Report whether the MAFFT and IQ-TREE executables can be located.
+
+    A tool counts as available when its configured path (``MAFFT_PATH`` /
+    ``IQTREE_PATH``) exists on disk, or when a matching command name is
+    found on PATH.
+
+    Returns:
+        tuple[bool, bool]: ``(mafft_available, iqtree_available)``.
+    """
+    has_mafft = os.path.exists(MAFFT_PATH) or shutil.which('mafft') is not None
+    if os.path.exists(IQTREE_PATH):
+        has_iqtree = True
+    else:
+        # Either binary name is acceptable when resolving from PATH.
+        has_iqtree = any(shutil.which(name) is not None for name in ('iqtree2', 'iqtree'))
+    return has_mafft, has_iqtree
+def run_mafft_alignment(input_fasta, output_fasta):
+    """Align the sequences in ``input_fasta`` with MAFFT and save the result.
+
+    Args:
+        input_fasta: Path of the FASTA file handed to MAFFT.
+        output_fasta: Path the captured alignment is written to.
+
+    Returns:
+        tuple: ``(True, output_fasta)`` on success, otherwise
+        ``(False, error_message)``. Failures (non-zero exit, timeout, any
+        other exception) are logged and reported, never raised.
+    """
+    try:
+        # Prefer the configured binary; otherwise rely on PATH resolution.
+        executable = 'mafft'
+        if os.path.exists(MAFFT_PATH):
+            executable = MAFFT_PATH
+        command = [executable, '--auto', input_fasta]
+        logging.info(f"Running MAFFT: {' '.join(command)}")
+        completed = subprocess.run(
+            command,
+            capture_output=True,
+            text=True,
+            timeout=300,  # seconds, i.e. 5 minutes
+        )
+        # Guard clause: bail out first so the success path reads straight through.
+        if completed.returncode != 0:
+            logging.error(f"MAFFT failed: {completed.stderr}")
+            return False, f"MAFFT error: {completed.stderr}"
+        # The captured stdout is what gets stored as the aligned FASTA.
+        with open(output_fasta, 'w') as handle:
+            handle.write(completed.stdout)
+        logging.info(f"MAFFT alignment completed: {output_fasta}")
+        return True, output_fasta
+    except subprocess.TimeoutExpired:
+        logging.error("MAFFT timeout")
+        return False, "MAFFT timeout (>5 minutes)"
+    except Exception as e:
+        logging.error(f"MAFFT execution failed: {e}")
+        return False, f"MAFFT execution failed: {str(e)}"
+def run_iqtree_analysis(aligned_fasta, output_prefix):
+    """Run an IQ-TREE maximum likelihood analysis on an aligned FASTA file.
+
+    The executable is resolved in order: the configured ``IQTREE_PATH``, then
+    ``iqtree2`` on PATH, then ``iqtree`` on PATH.
+
+    Args:
+        aligned_fasta: Path of the alignment passed to IQ-TREE via ``-s``.
+        output_prefix: Prefix for IQ-TREE's output files; the tree is expected
+            at ``<output_prefix>.treefile``.
+
+    Returns:
+        tuple: ``(True, tree_file_path)`` on success, otherwise
+        ``(False, error_message)``. Failures (tool missing, non-zero exit,
+        missing tree file, timeout, any other exception) are logged and
+        reported, never raised.
+    """
+    try:
+        # Check if IQ-TREE is available
+        if os.path.exists(IQTREE_PATH):
+            iqtree_cmd = IQTREE_PATH
+        elif shutil.which('iqtree2') is not None:
+            iqtree_cmd = 'iqtree2'
+        elif shutil.which('iqtree') is not None:
+            iqtree_cmd = 'iqtree'
+        else:
+            return False, "IQ-TREE not found"
+        # IQ-TREE command for maximum likelihood tree
+        cmd = [
+            iqtree_cmd,
+            '-s', aligned_fasta,
+            '-m', 'TEST',  # Auto model selection
+            '-bb', '1000',  # Bootstrap replicates
+            '-alrt', '1000',  # SH-aLRT test
+            '-nt', 'AUTO',  # Auto detect threads
+            # Use the short spelling: the ``iqtree`` (1.x) fallback above does
+            # not understand ``--prefix``, while IQ-TREE 2 accepts both forms.
+            '-pre', output_prefix,
+            '-redo'  # Overwrite existing files
+        ]
+        logging.info(f"Running IQ-TREE: {' '.join(cmd)}")
+        # Run IQ-TREE
+        result = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            timeout=600  # 10 minute timeout
+        )
+        if result.returncode != 0:
+            logging.error(f"IQ-TREE failed: {result.stderr}")
+            return False, f"IQ-TREE error: {result.stderr}"
+        # A zero exit code is not trusted on its own; confirm the tree exists.
+        tree_file = f"{output_prefix}.treefile"
+        if not os.path.exists(tree_file):
+            logging.error("IQ-TREE completed but tree file not found")
+            return False, "Tree file not generated"
+        logging.info(f"IQ-TREE analysis completed: {tree_file}")
+        return True, tree_file
+    except subprocess.TimeoutExpired:
+        logging.error("IQ-TREE timeout")
+        return False, "IQ-TREE timeout (>10 minutes)"
+    except Exception as e:
+        logging.error(f"IQ-TREE execution failed: {e}")
+        return False, f"IQ-TREE execution failed: {str(e)}"
+def create_multi_fasta_with_query(query_sequence, query_id="Query_F_Gene"):
+    """Write the query plus reference sequences to a temporary multi-FASTA file.
+
+    The query record is written first. References come from
+    ``f_gene_sequences_aligned.fasta`` when that file exists; otherwise up to
+    20 rows of ``analyzer.data`` whose ``sequence`` is longer than 50
+    characters are used.
+
+    Reference records whose identifier equals ``query_id`` are skipped. The
+    reference file is overwritten by build_maximum_likelihood_tree with an
+    alignment that already contains the query, so without this filter a second
+    run would emit two records with the same name.
+
+    Args:
+        query_sequence: Sequence text written under the query header.
+        query_id: FASTA identifier used for the query record.
+
+    Returns:
+        str | None: Path of the temporary file (the caller must delete it), or
+        ``None`` if anything failed. On failure the partial file is removed.
+    """
+    temp_path = None
+    try:
+        # delete=False keeps the file after the handle closes; the context
+        # manager guarantees the handle itself is closed even on error.
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.fasta', delete=False) as temp_fasta:
+            temp_path = temp_fasta.name
+            # Add query sequence
+            temp_fasta.write(f">{query_id}\n{query_sequence}\n")
+            # Add reference sequences from existing aligned FASTA if available
+            ref_fasta_path = "f_gene_sequences_aligned.fasta"
+            if os.path.exists(ref_fasta_path):
+                skipping = False
+                with open(ref_fasta_path, 'r') as ref_file:
+                    for line in ref_file:
+                        if line.startswith('>'):
+                            header_fields = line[1:].split()
+                            # Drop any earlier copy of the query record.
+                            skipping = bool(header_fields) and header_fields[0] == query_id
+                        if not skipping:
+                            # Guarantee record separation if the last line lacks a newline.
+                            temp_fasta.write(line if line.endswith('\n') else line + '\n')
+                logging.info(f"Added reference sequences from {ref_fasta_path}")
+            elif analyzer and hasattr(analyzer, 'data'):
+                # If no reference file, try to create from CSV data
+                count = 0
+                for idx, row in analyzer.data.iterrows():
+                    if 'sequence' in row and len(str(row['sequence'])) > 50:
+                        seq_id = row.get('id', f"Ref_{count}")
+                        sequence = str(row['sequence']).upper()
+                        temp_fasta.write(f">{seq_id}\n{sequence}\n")
+                        count += 1
+                        if count >= 20:  # Limit to prevent too large datasets
+                            break
+                logging.info(f"Added {count} reference sequences from CSV")
+        return temp_path
+    except Exception as e:
+        logging.error(f"Failed to create multi-FASTA: {e}")
+        # Do not leave a half-written temp file behind.
+        if temp_path and os.path.exists(temp_path):
+            try:
+                os.unlink(temp_path)
+            except OSError as cleanup_error:
+                logging.error(f"Failed to remove temporary FASTA {temp_path}: {cleanup_error}")
+        return None
+def build_maximum_likelihood_tree(f_gene_sequence):
+    """Build a maximum likelihood tree for the query using MAFFT then IQ-TREE.
+
+    Steps: verify both tools are available, write a multi-FASTA containing the
+    query and references, align it with MAFFT into ``ml_tree_output``, run
+    IQ-TREE on the alignment, then copy the alignment and tree to the
+    standard file names in the working directory.
+
+    Args:
+        f_gene_sequence: Query sequence passed to create_multi_fasta_with_query.
+
+    Returns:
+        tuple: ``(success, message, aligned_fasta_path, tree_file_path)``.
+        The two paths are ``None`` on failure, except that the alignment path
+        is still returned when only the IQ-TREE step failed.
+    """
+    try:
+        has_mafft, has_iqtree = check_tool_availability()
+        if not has_mafft:
+            return False, "MAFFT not available", None, None
+        if not has_iqtree:
+            return False, "IQ-TREE not available", None, None
+        work_dir = "ml_tree_output"
+        os.makedirs(work_dir, exist_ok=True)
+        # Step 1: combined input (query + references)
+        logging.info("Creating multi-FASTA file...")
+        combined_fasta = create_multi_fasta_with_query(f_gene_sequence)
+        if not combined_fasta:
+            return False, "Failed to create input FASTA", None, None
+        # Step 2: alignment
+        logging.info("Running MAFFT alignment...")
+        aligned_fasta = os.path.join(work_dir, "aligned_sequences.fasta")
+        aligned_ok, mafft_detail = run_mafft_alignment(combined_fasta, aligned_fasta)
+        # The temporary input is no longer needed whether or not MAFFT succeeded.
+        os.unlink(combined_fasta)
+        if not aligned_ok:
+            return False, f"MAFFT failed: {mafft_detail}", None, None
+        # Step 3: tree inference
+        logging.info("Running IQ-TREE analysis...")
+        prefix = os.path.join(work_dir, "ml_tree")
+        tree_ok, iqtree_detail = run_iqtree_analysis(aligned_fasta, prefix)
+        if not tree_ok:
+            return False, f"IQ-TREE failed: {iqtree_detail}", aligned_fasta, None
+        # Step 4: on success the second element is the tree file path.
+        tree_file = iqtree_detail
+        # Copy to standard names for compatibility
+        for produced, standard_name in (
+            (aligned_fasta, "f_gene_sequences_aligned.fasta"),
+            (tree_file, "f_gene_sequences.phy.treefile"),
+        ):
+            if os.path.exists(produced):
+                shutil.copy2(produced, standard_name)
+        report = [
+            "✅ Maximum likelihood tree built successfully!",
+            f"- Alignment: {os.path.basename(aligned_fasta)}",
+            f"- Tree: {os.path.basename(tree_file)}",
+        ]
+        log_path = f"{prefix}.log"
+        if os.path.exists(log_path):
+            with open(log_path, 'r') as log_handle:
+                log_text = log_handle.read()
+            # Report the first log line that names the selected model, if any.
+            best_fit = next(
+                (entry for entry in log_text.split('\n') if "Best-fit model:" in entry),
+                None,
+            )
+            if best_fit is not None:
+                report.append(f"- {best_fit.strip()}")
+        success_msg = "\n".join(report) + "\n"
+        logging.info("Maximum likelihood tree construction completed")
+        return True, success_msg, aligned_fasta, tree_file
+    except Exception as e:
+        logging.error(f"ML tree construction failed: {e}")
+        return False, f"ML tree construction failed: {str(e)}", None, None
 # --- Tree Analysis Function (Based on old Gradio API) ---
 def analyze_sequence_for_tree(sequence: str, matching_percentage: float) -> str:
     """
         return ""
 # --- Full Pipeline ---
+def run_pipeline_from_file(fasta_file_obj, similarity_score, build_ml_tree=False):
+    """Run the pipeline on a sequence read from an uploaded FASTA file.
+
+    Reads the sequence with read_fasta_file and delegates to run_pipeline.
+    ``build_ml_tree`` defaults to False, matching run_pipeline, so callers
+    that pass only the first two arguments keep working.
+
+    Returns:
+        tuple: The 9-element result of run_pipeline, or a 9-element tuple
+        carrying an error message when the file cannot be read or an
+        exception is raised.
+    """
     try:
         dna_input = read_fasta_file(fasta_file_obj)
         if not dna_input:
+            return "Failed to read FASTA file", "", "", "", "", None, None, None, "No input sequence"
+        return run_pipeline(dna_input, similarity_score, build_ml_tree)
     except Exception as e:
         error_msg = f"Pipeline error: {str(e)}"
         logging.error(error_msg)
+        return error_msg, "", "", "", "", None, None, None, error_msg
+def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
     try:
         # Clean input
         dna_input = dna_input.upper().strip()
         if not dna_input:
+            return "Empty input", "", "", "", "", None, None, None, "No input provided"
         # Sanitize DNA sequence
         if not re.match('^[ACTGN]+$', dna_input):
         else:
             keras_output = "Skipped: sequence too short for F gene validation"
+        # Step 3: Maximum Likelihood Tree (MAFFT + IQ-TREE)
         aligned_file = None
         phy_file = None
+        ml_tree_output = ""
+        if build_ml_tree and processed_sequence and len(processed_sequence) >= 50:
+            try:
+                logging.info("Starting maximum likelihood tree construction...")
+                ml_success, ml_message, ml_aligned, ml_tree = build_maximum_likelihood_tree(processed_sequence)
+                if ml_success:
+                    ml_tree_output = ml_message
+                    aligned_file = ml_aligned
+                    phy_file = ml_tree
+                else:
+                    ml_tree_output = f"❌ ML Tree failed: {ml_message}"
+            except Exception as e:
+                ml_tree_output = f"❌ ML Tree construction failed: {str(e)}"
+                logging.error(f"ML Tree failed: {e}")
+        elif build_ml_tree:
+            ml_tree_output = "❌ F gene sequence too short for ML tree construction (minimum 50 bp)"
+        else:
+            ml_tree_output = "ML tree construction skipped (not requested)"
+        # Step 4: ML Simplified Tree (using the existing approach)
         html_file = None
         tree_html_content = "No tree generated"
+        simplified_ml_output = ""
         if analyzer and processed_sequence and len(processed_sequence) >= 10:
             try:
+                logging.info(f"Starting simplified ML tree analysis with F gene sequence length: {len(processed_sequence)}")
+                # Use the existing tree analysis function with user-specified similarity
                 tree_result = analyze_sequence_for_tree(processed_sequence, matching_percentage=similarity_score)
                 if tree_result and not tree_result.startswith("Error:"):
                     # Success - we have HTML content
                     tree_html_content = tree_result
+                    simplified_ml_output = "✅ Simplified phylogenetic tree generated successfully!"
                     # Check if HTML file was created
                     output_dir = "output"
                         html_files = [f for f in os.listdir(output_dir) if f.endswith('.html')]
                         if html_files:
                             html_file = os.path.join(output_dir, html_files[-1])  # Get the latest
+                            simplified_ml_output += f"\n- Tree file: {html_files[-1]}"
                     # Count sequences analyzed
                     if analyzer.find_query_sequence(processed_sequence):
                         matched_ids, perc = analyzer.find_similar_sequences(similarity_score)
+                        simplified_ml_output += f"\n- {len(matched_ids)} sequences analyzed"
+                        simplified_ml_output += f"\n- Similarity threshold: {perc:.1f}%"
                 else:
                     # Error occurred
+                    simplified_ml_output = f"❌ Simplified tree analysis failed: {tree_result}"
+                    logging.error(f"Simplified tree analysis failed: {tree_result}")
             except Exception as e:
+                simplified_ml_output = f"❌ Simplified ML Tree analysis failed: {str(e)}"
+                logging.error(f"Simplified ML Tree failed: {e}")
                 import traceback
                 logging.error(f"Full traceback: {traceback.format_exc()}")
         elif not analyzer:
+            simplified_ml_output = "❌ Tree analyzer not initialized"
         elif not processed_sequence or len(processed_sequence) < 10:
+            simplified_ml_output = f"❌ F gene sequence too short for analysis (length: {len(processed_sequence) if processed_sequence else 0})"
         else:
+            simplified_ml_output = "❌ Skipped due to previous step errors"
         return (
             boundary_output,
             keras_output[:500] + "..." if len(keras_output) > 500 else keras_output,
             csv_path if os.path.exists(csv_path) else "CSV file not found",
+            ml_tree_output,
+            simplified_ml_output,
             html_file,
             aligned_file if aligned_file and os.path.exists(aligned_file) else None,
             phy_file if phy_file and os.path.exists(phy_file) else None,
         logging.error(error_msg)
         import traceback
         logging.error(f"Full traceback: {traceback.format_exc()}")
+        return error_msg, "", "", "", "", None, None, None, error_msg
 # --- Gradio UI ---
 with gr.Blocks(title="Viral Gene Phylogenetic Pipeline", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🧬 Viral Gene Phylogenetic Inference Pipeline")
+    gr.Markdown("This pipeline processes DNA sequences through boundary detection, k-mer analysis, and phylogenetic tree construction using both simplified ML and full maximum likelihood approaches.")
     with gr.Tab("📝 Paste DNA Sequence"):
         with gr.Row():
+            with gr.Column(scale=2):
                 inp = gr.Textbox(
                     label="DNA Input",
                     placeholder="Paste your DNA sequence here (ACTG format)",
                     label="Similarity Threshold (%)",
                     info="Higher values = more similar sequences"
                 )
+                ml_tree_checkbox = gr.Checkbox(
+                    label="Build Maximum Likelihood Tree",
+                    value=False,
+                    info="Use MAFFT + IQ-TREE (slower but more accurate)"
+                )
         btn1 = gr.Button("🚀 Run Pipeline", variant="primary", size="lg")
     with gr.Tab("📁 Upload FASTA File"):
         with gr.Row():
+            with gr.Column(scale=2):
                 file_input = gr.File(
                     label="FASTA File",
                     file_types=['.fasta', '.fa', '.txt']
                     label="Similarity Threshold (%)",
                     info="Higher values = more similar sequences"
                 )
+                ml_tree_checkbox_file = gr.Checkbox(
+                    label="Build Maximum Likelihood Tree",
+                    value=False,
+                    info="Use MAFFT + IQ-TREE (slower but more accurate)"
+                )
         btn2 = gr.Button("🚀 Run on FASTA", variant="primary", size="lg")
     # Outputs
         with gr.Column():
             out1 = gr.Textbox(label="🎯 Step 1: Extracted F Gene Sequence", lines=8)
             out2 = gr.Textbox(label="🔍 Step 2: F Gene Validation (Keras)", lines=3)
             out3 = gr.Textbox(label="📋 Dataset Used")
+        with gr.Column():
+            out4 = gr.Textbox(label="🌳 Step 3: Maximum Likelihood Tree (MAFFT+IQ-TREE)", lines=5)
+            out5 = gr.Textbox(label="🌿 Step 4: Simplified ML Tree Status", lines=5)
     with gr.Row():
+        html = gr.File(label="📥 Download Interactive Tree (HTML)")
         fasta = gr.File(label="📥 Download Aligned FASTA")
+        phy = gr.File(label="📥 Download ML Tree File")
     with gr.Row():
         tree_html = gr.HTML(label="🌳 Interactive Tree Preview")
     # Event handlers
     btn1.click(
         fn=run_pipeline,
+        inputs=[inp, similarity_input, ml_tree_checkbox],
+        outputs=[out1, out2, out3, out4, out5, html, fasta, phy, tree_html]
     )
     btn2.click(
         fn=run_pipeline_from_file,
+        inputs=[file_input, similarity_input_file, ml_tree_checkbox_file],
+        outputs=[out1, out2, out3, out4, out5, html, fasta, phy, tree_html]
     )
 if __name__ == '__main__':