re-type committed on
Commit
95a0193
·
verified ·
1 Parent(s): c20d2fd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -82
app.py CHANGED
@@ -886,91 +886,40 @@ def create_interface():
886
  interactive=False
887
  )
888
 
889
- with gr.Tab("ℹ️ Help & Info"):
890
- gr.Markdown("""
891
- ## About This Tool
892
-
893
- ### F Gene Analysis Pipeline
894
- This comprehensive pipeline analyzes F genes through multiple computational approaches:
895
-
896
- #### 🎯 Gene Boundary Detection
897
- - Uses deep learning to identify and extract F gene sequences from larger genomic sequences
898
- - Provides confidence scores for detected boundaries
899
- - Automatically trims sequences to focus on the F gene region
900
-
901
- #### 🔍 Gene Validation
902
- - Employs k-mer based machine learning models to validate extracted sequences
903
- - Provides probability scores indicating likelihood of being a genuine F gene
904
- - Uses 6-mer frequency patterns for classification
905
-
906
- #### 🌳 Phylogenetic Analysis
907
-
908
- **Maximum Likelihood Trees:**
909
- - Requires MAFFT (sequence alignment) and IQ-TREE (phylogenetic reconstruction)
910
- - Performs model selection and bootstrap analysis
911
- - Generates publication-quality phylogenetic trees
912
- - Provides detailed evolutionary analysis
913
-
914
- **Simplified Trees:**
915
- - Uses built-in algorithms for quick phylogenetic analysis
916
- - Interactive visualization with similarity-based clustering
917
- - Faster alternative when external tools are not available
918
-
919
- ### Input Requirements
920
- - **DNA Sequences**: ATCG format, minimum 50 bp for meaningful analysis
921
- - **FASTA Files**: Standard FASTA format with single or multiple sequences
922
- - **Similarity Threshold**: 1-99% for controlling phylogenetic analysis sensitivity
923
-
924
- ### Dependencies
925
-
926
- **Required for ML Trees:**
927
- ```bash
928
- # Ubuntu/Debian
929
- sudo apt-get install mafft iqtree
930
-
931
- # macOS
932
- brew install mafft iqtree
933
-
934
- # Conda
935
- conda install -c bioconda mafft iqtree
936
- ```
937
-
938
- ### Output Files
939
- - **Aligned FASTA**: Multiple sequence alignment in FASTA format
940
- - **Tree File**: Newick format phylogenetic tree
941
- - **HTML Tree**: Interactive visualization for web browsers (ML Simplified Tree)
942
-
943
- ### Troubleshooting
944
-
945
- **Common Issues:**
946
- - *"No similar sequences found"*: Lower the similarity threshold
947
- - *"Sequence too short"*: Provide sequences longer than 50 bp
948
- - *"MAFFT/IQ-TREE not found"*: Install required dependencies
949
- - *"Model not available"*: Check model files are properly downloaded
950
-
951
- **Performance Tips:**
952
- - Use sequences between 100-2000 bp for optimal performance
953
- - Limit to <50 sequences for faster tree construction
954
- - Lower similarity thresholds find more distant relatives
955
- - Higher thresholds focus on closely related sequences
956
-
957
- ### Citation
958
- If you use this tool in your research, please cite the appropriate methods and tools used.
959
- """)
960
 
961
  # Event handlers
962
  def run_analysis_text(dna_seq, sim_score, build_tree):
963
- return run_pipeline(dna_seq, sim_score, build_tree)
964
-
 
 
 
 
 
 
 
 
 
 
965
  def run_analysis_file(file_obj, sim_score, build_tree):
966
- return run_pipeline_from_file(file_obj, sim_score, build_tree)
967
-
 
 
 
 
 
 
 
 
 
 
968
  def run_analysis_combined(dna_seq, file_obj, sim_score, build_tree):
969
- # Priority: file upload over text input
970
  if file_obj is not None:
971
- return run_pipeline_from_file(file_obj, sim_score, build_tree)
972
  else:
973
- return run_pipeline(dna_seq, sim_score, build_tree)
974
 
975
  def clear_inputs():
976
  return "", None, 95.0, False, "Ready to analyze"
@@ -978,14 +927,13 @@ def create_interface():
978
  def show_tree(html_file):
979
  if html_file and os.path.exists(html_file):
980
  try:
981
- # Convert to relative path for Gradio file serving
982
  relative_path = os.path.relpath(html_file, os.getcwd())
983
  file_url = f"/file={relative_path}"
984
  link_html = f'<a href="{file_url}" target="_blank">View ML Simplified Tree</a>'
985
  return gr.update(value=link_html), f"File found: {relative_path}"
986
  except Exception as e:
987
- return gr.update(value=f"<p>Error generating link: {str(e)}. Please download and open the file manually.</p>"), f"Error: {str(e)}"
988
- return gr.update(value="<p>No tree file available. Run analysis to generate one.</p>"), "No file available"
989
 
990
  # Connect events
991
  run_btn.click(
@@ -1023,7 +971,6 @@ def create_interface():
1023
  )
1024
 
1025
  return iface
1026
-
1027
  # --- Main Execution ---
1028
  if __name__ == "__main__":
1029
  # Initialize and launch interface
 
886
  interactive=False
887
  )
888
 
889
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
890
 
891
  # Event handlers
892
  def run_analysis_text(dna_seq, sim_score, build_tree):
893
+ results = run_pipeline(dna_seq, sim_score, build_tree)
894
+ html_file_path = results.get('html_file', None) # Fallback to check dictionary
895
+ return (results.get('boundary_output', ''),
896
+ results.get('keras_output', ''),
897
+ results.get('ml_tree_output', ''),
898
+ results.get('simplified_ml_output', ''),
899
+ tree_html,
900
+ results.get('aligned_file', None),
901
+ results.get('phy_file', None),
902
+ html_file_path if html_file_path and os.path.exists(html_file_path) else None,
903
+ "Analysis complete")
904
+
905
  def run_analysis_file(file_obj, sim_score, build_tree):
906
+ results = run_pipeline_from_file(file_obj, sim_score, build_tree)
907
+ html_file_path = results.get('html_file', None)
908
+ return (results.get('boundary_output', ''),
909
+ results.get('keras_output', ''),
910
+ results.get('ml_tree_output', ''),
911
+ results.get('simplified_ml_output', ''),
912
+ tree_html,
913
+ results.get('aligned_file', None),
914
+ results.get('phy_file', None),
915
+ html_file_path if html_file_path and os.path.exists(html_file_path) else None,
916
+ "Analysis complete")
917
+
918
  def run_analysis_combined(dna_seq, file_obj, sim_score, build_tree):
 
919
  if file_obj is not None:
920
+ return run_analysis_file(file_obj, sim_score, build_tree)
921
  else:
922
+ return run_analysis_text(dna_seq, sim_score, build_tree)
923
 
924
  def clear_inputs():
925
  return "", None, 95.0, False, "Ready to analyze"
 
927
  def show_tree(html_file):
928
  if html_file and os.path.exists(html_file):
929
  try:
 
930
  relative_path = os.path.relpath(html_file, os.getcwd())
931
  file_url = f"/file={relative_path}"
932
  link_html = f'<a href="{file_url}" target="_blank">View ML Simplified Tree</a>'
933
  return gr.update(value=link_html), f"File found: {relative_path}"
934
  except Exception as e:
935
+ return gr.update(value=f"<p>Error generating link: {str(e)}</p>"), f"Error: {str(e)}"
936
+ return gr.update(value="<p>No tree file available</p>"), "No file available"
937
 
938
  # Connect events
939
  run_btn.click(
 
971
  )
972
 
973
  return iface
 
974
  # --- Main Execution ---
975
  if __name__ == "__main__":
976
  # Initialize and launch interface