re-type committed on
Commit
f70096f
·
verified ·
1 Parent(s): 95a0193

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -32
app.py CHANGED
@@ -866,7 +866,6 @@ def create_interface():
866
  )
867
  with gr.Row():
868
  show_tree_link = gr.HTML()
869
- debug_output = gr.Textbox(label="Debug Info", interactive=False)
870
 
871
  # File downloads
872
  gr.Markdown("### 📁 Download Results")
@@ -886,40 +885,91 @@ def create_interface():
886
  interactive=False
887
  )
888
 
889
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
890
 
891
  # Event handlers
892
  def run_analysis_text(dna_seq, sim_score, build_tree):
893
- results = run_pipeline(dna_seq, sim_score, build_tree)
894
- html_file_path = results.get('html_file', None) # Fallback to check dictionary
895
- return (results.get('boundary_output', ''),
896
- results.get('keras_output', ''),
897
- results.get('ml_tree_output', ''),
898
- results.get('simplified_ml_output', ''),
899
- tree_html,
900
- results.get('aligned_file', None),
901
- results.get('phy_file', None),
902
- html_file_path if html_file_path and os.path.exists(html_file_path) else None,
903
- "Analysis complete")
904
-
905
  def run_analysis_file(file_obj, sim_score, build_tree):
906
- results = run_pipeline_from_file(file_obj, sim_score, build_tree)
907
- html_file_path = results.get('html_file', None)
908
- return (results.get('boundary_output', ''),
909
- results.get('keras_output', ''),
910
- results.get('ml_tree_output', ''),
911
- results.get('simplified_ml_output', ''),
912
- tree_html,
913
- results.get('aligned_file', None),
914
- results.get('phy_file', None),
915
- html_file_path if html_file_path and os.path.exists(html_file_path) else None,
916
- "Analysis complete")
917
-
918
  def run_analysis_combined(dna_seq, file_obj, sim_score, build_tree):
 
919
  if file_obj is not None:
920
- return run_analysis_file(file_obj, sim_score, build_tree)
921
  else:
922
- return run_analysis_text(dna_seq, sim_score, build_tree)
923
 
924
  def clear_inputs():
925
  return "", None, 95.0, False, "Ready to analyze"
@@ -927,13 +977,14 @@ def create_interface():
927
  def show_tree(html_file):
928
  if html_file and os.path.exists(html_file):
929
  try:
 
930
  relative_path = os.path.relpath(html_file, os.getcwd())
931
  file_url = f"/file={relative_path}"
932
  link_html = f'<a href="{file_url}" target="_blank">View ML Simplified Tree</a>'
933
- return gr.update(value=link_html), f"File found: {relative_path}"
934
  except Exception as e:
935
- return gr.update(value=f"<p>Error generating link: {str(e)}</p>"), f"Error: {str(e)}"
936
- return gr.update(value="<p>No tree file available</p>"), "No file available"
937
 
938
  # Connect events
939
  run_btn.click(
@@ -954,7 +1005,7 @@ def create_interface():
954
  show_tree_link.click(
955
  fn=show_tree,
956
  inputs=[html_file],
957
- outputs=[show_tree_link, debug_output]
958
  )
959
 
960
  # Example data loading
 
866
  )
867
  with gr.Row():
868
  show_tree_link = gr.HTML()
 
869
 
870
  # File downloads
871
  gr.Markdown("### 📁 Download Results")
 
885
  interactive=False
886
  )
887
 
888
+ with gr.Tab("ℹ️ Help & Info"):
889
+ gr.Markdown("""
890
+ ## About This Tool
891
+
892
+ ### F Gene Analysis Pipeline
893
+ This comprehensive pipeline analyzes F genes through multiple computational approaches:
894
+
895
+ #### 🎯 Gene Boundary Detection
896
+ - Uses deep learning to identify and extract F gene sequences from larger genomic sequences
897
+ - Provides confidence scores for detected boundaries
898
+ - Automatically trims sequences to focus on the F gene region
899
+
900
+ #### 🔍 Gene Validation
901
+ - Employs k-mer based machine learning models to validate extracted sequences
902
+ - Provides probability scores indicating likelihood of being a genuine F gene
903
+ - Uses 6-mer frequency patterns for classification
904
+
905
+ #### 🌳 Phylogenetic Analysis
906
+
907
+ **Maximum Likelihood Trees:**
908
+ - Requires MAFFT (sequence alignment) and IQ-TREE (phylogenetic reconstruction)
909
+ - Performs model selection and bootstrap analysis
910
+ - Generates publication-quality phylogenetic trees
911
+ - Provides detailed evolutionary analysis
912
+
913
+ **Simplified Trees:**
914
+ - Uses built-in algorithms for quick phylogenetic analysis
915
+ - Interactive visualization with similarity-based clustering
916
+ - Faster alternative when external tools are not available
917
+
918
+ ### Input Requirements
919
+ - **DNA Sequences**: ATCG format, minimum 50 bp for meaningful analysis
920
+ - **FASTA Files**: Standard FASTA format with single or multiple sequences
921
+ - **Similarity Threshold**: 1-99% for controlling phylogenetic analysis sensitivity
922
+
923
+ ### Dependencies
924
+
925
+ **Required for ML Trees:**
926
+ ```bash
927
+ # Ubuntu/Debian
928
+ sudo apt-get install mafft iqtree
929
+
930
+ # macOS
931
+ brew install mafft iqtree
932
+
933
+ # Conda
934
+ conda install -c bioconda mafft iqtree
935
+ ```
936
+
937
+ ### Output Files
938
+ - **Aligned FASTA**: Multiple sequence alignment in FASTA format
939
+ - **Tree File**: Newick format phylogenetic tree
940
+ - **HTML Tree**: Interactive visualization for web browsers (ML Simplified Tree)
941
+
942
+ ### Troubleshooting
943
+
944
+ **Common Issues:**
945
+ - *"No similar sequences found"*: Lower the similarity threshold
946
+ - *"Sequence too short"*: Provide sequences longer than 50 bp
947
+ - *"MAFFT/IQ-TREE not found"*: Install required dependencies
948
+ - *"Model not available"*: Check model files are properly downloaded
949
+
950
+ **Performance Tips:**
951
+ - Use sequences between 100-2000 bp for optimal performance
952
+ - Limit to <50 sequences for faster tree construction
953
+ - Lower similarity thresholds find more distant relatives
954
+ - Higher thresholds focus on closely related sequences
955
+
956
+ ### Citation
957
+ If you use this tool in your research, please cite the appropriate methods and tools used.
958
+ """)
959
 
960
  # Event handlers
961
  def run_analysis_text(dna_seq, sim_score, build_tree):
962
+ return run_pipeline(dna_seq, sim_score, build_tree)
963
+
 
 
 
 
 
 
 
 
 
 
964
  def run_analysis_file(file_obj, sim_score, build_tree):
965
+ return run_pipeline_from_file(file_obj, sim_score, build_tree)
966
+
 
 
 
 
 
 
 
 
 
 
967
  def run_analysis_combined(dna_seq, file_obj, sim_score, build_tree):
968
+ # Priority: file upload over text input
969
  if file_obj is not None:
970
+ return run_pipeline_from_file(file_obj, sim_score, build_tree)
971
  else:
972
+ return run_pipeline(dna_seq, sim_score, build_tree)
973
 
974
  def clear_inputs():
975
  return "", None, 95.0, False, "Ready to analyze"
 
977
  def show_tree(html_file):
978
  if html_file and os.path.exists(html_file):
979
  try:
980
+ # Convert to relative path for Gradio file serving
981
  relative_path = os.path.relpath(html_file, os.getcwd())
982
  file_url = f"/file={relative_path}"
983
  link_html = f'<a href="{file_url}" target="_blank">View ML Simplified Tree</a>'
984
+ return gr.update(value=link_html)
985
  except Exception as e:
986
+ return gr.update(value=f"<p>Error generating link: {str(e)}. Please download and open the file manually.</p>")
987
+ return gr.update(value="<p>No tree file available. Run analysis to generate one.</p>")
988
 
989
  # Connect events
990
  run_btn.click(
 
1005
  show_tree_link.click(
1006
  fn=show_tree,
1007
  inputs=[html_file],
1008
+ outputs=[show_tree_link]
1009
  )
1010
 
1011
  # Example data loading