Spaces:
No application file
No application file
Update app.py
Browse files
app.py
CHANGED
|
@@ -866,7 +866,6 @@ def create_interface():
|
|
| 866 |
)
|
| 867 |
with gr.Row():
|
| 868 |
show_tree_link = gr.HTML()
|
| 869 |
-
debug_output = gr.Textbox(label="Debug Info", interactive=False)
|
| 870 |
|
| 871 |
# File downloads
|
| 872 |
gr.Markdown("### 📁 Download Results")
|
|
@@ -886,40 +885,91 @@ def create_interface():
|
|
| 886 |
interactive=False
|
| 887 |
)
|
| 888 |
|
| 889 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 890 |
|
| 891 |
# Event handlers
|
| 892 |
def run_analysis_text(dna_seq, sim_score, build_tree):
|
| 893 |
-
|
| 894 |
-
|
| 895 |
-
return (results.get('boundary_output', ''),
|
| 896 |
-
results.get('keras_output', ''),
|
| 897 |
-
results.get('ml_tree_output', ''),
|
| 898 |
-
results.get('simplified_ml_output', ''),
|
| 899 |
-
tree_html,
|
| 900 |
-
results.get('aligned_file', None),
|
| 901 |
-
results.get('phy_file', None),
|
| 902 |
-
html_file_path if html_file_path and os.path.exists(html_file_path) else None,
|
| 903 |
-
"Analysis complete")
|
| 904 |
-
|
| 905 |
def run_analysis_file(file_obj, sim_score, build_tree):
|
| 906 |
-
|
| 907 |
-
|
| 908 |
-
return (results.get('boundary_output', ''),
|
| 909 |
-
results.get('keras_output', ''),
|
| 910 |
-
results.get('ml_tree_output', ''),
|
| 911 |
-
results.get('simplified_ml_output', ''),
|
| 912 |
-
tree_html,
|
| 913 |
-
results.get('aligned_file', None),
|
| 914 |
-
results.get('phy_file', None),
|
| 915 |
-
html_file_path if html_file_path and os.path.exists(html_file_path) else None,
|
| 916 |
-
"Analysis complete")
|
| 917 |
-
|
| 918 |
def run_analysis_combined(dna_seq, file_obj, sim_score, build_tree):
|
|
|
|
| 919 |
if file_obj is not None:
|
| 920 |
-
return
|
| 921 |
else:
|
| 922 |
-
return
|
| 923 |
|
| 924 |
def clear_inputs():
|
| 925 |
return "", None, 95.0, False, "Ready to analyze"
|
|
@@ -927,13 +977,14 @@ def create_interface():
|
|
| 927 |
def show_tree(html_file):
|
| 928 |
if html_file and os.path.exists(html_file):
|
| 929 |
try:
|
|
|
|
| 930 |
relative_path = os.path.relpath(html_file, os.getcwd())
|
| 931 |
file_url = f"/file={relative_path}"
|
| 932 |
link_html = f'<a href="{file_url}" target="_blank">View ML Simplified Tree</a>'
|
| 933 |
-
return gr.update(value=link_html)
|
| 934 |
except Exception as e:
|
| 935 |
-
return gr.update(value=f"<p>Error generating link: {str(e)}</p>")
|
| 936 |
-
return gr.update(value="<p>No tree file available</p>")
|
| 937 |
|
| 938 |
# Connect events
|
| 939 |
run_btn.click(
|
|
@@ -954,7 +1005,7 @@ def create_interface():
|
|
| 954 |
show_tree_link.click(
|
| 955 |
fn=show_tree,
|
| 956 |
inputs=[html_file],
|
| 957 |
-
outputs=[show_tree_link
|
| 958 |
)
|
| 959 |
|
| 960 |
# Example data loading
|
|
|
|
| 866 |
)
|
| 867 |
with gr.Row():
|
| 868 |
show_tree_link = gr.HTML()
|
|
|
|
| 869 |
|
| 870 |
# File downloads
|
| 871 |
gr.Markdown("### 📁 Download Results")
|
|
|
|
| 885 |
interactive=False
|
| 886 |
)
|
| 887 |
|
| 888 |
+
with gr.Tab("ℹ️ Help & Info"):
|
| 889 |
+
gr.Markdown("""
|
| 890 |
+
## About This Tool
|
| 891 |
+
|
| 892 |
+
### F Gene Analysis Pipeline
|
| 893 |
+
This comprehensive pipeline analyzes F genes through multiple computational approaches:
|
| 894 |
+
|
| 895 |
+
#### 🎯 Gene Boundary Detection
|
| 896 |
+
- Uses deep learning to identify and extract F gene sequences from larger genomic sequences
|
| 897 |
+
- Provides confidence scores for detected boundaries
|
| 898 |
+
- Automatically trims sequences to focus on the F gene region
|
| 899 |
+
|
| 900 |
+
#### 🔍 Gene Validation
|
| 901 |
+
- Employs k-mer based machine learning models to validate extracted sequences
|
| 902 |
+
- Provides probability scores indicating likelihood of being a genuine F gene
|
| 903 |
+
- Uses 6-mer frequency patterns for classification
|
| 904 |
+
|
| 905 |
+
#### 🌳 Phylogenetic Analysis
|
| 906 |
+
|
| 907 |
+
**Maximum Likelihood Trees:**
|
| 908 |
+
- Requires MAFFT (sequence alignment) and IQ-TREE (phylogenetic reconstruction)
|
| 909 |
+
- Performs model selection and bootstrap analysis
|
| 910 |
+
- Generates publication-quality phylogenetic trees
|
| 911 |
+
- Provides detailed evolutionary analysis
|
| 912 |
+
|
| 913 |
+
**Simplified Trees:**
|
| 914 |
+
- Uses built-in algorithms for quick phylogenetic analysis
|
| 915 |
+
- Interactive visualization with similarity-based clustering
|
| 916 |
+
- Faster alternative when external tools are not available
|
| 917 |
+
|
| 918 |
+
### Input Requirements
|
| 919 |
+
- **DNA Sequences**: ATCG format, minimum 50 bp for meaningful analysis
|
| 920 |
+
- **FASTA Files**: Standard FASTA format with single or multiple sequences
|
| 921 |
+
- **Similarity Threshold**: 1-99% for controlling phylogenetic analysis sensitivity
|
| 922 |
+
|
| 923 |
+
### Dependencies
|
| 924 |
+
|
| 925 |
+
**Required for ML Trees:**
|
| 926 |
+
```bash
|
| 927 |
+
# Ubuntu/Debian
|
| 928 |
+
sudo apt-get install mafft iqtree
|
| 929 |
+
|
| 930 |
+
# macOS
|
| 931 |
+
brew install mafft iqtree
|
| 932 |
+
|
| 933 |
+
# Conda
|
| 934 |
+
conda install -c bioconda mafft iqtree
|
| 935 |
+
```
|
| 936 |
+
|
| 937 |
+
### Output Files
|
| 938 |
+
- **Aligned FASTA**: Multiple sequence alignment in FASTA format
|
| 939 |
+
- **Tree File**: Newick format phylogenetic tree
|
| 940 |
+
- **HTML Tree**: Interactive visualization for web browsers (ML Simplified Tree)
|
| 941 |
+
|
| 942 |
+
### Troubleshooting
|
| 943 |
+
|
| 944 |
+
**Common Issues:**
|
| 945 |
+
- *"No similar sequences found"*: Lower the similarity threshold
|
| 946 |
+
- *"Sequence too short"*: Provide sequences longer than 50 bp
|
| 947 |
+
- *"MAFFT/IQ-TREE not found"*: Install required dependencies
|
| 948 |
+
- *"Model not available"*: Check model files are properly downloaded
|
| 949 |
+
|
| 950 |
+
**Performance Tips:**
|
| 951 |
+
- Use sequences between 100-2000 bp for optimal performance
|
| 952 |
+
- Limit to <50 sequences for faster tree construction
|
| 953 |
+
- Lower similarity thresholds find more distant relatives
|
| 954 |
+
- Higher thresholds focus on closely related sequences
|
| 955 |
+
|
| 956 |
+
### Citation
|
| 957 |
+
If you use this tool in your research, please cite the appropriate methods and tools used.
|
| 958 |
+
""")
|
| 959 |
|
| 960 |
# Event handlers
|
| 961 |
def run_analysis_text(dna_seq, sim_score, build_tree):
|
| 962 |
+
return run_pipeline(dna_seq, sim_score, build_tree)
|
| 963 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 964 |
def run_analysis_file(file_obj, sim_score, build_tree):
|
| 965 |
+
return run_pipeline_from_file(file_obj, sim_score, build_tree)
|
| 966 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 967 |
def run_analysis_combined(dna_seq, file_obj, sim_score, build_tree):
|
| 968 |
+
# Priority: file upload over text input
|
| 969 |
if file_obj is not None:
|
| 970 |
+
return run_pipeline_from_file(file_obj, sim_score, build_tree)
|
| 971 |
else:
|
| 972 |
+
return run_pipeline(dna_seq, sim_score, build_tree)
|
| 973 |
|
| 974 |
def clear_inputs():
|
| 975 |
return "", None, 95.0, False, "Ready to analyze"
|
|
|
|
| 977 |
def show_tree(html_file):
|
| 978 |
if html_file and os.path.exists(html_file):
|
| 979 |
try:
|
| 980 |
+
# Convert to relative path for Gradio file serving
|
| 981 |
relative_path = os.path.relpath(html_file, os.getcwd())
|
| 982 |
file_url = f"/file={relative_path}"
|
| 983 |
link_html = f'<a href="{file_url}" target="_blank">View ML Simplified Tree</a>'
|
| 984 |
+
return gr.update(value=link_html)
|
| 985 |
except Exception as e:
|
| 986 |
+
return gr.update(value=f"<p>Error generating link: {str(e)}. Please download and open the file manually.</p>")
|
| 987 |
+
return gr.update(value="<p>No tree file available. Run analysis to generate one.</p>")
|
| 988 |
|
| 989 |
# Connect events
|
| 990 |
run_btn.click(
|
|
|
|
| 1005 |
show_tree_link.click(
|
| 1006 |
fn=show_tree,
|
| 1007 |
inputs=[html_file],
|
| 1008 |
+
outputs=[show_tree_link]
|
| 1009 |
)
|
| 1010 |
|
| 1011 |
# Example data loading
|