Spaces:
No application file
No application file
Update app.py
Browse files
app.py
CHANGED
|
@@ -27,22 +27,22 @@ except ImportError:
|
|
| 27 |
from huggingface_hub import hf_hub_download
|
| 28 |
|
| 29 |
# --- Global Variables ---
|
| 30 |
-
MAFFT_PATH = "
|
| 31 |
-
IQTREE_PATH = "
|
| 32 |
-
CSV_PATH = "f_cleaned.csv" #
|
|
|
|
| 33 |
|
| 34 |
# --- Logging Setup ---
|
| 35 |
logging.basicConfig(
|
| 36 |
level=logging.INFO,
|
| 37 |
format='%(asctime)s - %(levelname)s - %(message)s',
|
| 38 |
handlers=[
|
| 39 |
-
logging.FileHandler('gene_analysis.log'),
|
| 40 |
logging.StreamHandler(sys.stdout)
|
| 41 |
]
|
| 42 |
)
|
| 43 |
|
| 44 |
-
# --- Model
|
| 45 |
-
MODEL_REPO = "GGproject10/best_boundary_aware_model"
|
| 46 |
boundary_model = None
|
| 47 |
keras_model = None
|
| 48 |
kmer_to_index = None
|
|
@@ -51,7 +51,7 @@ analyzer = None
|
|
| 51 |
# --- Load Models ---
|
| 52 |
def load_models():
|
| 53 |
global boundary_model, keras_model, kmer_to_index
|
| 54 |
-
hf_token = os.getenv("HF_TOKEN")
|
| 55 |
|
| 56 |
# Load boundary model
|
| 57 |
if GenePredictor:
|
|
@@ -59,7 +59,8 @@ def load_models():
|
|
| 59 |
boundary_path = hf_hub_download(
|
| 60 |
repo_id=MODEL_REPO,
|
| 61 |
filename="best_boundary_aware_model.pth",
|
| 62 |
-
token=hf_token
|
|
|
|
| 63 |
)
|
| 64 |
boundary_model = GenePredictor(boundary_path)
|
| 65 |
logging.info("Boundary model loaded successfully.")
|
|
@@ -75,12 +76,14 @@ def load_models():
|
|
| 75 |
keras_path = hf_hub_download(
|
| 76 |
repo_id=MODEL_REPO,
|
| 77 |
filename="best_model.keras",
|
| 78 |
-
token=hf_token
|
|
|
|
| 79 |
)
|
| 80 |
kmer_path = hf_hub_download(
|
| 81 |
repo_id=MODEL_REPO,
|
| 82 |
filename="kmer_to_index.pkl",
|
| 83 |
-
token=hf_token
|
|
|
|
| 84 |
)
|
| 85 |
keras_model = load_model(keras_path)
|
| 86 |
with open(kmer_path, "rb") as f:
|
|
@@ -101,11 +104,6 @@ def init_tree_analyzer():
|
|
| 101 |
analyzer = ml_simplified_tree.PhylogeneticTreeAnalyzer()
|
| 102 |
if analyzer.load_data(CSV_PATH):
|
| 103 |
logging.info("Tree analyzer initialized successfully.")
|
| 104 |
-
try:
|
| 105 |
-
if not analyzer.train_ai_model():
|
| 106 |
-
logging.warning("AI model training failed.")
|
| 107 |
-
except Exception as e:
|
| 108 |
-
logging.warning(f"AI model training failed: {e}")
|
| 109 |
else:
|
| 110 |
logging.error("Failed to load CSV data.")
|
| 111 |
analyzer = None
|
|
@@ -118,44 +116,28 @@ def init_tree_analyzer():
|
|
| 118 |
|
| 119 |
# --- Tool Detection ---
|
| 120 |
def check_tool_availability():
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
]
|
| 124 |
-
iqtree_candidates = [
|
| 125 |
-
IQTREE_PATH, 'iqtree2', 'iqtree', '/usr/bin/iqtree2', '/usr/local/bin/iqtree2',
|
| 126 |
-
'/usr/bin/iqtree', '/usr/local/bin/iqtree', 'iqtree2.exe', 'iqtree.exe'
|
| 127 |
-
]
|
| 128 |
-
|
| 129 |
-
mafft_cmd = next((cmd for cmd in mafft_candidates if cmd and (os.path.exists(cmd) or shutil.which(cmd))), None)
|
| 130 |
-
iqtree_cmd = next((cmd for cmd in iqtree_candidates if cmd and (os.path.exists(cmd) or shutil.which(cmd))), None)
|
| 131 |
-
|
| 132 |
return bool(mafft_cmd), bool(iqtree_cmd), mafft_cmd, iqtree_cmd
|
| 133 |
|
| 134 |
# --- Installation Guide ---
|
| 135 |
def install_dependencies_guide():
|
| 136 |
return """
|
| 137 |
-
🔧
|
| 138 |
|
| 139 |
-
|
| 140 |
-
-
|
| 141 |
-
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
For IQ-TREE:
|
| 146 |
-
- Ubuntu/Debian: sudo apt-get install iqtree
|
| 147 |
-
- CentOS/RHEL: sudo yum install iqtree
|
| 148 |
-
- macOS: brew install iqtree
|
| 149 |
-
- Windows: Download from http://www.iqtree.org/
|
| 150 |
-
|
| 151 |
-
Conda: conda install -c bioconda mafft iqtree
|
| 152 |
"""
|
| 153 |
|
| 154 |
# --- MAFFT and IQ-TREE Functions ---
|
| 155 |
def run_mafft_alignment(input_fasta, output_fasta, mafft_cmd):
|
| 156 |
try:
|
| 157 |
cmd = [mafft_cmd, '--auto', '--quiet', input_fasta]
|
| 158 |
-
result = subprocess.run(cmd, capture_output=True, text=True, timeout=
|
| 159 |
if result.returncode == 0:
|
| 160 |
with open(output_fasta, 'w') as f:
|
| 161 |
f.write(result.stdout)
|
|
@@ -171,10 +153,10 @@ def run_mafft_alignment(input_fasta, output_fasta, mafft_cmd):
|
|
| 171 |
def run_iqtree_analysis(aligned_fasta, output_prefix, iqtree_cmd):
|
| 172 |
try:
|
| 173 |
cmd = [
|
| 174 |
-
iqtree_cmd, '-s', aligned_fasta, '-m', '
|
| 175 |
-
'-
|
| 176 |
]
|
| 177 |
-
result = subprocess.run(cmd, capture_output=True, text=True, timeout=
|
| 178 |
tree_file = f"{output_prefix}.treefile"
|
| 179 |
if result.returncode == 0 and os.path.exists(tree_file) and os.path.getsize(tree_file) > 0:
|
| 180 |
logging.info(f"IQ-TREE completed: {tree_file}")
|
|
@@ -184,26 +166,12 @@ def run_iqtree_analysis(aligned_fasta, output_prefix, iqtree_cmd):
|
|
| 184 |
logging.error(f"IQ-TREE failed: {e}")
|
| 185 |
return False, f"IQ-TREE failed: {str(e)}"
|
| 186 |
|
| 187 |
-
# --- Fallback Tree Construction ---
|
| 188 |
-
def create_simple_tree(sequences_dict):
|
| 189 |
-
try:
|
| 190 |
-
seq_names = list(sequences_dict.keys())
|
| 191 |
-
if len(seq_names) < 2:
|
| 192 |
-
return None, "Need at least 2 sequences."
|
| 193 |
-
tree_str = f"({','.join([f'{name}:0.1' for name in seq_names[:5]])});"
|
| 194 |
-
tree_file = "simple_tree.nwk"
|
| 195 |
-
with open(tree_file, 'w') as f:
|
| 196 |
-
f.write(tree_str)
|
| 197 |
-
return tree_file, "Simple tree created."
|
| 198 |
-
except Exception as e:
|
| 199 |
-
return None, f"Simple tree creation failed: {str(e)}"
|
| 200 |
-
|
| 201 |
# --- Create Multi-FASTA ---
|
| 202 |
def create_multi_fasta(query_sequence, query_id="Query_F_Gene"):
|
| 203 |
try:
|
| 204 |
-
temp_fasta = tempfile.NamedTemporaryFile(mode='w', suffix='.fasta', delete=False)
|
| 205 |
temp_fasta.write(f">{query_id}\n{query_sequence}\n")
|
| 206 |
-
ref_fasta_path = "f_gene_sequences_aligned.fasta"
|
| 207 |
if os.path.exists(ref_fasta_path):
|
| 208 |
with open(ref_fasta_path, 'r') as ref_file:
|
| 209 |
temp_fasta.write(ref_file.read())
|
|
@@ -213,7 +181,7 @@ def create_multi_fasta(query_sequence, query_id="Query_F_Gene"):
|
|
| 213 |
if 'sequence' in row and len(str(row['sequence'])) > 50:
|
| 214 |
temp_fasta.write(f">{row.get('id', f'Ref_{count}')}\n{str(row['sequence']).upper()}\n")
|
| 215 |
count += 1
|
| 216 |
-
if count >=
|
| 217 |
break
|
| 218 |
temp_fasta.close()
|
| 219 |
return temp_fasta.name
|
|
@@ -237,26 +205,26 @@ def build_maximum_likelihood_tree(sequence):
|
|
| 237 |
guide = install_dependencies_guide()
|
| 238 |
return False, f"{status_msg}\n❌ Missing tools:\n{guide}", None, None
|
| 239 |
|
| 240 |
-
os.makedirs("ml_tree_output", exist_ok=True)
|
| 241 |
multi_fasta = create_multi_fasta(sequence)
|
| 242 |
if not multi_fasta:
|
| 243 |
return False, f"{status_msg}\n❌ Failed to create input FASTA.", None, None
|
| 244 |
|
| 245 |
-
aligned_fasta = "ml_tree_output/aligned_sequences.fasta"
|
| 246 |
mafft_success, mafft_result = run_mafft_alignment(multi_fasta, aligned_fasta, mafft_cmd)
|
| 247 |
os.unlink(multi_fasta)
|
| 248 |
|
| 249 |
if not mafft_success:
|
| 250 |
return False, f"{status_msg}\n❌ MAFFT failed: {mafft_result}", None, None
|
| 251 |
|
| 252 |
-
tree_prefix = "ml_tree_output/ml_tree"
|
| 253 |
iqtree_success, iqtree_result = run_iqtree_analysis(aligned_fasta, tree_prefix, iqtree_cmd)
|
| 254 |
if not iqtree_success:
|
| 255 |
return False, f"{status_msg}\n❌ IQ-TREE failed: {iqtree_result}", aligned_fasta, None
|
| 256 |
|
| 257 |
tree_file = iqtree_result
|
| 258 |
-
shutil.copy2(aligned_fasta, "f_gene_sequences_aligned.fasta")
|
| 259 |
-
shutil.copy2(tree_file, "f_gene_sequences.phy.treefile")
|
| 260 |
|
| 261 |
success_msg = f"{status_msg}\n✅ ML tree built:\n- Alignment: {os.path.basename(aligned_fasta)}\n- Tree: {os.path.basename(tree_file)}"
|
| 262 |
return True, success_msg, aligned_fasta, tree_file
|
|
@@ -352,6 +320,49 @@ def build_tree(sequence):
|
|
| 352 |
success, message, aligned_fasta, tree_file = build_maximum_likelihood_tree(sequence)
|
| 353 |
return format_results({"message": message, "tree_file": tree_file}, sequence, "tree")
|
| 354 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
# --- Gradio Interface ---
|
| 356 |
def create_gradio_interface():
|
| 357 |
css = """
|
|
@@ -385,8 +396,8 @@ def create_gradio_interface():
|
|
| 385 |
with gr.Column(scale=2):
|
| 386 |
output = gr.Textbox(
|
| 387 |
label="Results",
|
| 388 |
-
lines=
|
| 389 |
-
max_lines=
|
| 390 |
elem_classes=["output-text"]
|
| 391 |
)
|
| 392 |
|
|
@@ -408,53 +419,11 @@ def create_gradio_interface():
|
|
| 408 |
|
| 409 |
return interface
|
| 410 |
|
| 411 |
-
# --- File Processing ---
|
| 412 |
-
def process_fasta_file(file):
|
| 413 |
-
try:
|
| 414 |
-
if not file:
|
| 415 |
-
return "Please upload a FASTA file."
|
| 416 |
-
|
| 417 |
-
sequences = {}
|
| 418 |
-
current_seq = ""
|
| 419 |
-
current_name = ""
|
| 420 |
-
with open(file.name, 'r') as f:
|
| 421 |
-
for line in f:
|
| 422 |
-
line = line.strip()
|
| 423 |
-
if line.startswith('>'):
|
| 424 |
-
if current_name and current_seq:
|
| 425 |
-
sequences[current_name] = current_seq
|
| 426 |
-
current_name = line[1:]
|
| 427 |
-
current_seq = ""
|
| 428 |
-
else:
|
| 429 |
-
current_seq += line.upper()
|
| 430 |
-
if current_name and current_seq:
|
| 431 |
-
sequences[current_name] = current_seq
|
| 432 |
-
|
| 433 |
-
if not sequences:
|
| 434 |
-
return "No valid sequences in FASTA file."
|
| 435 |
-
|
| 436 |
-
results = [f"📁 FASTA FILE ANALYSIS\nFound {len(sequences)} sequences\n{'=' * 50}"]
|
| 437 |
-
for i, (name, seq) in enumerate(sequences.items()):
|
| 438 |
-
if i >= 5:
|
| 439 |
-
results.append(f"\n... and {len(sequences) - 5} more sequences")
|
| 440 |
-
break
|
| 441 |
-
results.append(f"\n🧬 Sequence: {name}\nLength: {len(seq)} bp")
|
| 442 |
-
clean_seq = re.sub(r'[^ATCG]', '', seq)
|
| 443 |
-
if len(clean_seq) >= 10:
|
| 444 |
-
results.append(analyze_sequence(clean_seq))
|
| 445 |
-
else:
|
| 446 |
-
results.append("❌ Sequence too short or invalid")
|
| 447 |
-
results.append("-" * 40)
|
| 448 |
-
|
| 449 |
-
return "\n".join(results)
|
| 450 |
-
except Exception as e:
|
| 451 |
-
logging.error(f"FASTA processing failed: {e}")
|
| 452 |
-
return f"FASTA processing failed: {str(e)}"
|
| 453 |
-
|
| 454 |
# --- Main ---
|
| 455 |
if __name__ == "__main__":
|
| 456 |
-
os.makedirs("
|
| 457 |
-
os.makedirs("ml_tree_output", exist_ok=True)
|
|
|
|
| 458 |
|
| 459 |
load_models()
|
| 460 |
init_tree_analyzer()
|
|
@@ -467,11 +436,9 @@ if __name__ == "__main__":
|
|
| 467 |
try:
|
| 468 |
interface = create_gradio_interface()
|
| 469 |
interface.launch(
|
| 470 |
-
share=False,
|
| 471 |
server_name="0.0.0.0",
|
| 472 |
server_port=7860,
|
| 473 |
-
|
| 474 |
-
debug=True
|
| 475 |
)
|
| 476 |
except Exception as e:
|
| 477 |
logging.error(f"Interface launch failed: {e}")
|
|
|
|
| 27 |
from huggingface_hub import hf_hub_download
|
| 28 |
|
| 29 |
# --- Global Variables ---
|
| 30 |
+
MAFFT_PATH = "/usr/bin/mafft" # Common path in Hugging Face Spaces
|
| 31 |
+
IQTREE_PATH = "/usr/bin/iqtree2" # Common path in Hugging Face Spaces
|
| 32 |
+
CSV_PATH = "/data/f_cleaned.csv" # Persistent storage in Hugging Face
|
| 33 |
+
MODEL_REPO = "GGproject10/best_boundary_aware_model"
|
| 34 |
|
| 35 |
# --- Logging Setup ---
|
| 36 |
logging.basicConfig(
|
| 37 |
level=logging.INFO,
|
| 38 |
format='%(asctime)s - %(levelname)s - %(message)s',
|
| 39 |
handlers=[
|
| 40 |
+
logging.FileHandler('/data/gene_analysis.log'),
|
| 41 |
logging.StreamHandler(sys.stdout)
|
| 42 |
]
|
| 43 |
)
|
| 44 |
|
| 45 |
+
# --- Model Variables ---
|
|
|
|
| 46 |
boundary_model = None
|
| 47 |
keras_model = None
|
| 48 |
kmer_to_index = None
|
|
|
|
| 51 |
# --- Load Models ---
|
| 52 |
def load_models():
|
| 53 |
global boundary_model, keras_model, kmer_to_index
|
| 54 |
+
hf_token = os.getenv("HF_TOKEN", None)
|
| 55 |
|
| 56 |
# Load boundary model
|
| 57 |
if GenePredictor:
|
|
|
|
| 59 |
boundary_path = hf_hub_download(
|
| 60 |
repo_id=MODEL_REPO,
|
| 61 |
filename="best_boundary_aware_model.pth",
|
| 62 |
+
token=hf_token,
|
| 63 |
+
cache_dir="/data/models"
|
| 64 |
)
|
| 65 |
boundary_model = GenePredictor(boundary_path)
|
| 66 |
logging.info("Boundary model loaded successfully.")
|
|
|
|
| 76 |
keras_path = hf_hub_download(
|
| 77 |
repo_id=MODEL_REPO,
|
| 78 |
filename="best_model.keras",
|
| 79 |
+
token=hf_token,
|
| 80 |
+
cache_dir="/data/models"
|
| 81 |
)
|
| 82 |
kmer_path = hf_hub_download(
|
| 83 |
repo_id=MODEL_REPO,
|
| 84 |
filename="kmer_to_index.pkl",
|
| 85 |
+
token=hf_token,
|
| 86 |
+
cache_dir="/data/models"
|
| 87 |
)
|
| 88 |
keras_model = load_model(keras_path)
|
| 89 |
with open(kmer_path, "rb") as f:
|
|
|
|
| 104 |
analyzer = ml_simplified_tree.PhylogeneticTreeAnalyzer()
|
| 105 |
if analyzer.load_data(CSV_PATH):
|
| 106 |
logging.info("Tree analyzer initialized successfully.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
else:
|
| 108 |
logging.error("Failed to load CSV data.")
|
| 109 |
analyzer = None
|
|
|
|
| 116 |
|
| 117 |
# --- Tool Detection ---
|
| 118 |
def check_tool_availability():
|
| 119 |
+
mafft_cmd = shutil.which(MAFFT_PATH) or shutil.which("mafft")
|
| 120 |
+
iqtree_cmd = shutil.which(IQTREE_PATH) or shutil.which("iqtree2")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
return bool(mafft_cmd), bool(iqtree_cmd), mafft_cmd, iqtree_cmd
|
| 122 |
|
| 123 |
# --- Installation Guide ---
|
| 124 |
def install_dependencies_guide():
|
| 125 |
return """
|
| 126 |
+
🔧 DEPENDENCY SETUP FOR HUGGING FACE SPACES:
|
| 127 |
|
| 128 |
+
1. Add to requirements.txt:
|
| 129 |
+
- mafft
|
| 130 |
+
- iqtree
|
| 131 |
+
2. Place f_cleaned.csv in the repository root.
|
| 132 |
+
3. Ensure HF_TOKEN is set in Space secrets for model downloads.
|
| 133 |
+
4. If dependencies fail, contact Hugging Face support or use a custom Docker image.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
"""
|
| 135 |
|
| 136 |
# --- MAFFT and IQ-TREE Functions ---
|
| 137 |
def run_mafft_alignment(input_fasta, output_fasta, mafft_cmd):
|
| 138 |
try:
|
| 139 |
cmd = [mafft_cmd, '--auto', '--quiet', input_fasta]
|
| 140 |
+
result = subprocess.run(cmd, capture_output=True, text=True, timeout=300) # Reduced timeout for HF
|
| 141 |
if result.returncode == 0:
|
| 142 |
with open(output_fasta, 'w') as f:
|
| 143 |
f.write(result.stdout)
|
|
|
|
| 153 |
def run_iqtree_analysis(aligned_fasta, output_prefix, iqtree_cmd):
|
| 154 |
try:
|
| 155 |
cmd = [
|
| 156 |
+
iqtree_cmd, '-s', aligned_fasta, '-m', 'GTR', '-nt', '1', # Simplified for HF resources
|
| 157 |
+
'--prefix', output_prefix, '--quiet'
|
| 158 |
]
|
| 159 |
+
result = subprocess.run(cmd, capture_output=True, text=True, timeout=600) # Reduced timeout
|
| 160 |
tree_file = f"{output_prefix}.treefile"
|
| 161 |
if result.returncode == 0 and os.path.exists(tree_file) and os.path.getsize(tree_file) > 0:
|
| 162 |
logging.info(f"IQ-TREE completed: {tree_file}")
|
|
|
|
| 166 |
logging.error(f"IQ-TREE failed: {e}")
|
| 167 |
return False, f"IQ-TREE failed: {str(e)}"
|
| 168 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
# --- Create Multi-FASTA ---
|
| 170 |
def create_multi_fasta(query_sequence, query_id="Query_F_Gene"):
|
| 171 |
try:
|
| 172 |
+
temp_fasta = tempfile.NamedTemporaryFile(mode='w', suffix='.fasta', delete=False, dir="/data")
|
| 173 |
temp_fasta.write(f">{query_id}\n{query_sequence}\n")
|
| 174 |
+
ref_fasta_path = "/data/f_gene_sequences_aligned.fasta"
|
| 175 |
if os.path.exists(ref_fasta_path):
|
| 176 |
with open(ref_fasta_path, 'r') as ref_file:
|
| 177 |
temp_fasta.write(ref_file.read())
|
|
|
|
| 181 |
if 'sequence' in row and len(str(row['sequence'])) > 50:
|
| 182 |
temp_fasta.write(f">{row.get('id', f'Ref_{count}')}\n{str(row['sequence']).upper()}\n")
|
| 183 |
count += 1
|
| 184 |
+
if count >= 10: # Reduced for HF
|
| 185 |
break
|
| 186 |
temp_fasta.close()
|
| 187 |
return temp_fasta.name
|
|
|
|
| 205 |
guide = install_dependencies_guide()
|
| 206 |
return False, f"{status_msg}\n❌ Missing tools:\n{guide}", None, None
|
| 207 |
|
| 208 |
+
os.makedirs("/data/ml_tree_output", exist_ok=True)
|
| 209 |
multi_fasta = create_multi_fasta(sequence)
|
| 210 |
if not multi_fasta:
|
| 211 |
return False, f"{status_msg}\n❌ Failed to create input FASTA.", None, None
|
| 212 |
|
| 213 |
+
aligned_fasta = "/data/ml_tree_output/aligned_sequences.fasta"
|
| 214 |
mafft_success, mafft_result = run_mafft_alignment(multi_fasta, aligned_fasta, mafft_cmd)
|
| 215 |
os.unlink(multi_fasta)
|
| 216 |
|
| 217 |
if not mafft_success:
|
| 218 |
return False, f"{status_msg}\n❌ MAFFT failed: {mafft_result}", None, None
|
| 219 |
|
| 220 |
+
tree_prefix = "/data/ml_tree_output/ml_tree"
|
| 221 |
iqtree_success, iqtree_result = run_iqtree_analysis(aligned_fasta, tree_prefix, iqtree_cmd)
|
| 222 |
if not iqtree_success:
|
| 223 |
return False, f"{status_msg}\n❌ IQ-TREE failed: {iqtree_result}", aligned_fasta, None
|
| 224 |
|
| 225 |
tree_file = iqtree_result
|
| 226 |
+
shutil.copy2(aligned_fasta, "/data/f_gene_sequences_aligned.fasta")
|
| 227 |
+
shutil.copy2(tree_file, "/data/f_gene_sequences.phy.treefile")
|
| 228 |
|
| 229 |
success_msg = f"{status_msg}\n✅ ML tree built:\n- Alignment: {os.path.basename(aligned_fasta)}\n- Tree: {os.path.basename(tree_file)}"
|
| 230 |
return True, success_msg, aligned_fasta, tree_file
|
|
|
|
| 320 |
success, message, aligned_fasta, tree_file = build_maximum_likelihood_tree(sequence)
|
| 321 |
return format_results({"message": message, "tree_file": tree_file}, sequence, "tree")
|
| 322 |
|
| 323 |
+
# --- File Processing ---
|
| 324 |
+
def process_fasta_file(file):
|
| 325 |
+
try:
|
| 326 |
+
if not file:
|
| 327 |
+
return "Please upload a FASTA file."
|
| 328 |
+
|
| 329 |
+
sequences = {}
|
| 330 |
+
current_seq = ""
|
| 331 |
+
current_name = ""
|
| 332 |
+
with open(file.name, 'r') as f:
|
| 333 |
+
for line in f:
|
| 334 |
+
line = line.strip()
|
| 335 |
+
if line.startswith('>'):
|
| 336 |
+
if current_name and current_seq:
|
| 337 |
+
sequences[current_name] = current_seq
|
| 338 |
+
current_name = line[1:]
|
| 339 |
+
current_seq = ""
|
| 340 |
+
else:
|
| 341 |
+
current_seq += line.upper()
|
| 342 |
+
if current_name and current_seq:
|
| 343 |
+
sequences[current_name] = current_seq
|
| 344 |
+
|
| 345 |
+
if not sequences:
|
| 346 |
+
return "No valid sequences in FASTA file."
|
| 347 |
+
|
| 348 |
+
results = [f"📁 FASTA FILE ANALYSIS\nFound {len(sequences)} sequences\n{'=' * 50}"]
|
| 349 |
+
for i, (name, seq) in enumerate(sequences.items()):
|
| 350 |
+
if i >= 3: # Reduced for HF
|
| 351 |
+
results.append(f"\n... and {len(sequences) - 3} more sequences")
|
| 352 |
+
break
|
| 353 |
+
results.append(f"\n🧬 Sequence: {name}\nLength: {len(seq)} bp")
|
| 354 |
+
clean_seq = re.sub(r'[^ATCG]', '', seq)
|
| 355 |
+
if len(clean_seq) >= 10:
|
| 356 |
+
results.append(analyze_sequence(clean_seq))
|
| 357 |
+
else:
|
| 358 |
+
results.append("❌ Sequence too short or invalid")
|
| 359 |
+
results.append("-" * 40)
|
| 360 |
+
|
| 361 |
+
return "\n".join(results)
|
| 362 |
+
except Exception as e:
|
| 363 |
+
logging.error(f"FASTA processing failed: {e}")
|
| 364 |
+
return f"FASTA processing failed: {str(e)}"
|
| 365 |
+
|
| 366 |
# --- Gradio Interface ---
|
| 367 |
def create_gradio_interface():
|
| 368 |
css = """
|
|
|
|
| 396 |
with gr.Column(scale=2):
|
| 397 |
output = gr.Textbox(
|
| 398 |
label="Results",
|
| 399 |
+
lines=15,
|
| 400 |
+
max_lines=20,
|
| 401 |
elem_classes=["output-text"]
|
| 402 |
)
|
| 403 |
|
|
|
|
| 419 |
|
| 420 |
return interface
|
| 421 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 422 |
# --- Main ---
|
| 423 |
if __name__ == "__main__":
|
| 424 |
+
os.makedirs("/data", exist_ok=True)
|
| 425 |
+
os.makedirs("/data/ml_tree_output", exist_ok=True)
|
| 426 |
+
os.makedirs("/data/models", exist_ok=True)
|
| 427 |
|
| 428 |
load_models()
|
| 429 |
init_tree_analyzer()
|
|
|
|
| 436 |
try:
|
| 437 |
interface = create_gradio_interface()
|
| 438 |
interface.launch(
|
|
|
|
| 439 |
server_name="0.0.0.0",
|
| 440 |
server_port=7860,
|
| 441 |
+
share=False # Managed by Hugging Face
|
|
|
|
| 442 |
)
|
| 443 |
except Exception as e:
|
| 444 |
logging.error(f"Interface launch failed: {e}")
|