Spaces:
Running
Running
Fix CAI installation for Hugging Face Spaces
Browse files
- Remove Dockerfile approach (wasn't working in HF Spaces)
- Add CAI auto-installation at app startup before any imports
- Install CAI with --no-use-pep517 flag to avoid wheel naming issues
- Add packages.txt for system dependencies
- Add setup_cai.py as standalone installation script
- This should resolve the 'ModuleNotFoundError: No module named CAI' issue
- Dockerfile +0 -74
- app.py +51 -25
- packages.txt +4 -0
- setup_cai.py +118 -0
Dockerfile
DELETED
|
@@ -1,74 +0,0 @@
|
|
| 1 |
-
FROM python:3.10
|
| 2 |
-
|
| 3 |
-
WORKDIR /app
|
| 4 |
-
|
| 5 |
-
# Install system dependencies
|
| 6 |
-
RUN apt-get update && apt-get install -y \
|
| 7 |
-
git \
|
| 8 |
-
git-lfs \
|
| 9 |
-
ffmpeg \
|
| 10 |
-
libsm6 \
|
| 11 |
-
libxext6 \
|
| 12 |
-
cmake \
|
| 13 |
-
rsync \
|
| 14 |
-
libgl1 \
|
| 15 |
-
&& rm -rf /var/lib/apt/lists/* \
|
| 16 |
-
&& git lfs install
|
| 17 |
-
|
| 18 |
-
# Upgrade pip and install base packages
|
| 19 |
-
RUN pip install --no-cache-dir pip -U && \
|
| 20 |
-
pip install --no-cache-dir \
|
| 21 |
-
setuptools \
|
| 22 |
-
wheel \
|
| 23 |
-
datasets \
|
| 24 |
-
"huggingface-hub>=0.30" \
|
| 25 |
-
"hf-transfer>=0.1.4" \
|
| 26 |
-
"protobuf<4" \
|
| 27 |
-
"click<8.1" \
|
| 28 |
-
"pydantic~=1.0"
|
| 29 |
-
|
| 30 |
-
# Install CAI package first with legacy build system to avoid wheel naming issues
|
| 31 |
-
RUN pip install --no-use-pep517 --no-cache-dir \
|
| 32 |
-
git+https://github.com/Benjamin-Lee/CodonAdaptationIndex.git@b6e017a92c58829f6a5aec8c26a21262bc2a6610
|
| 33 |
-
|
| 34 |
-
# Verify CAI installation
|
| 35 |
-
RUN python -c "import CAI; from CAI import CAI as cai_func, relative_adaptiveness; print('β
CAI package verified successfully')"
|
| 36 |
-
|
| 37 |
-
# Copy requirements file and install remaining dependencies
|
| 38 |
-
COPY requirements.txt /tmp/requirements.txt
|
| 39 |
-
RUN pip install --no-cache-dir -r /tmp/requirements.txt
|
| 40 |
-
|
| 41 |
-
# Install additional streamlit dependencies
|
| 42 |
-
RUN pip install --no-cache-dir \
|
| 43 |
-
streamlit==1.28.1 \
|
| 44 |
-
"uvicorn>=0.14.0" \
|
| 45 |
-
spaces
|
| 46 |
-
|
| 47 |
-
# Create user directory structure
|
| 48 |
-
RUN mkdir -p .streamlit && \
|
| 49 |
-
git config --global core.excludesfile ~/.gitignore && \
|
| 50 |
-
echo ".streamlit" > ~/.gitignore
|
| 51 |
-
|
| 52 |
-
RUN mkdir -p /home/user && \
|
| 53 |
-
( [ -e /home/user/app ] || ln -s /app/ /home/user/app ) || true
|
| 54 |
-
|
| 55 |
-
# Copy application files
|
| 56 |
-
COPY --link ./ /app
|
| 57 |
-
|
| 58 |
-
# Verify application can import CAI after copying files
|
| 59 |
-
RUN python -c "from CodonTransformer.CodonEvaluation import *; print('β
CodonTransformer.CodonEvaluation imports successfully')"
|
| 60 |
-
|
| 61 |
-
# Set up environment
|
| 62 |
-
ENV PYTHONPATH=/app
|
| 63 |
-
ENV STREAMLIT_SERVER_HEADLESS=true
|
| 64 |
-
ENV STREAMLIT_SERVER_PORT=7860
|
| 65 |
-
ENV STREAMLIT_SERVER_ADDRESS=0.0.0.0
|
| 66 |
-
|
| 67 |
-
# Expose port
|
| 68 |
-
EXPOSE 7860
|
| 69 |
-
|
| 70 |
-
# Health check
|
| 71 |
-
HEALTHCHECK CMD curl --fail http://localhost:7860/_stcore/health
|
| 72 |
-
|
| 73 |
-
# Run the application
|
| 74 |
-
CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
|
@@ -1,3 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import torch
|
| 3 |
import pandas as pd
|
|
@@ -14,8 +42,6 @@ import warnings
|
|
| 14 |
warnings.filterwarnings("ignore")
|
| 15 |
|
| 16 |
# Import CodonTransformer modules
|
| 17 |
-
import sys
|
| 18 |
-
import os
|
| 19 |
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 20 |
|
| 21 |
from CodonTransformer.CodonPrediction import (
|
|
@@ -220,7 +246,7 @@ def validate_sequence(sequence: str) -> Tuple[bool, str, str, str]:
|
|
| 220 |
if sequence_chars.issubset(dna_chars):
|
| 221 |
if len(sequence) < 3:
|
| 222 |
return False, "DNA sequence must be at least 3 nucleotides long", "dna", sequence
|
| 223 |
-
|
| 224 |
# Auto-fix DNA sequences not divisible by 3
|
| 225 |
if len(sequence) % 3 != 0:
|
| 226 |
remainder = len(sequence) % 3
|
|
@@ -229,7 +255,7 @@ def validate_sequence(sequence: str) -> Tuple[bool, str, str, str]:
|
|
| 229 |
else:
|
| 230 |
fixed_sequence = sequence
|
| 231 |
message = "Valid DNA sequence"
|
| 232 |
-
|
| 233 |
return True, message, "dna", fixed_sequence
|
| 234 |
|
| 235 |
# If contains protein-specific amino acids, treat as protein
|
|
@@ -654,7 +680,7 @@ def main():
|
|
| 654 |
|
| 655 |
def single_sequence_optimization():
|
| 656 |
"""Single sequence optimization interface - enhanced from original functionality"""
|
| 657 |
-
# Sidebar configuration
|
| 658 |
st.sidebar.header("π§ Configuration")
|
| 659 |
organism_options = [
|
| 660 |
"Escherichia coli general",
|
|
@@ -687,17 +713,17 @@ def single_sequence_optimization():
|
|
| 687 |
)
|
| 688 |
if not POST_PROCESSING_AVAILABLE:
|
| 689 |
st.sidebar.warning("β οΈ DNAChisel not available. Install with: pip install dnachisel")
|
| 690 |
-
|
| 691 |
# Dataset Information
|
| 692 |
st.sidebar.markdown("---")
|
| 693 |
st.sidebar.markdown("### π Dataset Information")
|
| 694 |
st.sidebar.markdown("""
|
| 695 |
- **Dataset**: [ColiFormer-Data](https://huggingface.co/datasets/saketh11/ColiFormer-Data)
|
| 696 |
- **Training**: 4,300 high-CAI E. coli sequences
|
| 697 |
-
- **Reference**: 50,000+ E. coli gene sequences
|
| 698 |
- **Auto-download**: CAI weights & tAI coefficients
|
| 699 |
""")
|
| 700 |
-
|
| 701 |
# Model Information
|
| 702 |
st.sidebar.markdown("### π€ Model Information")
|
| 703 |
st.sidebar.markdown("""
|
|
@@ -853,10 +879,10 @@ def single_sequence_optimization():
|
|
| 853 |
st.error(f"β **Optimization Failed:** {st.session_state.results}")
|
| 854 |
else:
|
| 855 |
display_optimization_results(
|
| 856 |
-
st.session_state.results,
|
| 857 |
-
st.session_state.get('organism', organism),
|
| 858 |
-
st.session_state.get('sequence_clean', ''),
|
| 859 |
-
st.session_state.get('sequence_type', 'protein'),
|
| 860 |
st.session_state.get('input_metrics', {})
|
| 861 |
)
|
| 862 |
|
|
@@ -1226,20 +1252,20 @@ def display_batch_results():
|
|
| 1226 |
|
| 1227 |
# CAI Extremes Analysis
|
| 1228 |
st.subheader("π― CAI Performance Analysis")
|
| 1229 |
-
|
| 1230 |
# Filter out rows with NaN CAI values for analysis
|
| 1231 |
valid_cai_df = results_df.dropna(subset=['cai_after'])
|
| 1232 |
-
|
| 1233 |
if len(valid_cai_df) > 0:
|
| 1234 |
# Find lowest and highest CAI sequences
|
| 1235 |
lowest_cai_idx = valid_cai_df['cai_after'].idxmin()
|
| 1236 |
highest_cai_idx = valid_cai_df['cai_after'].idxmax()
|
| 1237 |
-
|
| 1238 |
lowest_cai_row = results_df.loc[lowest_cai_idx]
|
| 1239 |
highest_cai_row = results_df.loc[highest_cai_idx]
|
| 1240 |
-
|
| 1241 |
col1, col2 = st.columns(2)
|
| 1242 |
-
|
| 1243 |
with col1:
|
| 1244 |
st.markdown("**π» Lowest CAI Sequence**")
|
| 1245 |
st.write(f"**Name:** {lowest_cai_row['name']}")
|
|
@@ -1247,12 +1273,12 @@ def display_batch_results():
|
|
| 1247 |
st.metric("GC Content", f"{lowest_cai_row['gc_content_after']:.1f}%")
|
| 1248 |
st.metric("tAI Score", f"{lowest_cai_row['tai_after']:.3f}")
|
| 1249 |
st.metric("Length", f"{lowest_cai_row['length_after']} bp")
|
| 1250 |
-
|
| 1251 |
# Show improvement
|
| 1252 |
if pd.notna(lowest_cai_row['cai_before']):
|
| 1253 |
cai_improvement = lowest_cai_row['cai_after'] - lowest_cai_row['cai_before']
|
| 1254 |
st.metric("CAI Improvement", f"{cai_improvement:+.3f}")
|
| 1255 |
-
|
| 1256 |
with col2:
|
| 1257 |
st.markdown("**πΊ Highest CAI Sequence**")
|
| 1258 |
st.write(f"**Name:** {highest_cai_row['name']}")
|
|
@@ -1260,12 +1286,12 @@ def display_batch_results():
|
|
| 1260 |
st.metric("GC Content", f"{highest_cai_row['gc_content_after']:.1f}%")
|
| 1261 |
st.metric("tAI Score", f"{highest_cai_row['tai_after']:.3f}")
|
| 1262 |
st.metric("Length", f"{highest_cai_row['length_after']} bp")
|
| 1263 |
-
|
| 1264 |
# Show improvement
|
| 1265 |
if pd.notna(highest_cai_row['cai_before']):
|
| 1266 |
cai_improvement = highest_cai_row['cai_after'] - highest_cai_row['cai_before']
|
| 1267 |
st.metric("CAI Improvement", f"{cai_improvement:+.3f}")
|
| 1268 |
-
|
| 1269 |
# CAI Distribution Chart
|
| 1270 |
st.subheader("π CAI Distribution")
|
| 1271 |
fig = go.Figure()
|
|
@@ -1276,7 +1302,7 @@ def display_batch_results():
|
|
| 1276 |
marker_color='darkblue',
|
| 1277 |
opacity=0.7
|
| 1278 |
))
|
| 1279 |
-
|
| 1280 |
# Add vertical lines for lowest and highest
|
| 1281 |
fig.add_vline(
|
| 1282 |
x=lowest_cai_row['cai_after'],
|
|
@@ -1286,11 +1312,11 @@ def display_batch_results():
|
|
| 1286 |
)
|
| 1287 |
fig.add_vline(
|
| 1288 |
x=highest_cai_row['cai_after'],
|
| 1289 |
-
line_dash="dash",
|
| 1290 |
line_color="green",
|
| 1291 |
annotation_text=f"Highest: {highest_cai_row['cai_after']:.3f}"
|
| 1292 |
)
|
| 1293 |
-
|
| 1294 |
fig.update_layout(
|
| 1295 |
title="Distribution of Optimized CAI Scores",
|
| 1296 |
xaxis_title="CAI Score",
|
|
@@ -1339,7 +1365,7 @@ def display_batch_results():
|
|
| 1339 |
st.plotly_chart(fig_gc, use_container_width=True)
|
| 1340 |
else:
|
| 1341 |
st.warning("β οΈ No valid GC content values found in the batch results.")
|
| 1342 |
-
|
| 1343 |
else:
|
| 1344 |
st.warning("β οΈ No valid CAI scores found in the batch results. Check if CAI weights are properly loaded.")
|
| 1345 |
|
|
|
|
| 1 |
+
# Setup CAI package before any other imports
|
| 2 |
+
import sys
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
# Check and setup CAI package if needed
|
| 6 |
+
def setup_cai_if_needed():
    """Ensure the ``CAI`` package is importable, installing it on demand.

    Tries ``import CAI`` first. If that fails, the pinned commit of
    Benjamin-Lee/CodonAdaptationIndex is installed with pip (legacy,
    non-PEP 517 build) using the current interpreter, and the import is
    retried.

    Returns:
        bool: True if ``CAI`` can be imported (already present or freshly
        installed), False if the installation or the follow-up import failed.
    """
    try:
        import CAI  # noqa: F401 - imported only to probe availability
        return True
    except ImportError:
        print("CAI not found, attempting to install...")

    try:
        # Imported here so this bootstrap helper stays self-contained; it
        # runs before the rest of the application's imports.
        import importlib
        import subprocess

        # Install CAI with legacy build system
        subprocess.check_call([
            sys.executable, "-m", "pip", "install", "--no-use-pep517", "--no-cache-dir",
            "git+https://github.com/Benjamin-Lee/CodonAdaptationIndex.git@b6e017a92c58829f6a5aec8c26a21262bc2a6610",
        ])

        # The failed import above may have cached the directory listings of
        # the sys.path entries; refresh them so the package that pip just
        # wrote is visible to this already-running interpreter.
        importlib.invalidate_caches()

        import CAI  # noqa: F401
        print("✅ CAI installed successfully")
        return True
    except Exception as e:
        # Deliberately broad: this is a best-effort bootstrap and the app
        # should still start (and report the problem) if anything goes wrong.
        print(f"❌ Failed to install CAI: {e}")
        return False
|
| 25 |
+
|
| 26 |
+
# Setup CAI before any other imports that might need it
|
| 27 |
+
setup_cai_if_needed()
|
| 28 |
+
|
| 29 |
import streamlit as st
|
| 30 |
import torch
|
| 31 |
import pandas as pd
|
|
|
|
| 42 |
warnings.filterwarnings("ignore")
|
| 43 |
|
| 44 |
# Import CodonTransformer modules
|
|
|
|
|
|
|
| 45 |
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 46 |
|
| 47 |
from CodonTransformer.CodonPrediction import (
|
|
|
|
| 246 |
if sequence_chars.issubset(dna_chars):
|
| 247 |
if len(sequence) < 3:
|
| 248 |
return False, "DNA sequence must be at least 3 nucleotides long", "dna", sequence
|
| 249 |
+
|
| 250 |
# Auto-fix DNA sequences not divisible by 3
|
| 251 |
if len(sequence) % 3 != 0:
|
| 252 |
remainder = len(sequence) % 3
|
|
|
|
| 255 |
else:
|
| 256 |
fixed_sequence = sequence
|
| 257 |
message = "Valid DNA sequence"
|
| 258 |
+
|
| 259 |
return True, message, "dna", fixed_sequence
|
| 260 |
|
| 261 |
# If contains protein-specific amino acids, treat as protein
|
|
|
|
| 680 |
|
| 681 |
def single_sequence_optimization():
|
| 682 |
"""Single sequence optimization interface - enhanced from original functionality"""
|
| 683 |
+
# Sidebar configuration
|
| 684 |
st.sidebar.header("π§ Configuration")
|
| 685 |
organism_options = [
|
| 686 |
"Escherichia coli general",
|
|
|
|
| 713 |
)
|
| 714 |
if not POST_PROCESSING_AVAILABLE:
|
| 715 |
st.sidebar.warning("β οΈ DNAChisel not available. Install with: pip install dnachisel")
|
| 716 |
+
|
| 717 |
# Dataset Information
|
| 718 |
st.sidebar.markdown("---")
|
| 719 |
st.sidebar.markdown("### π Dataset Information")
|
| 720 |
st.sidebar.markdown("""
|
| 721 |
- **Dataset**: [ColiFormer-Data](https://huggingface.co/datasets/saketh11/ColiFormer-Data)
|
| 722 |
- **Training**: 4,300 high-CAI E. coli sequences
|
| 723 |
+
- **Reference**: 50,000+ E. coli gene sequences
|
| 724 |
- **Auto-download**: CAI weights & tAI coefficients
|
| 725 |
""")
|
| 726 |
+
|
| 727 |
# Model Information
|
| 728 |
st.sidebar.markdown("### π€ Model Information")
|
| 729 |
st.sidebar.markdown("""
|
|
|
|
| 879 |
st.error(f"β **Optimization Failed:** {st.session_state.results}")
|
| 880 |
else:
|
| 881 |
display_optimization_results(
|
| 882 |
+
st.session_state.results,
|
| 883 |
+
st.session_state.get('organism', organism),
|
| 884 |
+
st.session_state.get('sequence_clean', ''),
|
| 885 |
+
st.session_state.get('sequence_type', 'protein'),
|
| 886 |
st.session_state.get('input_metrics', {})
|
| 887 |
)
|
| 888 |
|
|
|
|
| 1252 |
|
| 1253 |
# CAI Extremes Analysis
|
| 1254 |
st.subheader("π― CAI Performance Analysis")
|
| 1255 |
+
|
| 1256 |
# Filter out rows with NaN CAI values for analysis
|
| 1257 |
valid_cai_df = results_df.dropna(subset=['cai_after'])
|
| 1258 |
+
|
| 1259 |
if len(valid_cai_df) > 0:
|
| 1260 |
# Find lowest and highest CAI sequences
|
| 1261 |
lowest_cai_idx = valid_cai_df['cai_after'].idxmin()
|
| 1262 |
highest_cai_idx = valid_cai_df['cai_after'].idxmax()
|
| 1263 |
+
|
| 1264 |
lowest_cai_row = results_df.loc[lowest_cai_idx]
|
| 1265 |
highest_cai_row = results_df.loc[highest_cai_idx]
|
| 1266 |
+
|
| 1267 |
col1, col2 = st.columns(2)
|
| 1268 |
+
|
| 1269 |
with col1:
|
| 1270 |
st.markdown("**π» Lowest CAI Sequence**")
|
| 1271 |
st.write(f"**Name:** {lowest_cai_row['name']}")
|
|
|
|
| 1273 |
st.metric("GC Content", f"{lowest_cai_row['gc_content_after']:.1f}%")
|
| 1274 |
st.metric("tAI Score", f"{lowest_cai_row['tai_after']:.3f}")
|
| 1275 |
st.metric("Length", f"{lowest_cai_row['length_after']} bp")
|
| 1276 |
+
|
| 1277 |
# Show improvement
|
| 1278 |
if pd.notna(lowest_cai_row['cai_before']):
|
| 1279 |
cai_improvement = lowest_cai_row['cai_after'] - lowest_cai_row['cai_before']
|
| 1280 |
st.metric("CAI Improvement", f"{cai_improvement:+.3f}")
|
| 1281 |
+
|
| 1282 |
with col2:
|
| 1283 |
st.markdown("**πΊ Highest CAI Sequence**")
|
| 1284 |
st.write(f"**Name:** {highest_cai_row['name']}")
|
|
|
|
| 1286 |
st.metric("GC Content", f"{highest_cai_row['gc_content_after']:.1f}%")
|
| 1287 |
st.metric("tAI Score", f"{highest_cai_row['tai_after']:.3f}")
|
| 1288 |
st.metric("Length", f"{highest_cai_row['length_after']} bp")
|
| 1289 |
+
|
| 1290 |
# Show improvement
|
| 1291 |
if pd.notna(highest_cai_row['cai_before']):
|
| 1292 |
cai_improvement = highest_cai_row['cai_after'] - highest_cai_row['cai_before']
|
| 1293 |
st.metric("CAI Improvement", f"{cai_improvement:+.3f}")
|
| 1294 |
+
|
| 1295 |
# CAI Distribution Chart
|
| 1296 |
st.subheader("π CAI Distribution")
|
| 1297 |
fig = go.Figure()
|
|
|
|
| 1302 |
marker_color='darkblue',
|
| 1303 |
opacity=0.7
|
| 1304 |
))
|
| 1305 |
+
|
| 1306 |
# Add vertical lines for lowest and highest
|
| 1307 |
fig.add_vline(
|
| 1308 |
x=lowest_cai_row['cai_after'],
|
|
|
|
| 1312 |
)
|
| 1313 |
fig.add_vline(
|
| 1314 |
x=highest_cai_row['cai_after'],
|
| 1315 |
+
line_dash="dash",
|
| 1316 |
line_color="green",
|
| 1317 |
annotation_text=f"Highest: {highest_cai_row['cai_after']:.3f}"
|
| 1318 |
)
|
| 1319 |
+
|
| 1320 |
fig.update_layout(
|
| 1321 |
title="Distribution of Optimized CAI Scores",
|
| 1322 |
xaxis_title="CAI Score",
|
|
|
|
| 1365 |
st.plotly_chart(fig_gc, use_container_width=True)
|
| 1366 |
else:
|
| 1367 |
st.warning("β οΈ No valid GC content values found in the batch results.")
|
| 1368 |
+
|
| 1369 |
else:
|
| 1370 |
st.warning("β οΈ No valid CAI scores found in the batch results. Check if CAI weights are properly loaded.")
|
| 1371 |
|
packages.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
git
|
| 2 |
+
git-lfs
|
| 3 |
+
build-essential
|
| 4 |
+
python3-dev
|
setup_cai.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Setup script for CAI package installation in ColiFormer.
|
| 4 |
+
This script handles the installation of the CAI package with proper build flags
|
| 5 |
+
to avoid wheel naming issues that occur with standard pip install.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import subprocess
|
| 9 |
+
import sys
|
| 10 |
+
import os
|
| 11 |
+
import importlib.util
|
| 12 |
+
|
| 13 |
+
def check_cai_installed():
    """Check if CAI package is already installed and working.

    Returns:
        bool: True when the two names this project uses from the package
        (``CAI`` and ``relative_adaptiveness``) can be imported, else False.
    """
    try:
        # Import the required names directly instead of probing with
        # importlib.util.find_spec first: find_spec raises ValueError (not
        # ImportError) when sys.modules already holds a module whose __spec__
        # is None, which would escape this handler.
        from CAI import CAI as cai_func, relative_adaptiveness  # noqa: F401
    except ImportError as e:
        print(f"❌ CAI package not found or not working: {e}")
        return False

    print("✅ CAI package is already installed and working")
    return True
|
| 28 |
+
|
| 29 |
+
def install_cai():
    """Install CAI package with proper build configuration.

    Upgrades the build tooling (setuptools, wheel, pip), then installs the
    pinned CodonAdaptationIndex commit. The legacy ``--no-use-pep517`` build
    is tried first; if that pip invocation fails, a standard install is
    attempted as a fallback.

    Returns:
        bool: True if one of the install attempts succeeded, False otherwise.
    """
    import importlib  # function-scope: only needed to refresh finder caches

    print("🔧 Installing CAI package...")

    cai_repo = "git+https://github.com/Benjamin-Lee/CodonAdaptationIndex.git@b6e017a92c58829f6a5aec8c26a21262bc2a6610"

    def run_pip(*args):
        # text=True on every call so captured stderr is a str and prints
        # readably in the failure handlers below.
        return subprocess.run(
            [sys.executable, "-m", "pip", "install", *args],
            check=True,
            capture_output=True,
            text=True,
        )

    try:
        # First ensure we have build tools
        print("Installing build dependencies...")
        run_pip("--upgrade", "setuptools>=65.0", "wheel>=0.37.0", "pip>=21.0")

        # Try installing with --no-use-pep517 flag first (preferred method)
        print("Attempting CAI installation with legacy build system...")
        try:
            run_pip("--no-use-pep517", "--no-cache-dir", cai_repo)
            build = "legacy"
        except subprocess.CalledProcessError as legacy_error:
            print("⚠️ Legacy build failed, trying standard installation...")
            # Surface why the preferred path failed instead of discarding it.
            if legacy_error.stderr:
                print(f"Error output: {legacy_error.stderr}")

            # Fallback to standard installation
            run_pip("--no-cache-dir", cai_repo)
            build = "standard"

    except subprocess.CalledProcessError as e:
        print(f"❌ CAI installation failed: {e}")
        if e.stderr:
            print(f"Error output: {e.stderr}")
        return False
    except Exception as e:
        print(f"❌ Unexpected error during CAI installation: {e}")
        return False

    # Make the freshly written package visible to imports in this process.
    importlib.invalidate_caches()
    print(f"✅ CAI installed successfully with {build} build")
    return True
|
| 71 |
+
|
| 72 |
+
def verify_cai_installation():
    """Verify that CAI package is working correctly.

    Imports ``CAI`` and ``relative_adaptiveness`` from the package, derives
    weights from two short test sequences and scores one of them.

    Returns:
        bool: True if the import and both calls complete, False if anything
        raises.
    """
    try:
        from CAI import CAI as cai_func, relative_adaptiveness

        # Test basic functionality
        test_sequences = ["ATGAAATAA", "ATGGGCTAA"]
        weights = relative_adaptiveness(sequences=test_sequences)
        cai_score = cai_func("ATGAAATAA", weights=weights)

        print(f"✅ CAI verification successful (test score: {cai_score:.3f})")
        return True

    except Exception as e:
        # Deliberately broad: any failure here (import or runtime) means the
        # installation is unusable, and the caller only needs a yes/no answer.
        print(f"❌ CAI verification failed: {e}")
        return False
|
| 89 |
+
|
| 90 |
+
def main():
    """Main setup function.

    Uses an existing CAI installation if it imports and passes verification;
    otherwise installs the package and verifies the result.

    Returns:
        bool: True if CAI ends up installed and verified, False otherwise.
    """
    print("ColiFormer CAI Setup")
    print("=" * 50)

    # Check if already installed
    if check_cai_installed():
        if verify_cai_installation():
            print("🎉 CAI is ready to use!")
            return True
        print("⚠️ CAI is installed but not working properly, reinstalling...")

    # Install CAI
    if not install_cai():
        print("💥 CAI installation failed!")
        return False

    # The checks above may have left a (possibly broken) CAI module cached in
    # sys.modules; drop it so verification imports the new installation
    # rather than re-testing the stale module object.
    for stale in [m for m in sys.modules if m == "CAI" or m.startswith("CAI.")]:
        del sys.modules[stale]

    # Verify installation
    if not verify_cai_installation():
        print("💥 CAI installation verification failed!")
        return False

    print("🎉 CAI setup completed successfully!")
    return True
|
| 115 |
+
|
| 116 |
+
if __name__ == "__main__":
    # Report the setup outcome through the process exit status:
    # 0 when main() succeeded, 1 otherwise.
    sys.exit(0 if main() else 1)
|