Andrew McCracken committed on
Commit
0a1ff0d
·
1 Parent(s): 8e854ef

Fix: Install llama-cpp-python at startup to /tmp to avoid build timeout

Browse files
Files changed (3) hide show
  1. Dockerfile +11 -15
  2. install_llama.py +59 -0
  3. start.sh +14 -0
Dockerfile CHANGED
@@ -2,28 +2,23 @@ FROM python:3.12-slim
2
 
3
  WORKDIR /app
4
 
5
- # Install system dependencies for building llama-cpp-python
6
  RUN apt-get update && apt-get install -y \
7
  build-essential \
8
  cmake \
9
  git \
10
  && rm -rf /var/lib/apt/lists/*
11
 
12
- # Copy requirements and install
13
  COPY requirements.txt .
14
-
15
- # Set environment variables for CPU-only build (no BLAS, no CUDA)
16
- ENV CMAKE_ARGS="-DLLAMA_BLAS=OFF -DLLAMA_CUBLAS=OFF -DLLAMA_METAL=OFF"
17
-
18
- # Install llama-cpp-python (will build from source with CPU-only)
19
- RUN pip install --no-cache-dir llama-cpp-python==0.3.14 --verbose
20
-
21
- # Install remaining requirements
22
  RUN pip install --no-cache-dir -r requirements.txt
23
 
24
- # Copy application code
25
  COPY . .
26
 
 
 
 
27
  # Create data directory for persistence
28
  RUN mkdir -p /data
29
 
@@ -33,13 +28,14 @@ ENV MODEL_REPO=daskalos-apps/phi4-cybersec-Q4_K_M
33
  ENV MODEL_FILENAME=phi4-mini-instruct-Q4_K_M.gguf
34
  ENV USE_RAG=true
35
  ENV CACHE_ENABLED=true
 
36
 
37
  # Expose port
38
  EXPOSE 8000
39
 
40
- # Health check
41
- HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
42
  CMD python -c "import requests; requests.get('http://localhost:8000/health')"
43
 
44
- # Run the application
45
- CMD ["python", "main.py"]
 
2
 
3
  WORKDIR /app
4
 
5
+ # Install system dependencies for building llama-cpp-python at runtime
6
  RUN apt-get update && apt-get install -y \
7
  build-essential \
8
  cmake \
9
  git \
10
  && rm -rf /var/lib/apt/lists/*
11
 
12
+ # Copy requirements and install (excluding llama-cpp-python)
13
  COPY requirements.txt .
 
 
 
 
 
 
 
 
14
  RUN pip install --no-cache-dir -r requirements.txt
15
 
16
+ # Copy application code and startup scripts
17
  COPY . .
18
 
19
+ # Make startup script executable
20
+ RUN chmod +x start.sh
21
+
22
  # Create data directory for persistence
23
  RUN mkdir -p /data
24
 
 
28
  ENV MODEL_FILENAME=phi4-mini-instruct-Q4_K_M.gguf
29
  ENV USE_RAG=true
30
  ENV CACHE_ENABLED=true
31
+ ENV CMAKE_ARGS="-DLLAMA_BLAS=OFF -DLLAMA_CUBLAS=OFF -DLLAMA_METAL=OFF"
32
 
33
  # Expose port
34
  EXPOSE 8000
35
 
36
+ # Health check (with longer startup period for llama-cpp build)
37
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=600s --retries=3 \
38
  CMD python -c "import requests; requests.get('http://localhost:8000/health')"
39
 
40
+ # Run the startup script which installs llama-cpp-python then starts the app
41
+ CMD ["bash", "start.sh"]
install_llama.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Install llama-cpp-python at startup to avoid Docker build timeout.
4
+ This script runs before the main application starts.
5
+ """
6
+ import subprocess
7
+ import sys
8
+ import os
9
+ import time
10
+
11
def install_llama_cpp():
    """Install llama-cpp-python into /tmp at container startup.

    Installing at runtime (rather than during ``docker build``) avoids the
    image-build timeout caused by compiling llama.cpp from source.  The
    package is installed with ``pip --target`` into a /tmp directory, which
    is always writable even when the app filesystem is read-only.

    Returns:
        bool: True if the package was installed (or already present),
        False if pip exited with a non-zero status.
    """
    install_dir = "/tmp/llama_cpp_install"

    print("=" * 60)
    print("🔧 Installing llama-cpp-python at startup...")
    print("=" * 60)

    os.makedirs(install_dir, exist_ok=True)

    # Idempotence: skip the slow source build if a previous container start
    # already installed the package and /tmp still holds it.
    if os.path.isdir(os.path.join(install_dir, "llama_cpp")):
        print(f"✅ llama-cpp-python already present in {install_dir}, skipping build")
        if install_dir not in sys.path:
            sys.path.insert(0, install_dir)
        return True

    # CPU-only build: disable BLAS, CUDA (cuBLAS) and Metal backends.
    # (The previous PYTHONUSERBASE setting was dropped: pip ignores it when
    # `--target` is given, so it had no effect.)
    env = os.environ.copy()
    env['CMAKE_ARGS'] = '-DLLAMA_BLAS=OFF -DLLAMA_CUBLAS=OFF -DLLAMA_METAL=OFF'

    start_time = time.time()
    try:
        # `--target` places the package tree directly under install_dir.
        # stdout/stderr are inherited so the build progress shows up in the
        # container logs.
        subprocess.run(
            [sys.executable, "-m", "pip", "install",
             "--target", install_dir,
             "--no-cache-dir",
             "llama-cpp-python==0.3.14"],
            env=env,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        elapsed = time.time() - start_time
        print(f"\n❌ Installation failed after {elapsed:.1f}s")
        print(f"Error: {e}")
        return False

    elapsed = time.time() - start_time
    print(f"\n✅ llama-cpp-python installed successfully in {elapsed:.1f}s")

    # Make the package importable within THIS process.  Other processes
    # (e.g. main.py launched by start.sh) rely on PYTHONPATH being exported
    # by the startup script instead — this sys.path edit does not persist.
    if install_dir not in sys.path:
        sys.path.insert(0, install_dir)

    print(f"📦 Installation directory: {install_dir}")
    return True
56
+
57
if __name__ == "__main__":
    # Exit status drives start.sh (`set -e`): 0 lets startup continue,
    # 1 aborts before the application is launched.
    sys.exit(0 if install_llama_cpp() else 1)
start.sh ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Container entrypoint: install llama-cpp-python at runtime (avoids the
# Docker image-build timeout), then launch the FastAPI application.
set -e

echo "🚀 Starting Cybersecurity Chatbot..."

# Install llama-cpp-python at startup; a non-zero exit aborts startup
# because of `set -e` above.
python3 install_llama.py

# Make the /tmp installation importable by the application process.
export PYTHONPATH="/tmp/llama_cpp_install:$PYTHONPATH"

# Start the application.  `exec` replaces this shell so the Python process
# becomes PID 1 and receives SIGTERM directly on `docker stop`, allowing a
# clean shutdown instead of being killed after the grace period.
echo "▶️ Launching FastAPI application..."
exec python3 main.py