Spaces:

Sabithulla
/

alpha-core-ai

Build error

App Files Files Community

Sabithulla committed on Feb 23

Commit

9d2777a

1 Parent(s): 3274ec4

Multi-stage Docker build: Stage 1 compiles llama-cpp-python into wheels once, and Stage 2 installs those prebuilt wheels, so the build no longer times out. The first build takes 8-12 minutes; subsequent builds reuse the cached layers.

Browse files

Files changed (2) hide show

Dockerfile +8 -10
model_manager.py +12 -18

Dockerfile CHANGED Viewed

@@ -1,9 +1,9 @@
-# Stage 1: Compile llama-cpp-python to wheel (happens once)
 FROM python:3.11-slim AS builder
 WORKDIR /tmp/build
-# Install build tools
 RUN apt-get update && apt-get install -y \
     build-essential \
     cmake \
@@ -12,29 +12,27 @@ RUN apt-get update && apt-get install -y \
 # Copy requirements
 COPY requirements.txt .
-# Build wheel for llama-cpp-python (will save it)
 RUN pip wheel --no-cache-dir -r requirements.txt -w /tmp/wheels
-# Stage 2: Production image (just installs pre-built wheels)
 FROM python:3.11-slim
 WORKDIR /app
-# Install only runtime dependencies (no build tools needed)
 RUN apt-get update && apt-get install -y \
     tesseract-ocr \
     libtesseract-dev \
     && rm -rf /var/lib/apt/lists/*
-# Copy pre-built wheels from Stage 1 (NO COMPILATION!)
 COPY --from=builder /tmp/wheels /tmp/wheels
-# Install from pre-built wheels (instant, no compilation)
 RUN pip install --no-cache-dir --no-index --find-links /tmp/wheels -r requirements.txt
-COPY requirements.txt .
-# Copy application
 COPY . .
 # Create models directory

+# Stage 1: Compile llama-cpp-python to wheel (one-time build)
 FROM python:3.11-slim AS builder
 WORKDIR /tmp/build
+# Install build tools only in Stage 1
 RUN apt-get update && apt-get install -y \
     build-essential \
     cmake \
 # Copy requirements
 COPY requirements.txt .
+# Build ALL wheels (llama-cpp-python gets compiled here)
 RUN pip wheel --no-cache-dir -r requirements.txt -w /tmp/wheels
+# Stage 2: Production (just installs pre-built wheels from Stage 1)
 FROM python:3.11-slim
 WORKDIR /app
+# Install only runtime dependencies (NO build tools!)
 RUN apt-get update && apt-get install -y \
     tesseract-ocr \
     libtesseract-dev \
     && rm -rf /var/lib/apt/lists/*
+# Copy pre-built wheels from Stage 1 (compilation already done!)
 COPY --from=builder /tmp/wheels /tmp/wheels
+# Install from pre-built wheels (INSTANT - no compilation!)
 RUN pip install --no-cache-dir --no-index --find-links /tmp/wheels -r requirements.txt
+# Copy application code
 COPY . .
 # Create models directory

model_manager.py CHANGED Viewed

@@ -6,8 +6,13 @@ from typing import Generator
 class ModelManager:
     def __init__(self):
         self.models = {}
-        # Templates for different model architectures
         self.model_configs = {
             "tinyllama": {
                 "repo": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
                 "file": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
@@ -19,17 +24,10 @@ class ModelManager:
                 "file": "qwen2.5-coder-1.5b-instruct-q4_k_m.gguf",
                 "url": "https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-1.5b-instruct-q4_k_m.gguf",
                 "format": "chatml"
-            },
-            "fast-chat": {
-                "repo": "Qwen/Qwen2.5-0.5B-Instruct-GGUF",
-                "file": "qwen2.5-0.5b-instruct-q4_k_m.gguf",
-                "url": "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/qwen2.5-0.5b-instruct-q4_k_m.gguf",
-                "format": "chatml"
             }
         }
         self.models_dir = os.path.join(os.getcwd(), "models")
         os.makedirs(self.models_dir, exist_ok=True)
-        # Only download smallest model at startup (fast-chat: 0.5B)
         self.critical_models = ["fast-chat"]
         self.auto_download_critical()
@@ -39,7 +37,7 @@ class ModelManager:
         for model_id in self.critical_models:
             try:
                 path = self.download_model(model_id)
-                print(f"✓ {model_id} ready ({path})")
             except Exception as e:
                 print(f"✗ Failed to ensure {model_id}: {e}")
@@ -49,11 +47,10 @@ class ModelManager:
             raise ValueError(f"Model {model_id} not configured")
         target_path = os.path.join(self.models_dir, config["file"])
-        # Check if file exists AND has some size
         if os.path.exists(target_path) and os.path.getsize(target_path) > 50000000:
             return target_path
-        print(f"Downloading {model_id} from {config['url']}...")
         try:
             response = requests.get(config["url"], stream=True, timeout=60)
             response.raise_for_status()
@@ -61,12 +58,11 @@ class ModelManager:
                 for chunk in response.iter_content(chunk_size=1024*1024):
                     if chunk:
                         f.write(chunk)
-            print(f"Successfully downloaded {model_id}")
             return target_path
         except Exception as e:
             if os.path.exists(target_path):
-                os.remove(target_path)
-            print(f"Download failed for {model_id}: {e}")
             raise e
     def load_model(self, model_id: str):
@@ -80,7 +76,6 @@ class ModelManager:
             n_threads=2,
             verbose=False
         )
-        print(f"✓ Model {model_id} loaded")
         return self.models[model_id]
     def format_prompt(self, model_id: str, system: str, history: list, prompt: str):
@@ -108,8 +103,8 @@ class ModelManager:
         llm = self.load_model(model_id)
         system_text = (
-            "You are a highly accurate AI assistant. "
-            "For math, ALWAYS use LaTeX wrapping display equations in $ $ and inline in \\( \\)."
         )
         full_prompt, stop_tokens = self.format_prompt(model_id, system_text, context or [], prompt)
@@ -132,4 +127,3 @@ class ModelManager:
             if hasattr(model, 'close'):
                 model.close()
         self.models.clear()
-        print("Cleanup complete")

 class ModelManager:
     def __init__(self):
         self.models = {}
         self.model_configs = {
+            "fast-chat": {
+                "repo": "Qwen/Qwen2.5-0.5B-Instruct-GGUF",
+                "file": "qwen2.5-0.5b-instruct-q4_k_m.gguf",
+                "url": "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/qwen2.5-0.5b-instruct-q4_k_m.gguf",
+                "format": "chatml"
+            },
             "tinyllama": {
                 "repo": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
                 "file": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
                 "file": "qwen2.5-coder-1.5b-instruct-q4_k_m.gguf",
                 "url": "https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-1.5b-instruct-q4_k_m.gguf",
                 "format": "chatml"
             }
         }
         self.models_dir = os.path.join(os.getcwd(), "models")
         os.makedirs(self.models_dir, exist_ok=True)
         self.critical_models = ["fast-chat"]
         self.auto_download_critical()
         for model_id in self.critical_models:
             try:
                 path = self.download_model(model_id)
+                print(f"✓ {model_id} ready")
             except Exception as e:
                 print(f"✗ Failed to ensure {model_id}: {e}")
             raise ValueError(f"Model {model_id} not configured")
         target_path = os.path.join(self.models_dir, config["file"])
         if os.path.exists(target_path) and os.path.getsize(target_path) > 50000000:
             return target_path
+        print(f"Downloading {model_id}...")
         try:
             response = requests.get(config["url"], stream=True, timeout=60)
             response.raise_for_status()
                 for chunk in response.iter_content(chunk_size=1024*1024):
                     if chunk:
                         f.write(chunk)
+            print(f"✓ {model_id} downloaded")
             return target_path
         except Exception as e:
             if os.path.exists(target_path):
+                os.remove(target_path)
             raise e
     def load_model(self, model_id: str):
             n_threads=2,
             verbose=False
         )
         return self.models[model_id]
     def format_prompt(self, model_id: str, system: str, history: list, prompt: str):
         llm = self.load_model(model_id)
         system_text = (
+            "You are a helpful AI assistant. "
+            "For math, use LaTeX with $ $ for display and \\( \\) for inline."
         )
         full_prompt, stop_tokens = self.format_prompt(model_id, system_text, context or [], prompt)
             if hasattr(model, 'close'):
                 model.close()
         self.models.clear()