Spaces:

ResearchEngineering
/

AGI

Sleeping

App Files Community

Dmitry Beresnev committed on Jan 31

Commit

f64a284

1 Parent(s): 7763bf4

fix dockerfile

Browse files

Files changed (2) hide show

Dockerfile +6 -4
app.py +46 -12

Dockerfile CHANGED Viewed

@@ -9,9 +9,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     libcurl4-openssl-dev \
     && rm -rf /var/lib/apt/lists/*
-# Clone and build llama.cpp with MINIMAL optimizations (fast build)
 WORKDIR /build
-ARG CACHEBUST=4
 RUN git clone https://github.com/ggerganov/llama.cpp.git && \
     cd llama.cpp && \
     cmake -B build -DCMAKE_BUILD_TYPE=Release \
@@ -19,7 +19,8 @@ RUN git clone https://github.com/ggerganov/llama.cpp.git && \
         -DGGML_AVX2=OFF \
         -DGGML_AVX=OFF \
         -DGGML_FMA=OFF \
-        -DGGML_F16C=OFF && \
     cmake --build build --config Release --target llama-server -j1 && \
     echo "=== Binary dependencies ===" && \
     ldd build/bin/llama-server || true
@@ -27,12 +28,13 @@ RUN git clone https://github.com/ggerganov/llama.cpp.git && \
 # Runtime stage
 FROM debian:bookworm-slim
-# Install runtime dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
     libcurl4 \
     ca-certificates \
     libgomp1 \
     libstdc++6 \
     && rm -rf /var/lib/apt/lists/*
 # Copy llama-server binary and all shared libraries from builder

     libcurl4-openssl-dev \
     && rm -rf /var/lib/apt/lists/*
+# Clone and build llama.cpp with SSL support for HuggingFace Hub
 WORKDIR /build
+ARG CACHEBUST=5
 RUN git clone https://github.com/ggerganov/llama.cpp.git && \
     cd llama.cpp && \
     cmake -B build -DCMAKE_BUILD_TYPE=Release \
         -DGGML_AVX2=OFF \
         -DGGML_AVX=OFF \
         -DGGML_FMA=OFF \
+        -DGGML_F16C=OFF \
+        -DLLAMA_CURL=ON && \
     cmake --build build --config Release --target llama-server -j1 && \
     echo "=== Binary dependencies ===" && \
     ldd build/bin/llama-server || true
 # Runtime stage
 FROM debian:bookworm-slim
+# Install runtime dependencies including SSL/HTTPS support
 RUN apt-get update && apt-get install -y --no-install-recommends \
     libcurl4 \
     ca-certificates \
     libgomp1 \
     libstdc++6 \
+    openssl \
     && rm -rf /var/lib/apt/lists/*
 # Copy llama-server binary and all shared libraries from builder

app.py CHANGED Viewed

@@ -523,10 +523,26 @@ async def start_llama_server(model_id: str, port: int) -> tuple[subprocess.Popen
     while elapsed < max_wait_time:
         # Check if process died
         if process.poll() is not None:
-            stdout, _ = process.communicate()
             logger.error(f"llama-server exited with code {process.returncode}")
-            logger.error(f"Output: {stdout}")
-            raise RuntimeError("llama-server process died")
         try:
             # Use aiohttp for async health check
@@ -596,14 +612,22 @@ async def startup_event():
     model_id = AVAILABLE_MODELS[current_model]
     port = model_cache._get_next_port()
-    process, load_time = await start_llama_server(model_id, port)
-    await model_cache.put(current_model, model_id, process, port, load_time)
-    metrics.startup_time = time.time() - startup_start
-    logger.info(f"Started with default model: {current_model} (total startup: {metrics.startup_time:.2f}s)")
-    # Start preloading in background
-    asyncio.create_task(preload_models_background())
 @app.on_event("shutdown")
@@ -611,10 +635,20 @@ async def shutdown_event():
     """Clean shutdown - clear cache and close HTTP session."""
     logger.info("Application shutdown initiated")
-    if http_session:
-        await http_session.close()
-    await model_cache.clear()
 @app.get(

     while elapsed < max_wait_time:
         # Check if process died
         if process.poll() is not None:
+            # Process died - collect output for debugging
+            try:
+                stdout = process.stdout.read() if process.stdout else ""
+            except:
+                stdout = "Unable to read stdout"
             logger.error(f"llama-server exited with code {process.returncode}")
+            logger.error(f"Model ID: {model_id}")
+            logger.error(f"Port: {port}")
+            logger.error(f"Output:\n{stdout}")
+            # Provide helpful error message
+            error_msg = f"llama-server process died (exit code {process.returncode})"
+            if "HTTPS is not supported" in str(stdout):
+                error_msg += "\n\nHTTPS support is missing. The llama-server binary needs to be rebuilt with CURL/SSL support."
+                error_msg += "\nAdd -DLLAMA_CURL=ON to the cmake build flags."
+            elif "no usable GPU found" in str(stdout):
+                error_msg += "\n\nNote: Running on CPU only (no GPU detected)."
+            raise RuntimeError(error_msg)
         try:
             # Use aiohttp for async health check
     model_id = AVAILABLE_MODELS[current_model]
     port = model_cache._get_next_port()
+    try:
+        process, load_time = await start_llama_server(model_id, port)
+        await model_cache.put(current_model, model_id, process, port, load_time)
+        metrics.startup_time = time.time() - startup_start
+        logger.info(f"Started with default model: {current_model} (total startup: {metrics.startup_time:.2f}s)")
+        # Start preloading in background
+        asyncio.create_task(preload_models_background())
+    except Exception as e:
+        # Clean up on startup failure
+        logger.error(f"Startup failed: {e}")
+        if http_session:
+            await http_session.close()
+        model_cache._release_port(port)
+        raise
 @app.on_event("shutdown")
     """Clean shutdown - clear cache and close HTTP session."""
     logger.info("Application shutdown initiated")
+    # Clear model cache first
+    try:
+        await model_cache.clear()
+    except Exception as e:
+        logger.error(f"Error clearing cache during shutdown: {e}")
+    # Close HTTP session
+    if http_session and not http_session.closed:
+        try:
+            await http_session.close()
+            # Give it a moment to close gracefully
+            await asyncio.sleep(0.1)
+        except Exception as e:
+            logger.error(f"Error closing HTTP session: {e}")
 @app.get(