Dmitry Beresnev committed on
Commit
f64a284
·
1 Parent(s): 7763bf4

fix dockerfile

Browse files
Files changed (2) hide show
  1. Dockerfile +6 -4
  2. app.py +46 -12
Dockerfile CHANGED
@@ -9,9 +9,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
9
  libcurl4-openssl-dev \
10
  && rm -rf /var/lib/apt/lists/*
11
 
12
- # Clone and build llama.cpp with MINIMAL optimizations (fast build)
13
  WORKDIR /build
14
- ARG CACHEBUST=4
15
  RUN git clone https://github.com/ggerganov/llama.cpp.git && \
16
  cd llama.cpp && \
17
  cmake -B build -DCMAKE_BUILD_TYPE=Release \
@@ -19,7 +19,8 @@ RUN git clone https://github.com/ggerganov/llama.cpp.git && \
19
  -DGGML_AVX2=OFF \
20
  -DGGML_AVX=OFF \
21
  -DGGML_FMA=OFF \
22
- -DGGML_F16C=OFF && \
 
23
  cmake --build build --config Release --target llama-server -j1 && \
24
  echo "=== Binary dependencies ===" && \
25
  ldd build/bin/llama-server || true
@@ -27,12 +28,13 @@ RUN git clone https://github.com/ggerganov/llama.cpp.git && \
27
  # Runtime stage
28
  FROM debian:bookworm-slim
29
 
30
- # Install runtime dependencies
31
  RUN apt-get update && apt-get install -y --no-install-recommends \
32
  libcurl4 \
33
  ca-certificates \
34
  libgomp1 \
35
  libstdc++6 \
 
36
  && rm -rf /var/lib/apt/lists/*
37
 
38
  # Copy llama-server binary and all shared libraries from builder
 
9
  libcurl4-openssl-dev \
10
  && rm -rf /var/lib/apt/lists/*
11
 
12
+ # Clone and build llama.cpp with SSL support for HuggingFace Hub
13
  WORKDIR /build
14
+ ARG CACHEBUST=5
15
  RUN git clone https://github.com/ggerganov/llama.cpp.git && \
16
  cd llama.cpp && \
17
  cmake -B build -DCMAKE_BUILD_TYPE=Release \
 
19
  -DGGML_AVX2=OFF \
20
  -DGGML_AVX=OFF \
21
  -DGGML_FMA=OFF \
22
+ -DGGML_F16C=OFF \
23
+ -DLLAMA_CURL=ON && \
24
  cmake --build build --config Release --target llama-server -j1 && \
25
  echo "=== Binary dependencies ===" && \
26
  ldd build/bin/llama-server || true
 
28
  # Runtime stage
29
  FROM debian:bookworm-slim
30
 
31
+ # Install runtime dependencies including SSL/HTTPS support
32
  RUN apt-get update && apt-get install -y --no-install-recommends \
33
  libcurl4 \
34
  ca-certificates \
35
  libgomp1 \
36
  libstdc++6 \
37
+ openssl \
38
  && rm -rf /var/lib/apt/lists/*
39
 
40
  # Copy llama-server binary and all shared libraries from builder
app.py CHANGED
@@ -523,10 +523,26 @@ async def start_llama_server(model_id: str, port: int) -> tuple[subprocess.Popen
523
  while elapsed < max_wait_time:
524
  # Check if process died
525
  if process.poll() is not None:
526
- stdout, _ = process.communicate()
 
 
 
 
 
527
  logger.error(f"llama-server exited with code {process.returncode}")
528
- logger.error(f"Output: {stdout}")
529
- raise RuntimeError("llama-server process died")
 
 
 
 
 
 
 
 
 
 
 
530
 
531
  try:
532
  # Use aiohttp for async health check
@@ -596,14 +612,22 @@ async def startup_event():
596
  model_id = AVAILABLE_MODELS[current_model]
597
  port = model_cache._get_next_port()
598
 
599
- process, load_time = await start_llama_server(model_id, port)
600
- await model_cache.put(current_model, model_id, process, port, load_time)
 
601
 
602
- metrics.startup_time = time.time() - startup_start
603
- logger.info(f"Started with default model: {current_model} (total startup: {metrics.startup_time:.2f}s)")
604
 
605
- # Start preloading in background
606
- asyncio.create_task(preload_models_background())
 
 
 
 
 
 
 
607
 
608
 
609
  @app.on_event("shutdown")
@@ -611,10 +635,20 @@ async def shutdown_event():
611
  """Clean shutdown - clear cache and close HTTP session."""
612
  logger.info("Application shutdown initiated")
613
 
614
- if http_session:
615
- await http_session.close()
 
 
 
616
 
617
- await model_cache.clear()
 
 
 
 
 
 
 
618
 
619
 
620
  @app.get(
 
523
  while elapsed < max_wait_time:
524
  # Check if process died
525
  if process.poll() is not None:
526
+ # Process died - collect output for debugging
527
+ try:
528
+ stdout = process.stdout.read() if process.stdout else ""
529
+ except:
530
+ stdout = "Unable to read stdout"
531
+
532
  logger.error(f"llama-server exited with code {process.returncode}")
533
+ logger.error(f"Model ID: {model_id}")
534
+ logger.error(f"Port: {port}")
535
+ logger.error(f"Output:\n{stdout}")
536
+
537
+ # Provide helpful error message
538
+ error_msg = f"llama-server process died (exit code {process.returncode})"
539
+ if "HTTPS is not supported" in str(stdout):
540
+ error_msg += "\n\nHTTPS support is missing. The llama-server binary needs to be rebuilt with CURL/SSL support."
541
+ error_msg += "\nAdd -DLLAMA_CURL=ON to the cmake build flags."
542
+ elif "no usable GPU found" in str(stdout):
543
+ error_msg += "\n\nNote: Running on CPU only (no GPU detected)."
544
+
545
+ raise RuntimeError(error_msg)
546
 
547
  try:
548
  # Use aiohttp for async health check
 
612
  model_id = AVAILABLE_MODELS[current_model]
613
  port = model_cache._get_next_port()
614
 
615
+ try:
616
+ process, load_time = await start_llama_server(model_id, port)
617
+ await model_cache.put(current_model, model_id, process, port, load_time)
618
 
619
+ metrics.startup_time = time.time() - startup_start
620
+ logger.info(f"Started with default model: {current_model} (total startup: {metrics.startup_time:.2f}s)")
621
 
622
+ # Start preloading in background
623
+ asyncio.create_task(preload_models_background())
624
+ except Exception as e:
625
+ # Clean up on startup failure
626
+ logger.error(f"Startup failed: {e}")
627
+ if http_session:
628
+ await http_session.close()
629
+ model_cache._release_port(port)
630
+ raise
631
 
632
 
633
  @app.on_event("shutdown")
 
635
  """Clean shutdown - clear cache and close HTTP session."""
636
  logger.info("Application shutdown initiated")
637
 
638
+ # Clear model cache first
639
+ try:
640
+ await model_cache.clear()
641
+ except Exception as e:
642
+ logger.error(f"Error clearing cache during shutdown: {e}")
643
 
644
+ # Close HTTP session
645
+ if http_session and not http_session.closed:
646
+ try:
647
+ await http_session.close()
648
+ # Give it a moment to close gracefully
649
+ await asyncio.sleep(0.1)
650
+ except Exception as e:
651
+ logger.error(f"Error closing HTTP session: {e}")
652
 
653
 
654
  @app.get(