Spaces:

OrbitMC
/

hf_dead

Runtime error

OrbitMC committed on Mar 26

Commit

7cfb924

verified ·

1 Parent(s): b5462d9

Update Dockerfile

Files changed (1) hide show

Dockerfile CHANGED Viewed

@@ -1,21 +1,46 @@
-from huggingface_hub import hf_hub_download
-from llama_cpp.server.app import create_app
-import uvicorn
-# 1. Download the model files
-repo = "unsloth/Qwen3.5-9B-GGUF"
-model_path = hf_hub_download(repo_id=repo, filename="Qwen3.5-9B-UD-Q4_K_XL.gguf")
-clip_path = hf_hub_download(repo_id=repo, filename="mmproj-BF16.gguf")
-# 2. Run the server (OpenAI Compatible)
-# This mimics exactly what your Docker CMD was doing
-if __name__ == "__main__":
-    app = create_app(
-        model_path=model_path,
-        chat_format="chatml", # Qwen uses chatml
-        n_ctx=128000,
-        clip_model_path=clip_path # For vision/multimodal support
-    )
-    print("Server starting on http://0.0.0.0:7860/v1")
-    uvicorn.run(app, host="0.0.0.0", port=7860)

+# Stage 1: compile a self-contained llama-server binary from llama.cpp.
+FROM ubuntu:22.04 AS builder
+# Git ref (branch or tag) of llama.cpp to build. The default tracks master;
+# pass --build-arg LLAMA_CPP_REF=<tag> for a reproducible image.
+ARG LLAMA_CPP_REF=master
+# ca-certificates is listed explicitly: --no-install-recommends would otherwise
+# drop it, and the HTTPS clone below needs it. DEBIAN_FRONTEND is set inline so
+# it does not leak into any image environment.
+RUN apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+        build-essential \
+        ca-certificates \
+        cmake \
+        git \
+        libcurl4-openssl-dev \
+    && rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+# Shallow clone: only the tree of the selected ref is needed for the build.
+RUN git clone --depth 1 --branch "${LLAMA_CPP_REF}" https://github.com/ggml-org/llama.cpp.git .
+# Configure a headless build (web UI disabled) with cURL support for remote
+# model loading.
+# BUILD_SHARED_LIBS=OFF links the llama/ggml libraries statically into the
+# executable: the runtime stage copies only build/bin/llama-server, so a
+# shared-library build would fail at start-up with missing .so files.
+# CMAKE_BUILD_TYPE is set here because single-config generators ignore the
+# --config flag passed at build time.
+RUN cmake -B build \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DBUILD_SHARED_LIBS=OFF \
+    -DLLAMA_BUILD_WEBUI=OFF \
+    -DLLAMA_CURL=ON \
+    -DLLAMA_BUILD_EXAMPLES=OFF
+RUN cmake --build build --config Release -j "$(nproc)" --target llama-server
+# Stage 2: runtime image holding the server binary and the model file.
+FROM ubuntu:22.04
+# libcurl4 and libgomp1 are shared libraries the server binary is expected to
+# load at start-up (cURL support and OpenMP threading); they are named
+# explicitly instead of arriving by accident through recommended packages.
+# python3-pip is only needed to install huggingface_hub for the download below.
+RUN apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+        ca-certificates \
+        libcurl4 \
+        libgomp1 \
+        python3-pip \
+    && rm -rf /var/lib/apt/lists/*
+RUN pip install --no-cache-dir huggingface_hub
+WORKDIR /app
+COPY --from=builder /app/build/bin/llama-server /app/llama-server
+# Download the official Qwen GGUF (non-Unsloth) into /app at build time and
+# make it world-readable so the unprivileged runtime user can open it.
+# NOTE(review): confirm this filename exists in the repo as a single, unsplit
+# file; the build fails here if it does not.
+RUN python3 -c 'from huggingface_hub import hf_hub_download; \
+    hf_hub_download(repo_id="Qwen/Qwen2.5-7B-Instruct-GGUF", \
+    filename="qwen2.5-7b-instruct-q4_k_m.gguf", local_dir="/app")' \
+    && chmod a+r /app/qwen2.5-7b-instruct-q4_k_m.gguf
+# Run as an unprivileged account instead of root. UID 1000 is presumably the
+# UID HF Spaces uses for Docker Spaces - verify against the Spaces docs.
+RUN useradd --create-home --uid 1000 user
+USER user
+# HF Spaces route traffic to port 7860.
+EXPOSE 7860
+# Start the headless server on all interfaces with a 32768-token context.
+# NOTE(review): --embedding appears to restrict llama-server to the embedding
+# use case; with an Instruct chat model this likely disables chat completions.
+# Kept to preserve existing behaviour - confirm it is intended.
+CMD ["./llama-server", \
+     "-m", "/app/qwen2.5-7b-instruct-q4_k_m.gguf", \
+     "--host", "0.0.0.0", \
+     "--port", "7860", \
+     "-c", "32768", \
+     "--embedding"]