Bleak committed on
Commit
c1adcd0
·
1 Parent(s): e5bc96b

initial commit.

Browse files
Files changed (2) hide show
  1. Dockerfile +63 -0
  2. entrypoint.sh +12 -0
Dockerfile ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #Ollama-API By BleakPrestiger
2
+ # Builder stage
3
+ FROM python:latest
4
+
5
+ WORKDIR /app
6
+
7
+ RUN apt-get update && \
8
+ apt-get install -y --no-install-recommends python3 python3-pip && \
9
+ rm -rf /var/lib/apt/lists/*
10
+
11
+ # In your Dockerfile
12
+ RUN pip install huggingface_hub "huggingface_hub[cli]" llama-cpp-python[server] --break-system-packages
13
+ #RUN huggingface-cli download unsloth/Qwen3-4B-Thinking-2507-GGUF Qwen3-4B-Thinking-2507-Q4_K_M.gguf --local-dir . --local-dir-use-symlinks False
14
+
15
+ # Download the model during the build process
16
+ RUN python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='unsloth/Qwen3-4B-Thinking-2507-GGUF', filename='Qwen3-4B-Thinking-2507-Q4_K_M.gguf', local_dir='.')"
17
+
18
+ # Update packages and install curl and gnupg
19
+ RUN apt-get update && apt-get upgrade -y && apt-get install -y \
20
+ curl \
21
+ wget \
22
+ gnupg
23
+
24
+ #RUN cd llama-b6795-bin-ubuntu-x64/build/bin && chmod +x ./llama-server && ./llama-server --model Qwen3-1.7B-Q8_0.gguf --ctx-size-draft 32767 --ctx-size 32767 --temp 1.0 --top-k 64 --top-k 0.95 --min-p 0.0 --log-file llama.log &
25
+
26
+ COPY .. /app
27
+
28
+ RUN ls
29
+
30
+ # Copy the entry point script
31
+ COPY entrypoint.sh /entrypoint.sh
32
+ RUN chmod +x /entrypoint.sh
33
+ # Set the entry point script as the default command
34
+ ENTRYPOINT ["/entrypoint.sh"]
35
+ #CMD ["ollama", "serve"]
36
+
37
+ # Expose the server port
38
+ EXPOSE 7860
39
+
40
+ # Add NVIDIA package repositories
41
+ #RUN curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
42
+ #&& echo "deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://nvidia.github.io/libnvidia-container/stable/deb/ $(. /etc/os-release; echo $UBUNTU_CODENAME) main" > /etc/apt/sources.list.d/nvidia-container-toolkit.list
43
+
44
+ # Install NVIDIA container toolkit (Check for any updated methods or URLs for Ubuntu jammy)
45
+ #RUN apt-get update && apt-get install -y nvidia-container-toolkit || true
46
+
47
+ # Install application
48
+ #RUN curl https://ollama.ai/install.sh | sh
49
+ # Below is to fix embedding bug as per
50
+ # RUN curl -fsSL https://ollama.com/install.sh | sed 's#https://ollama.com/download#https://github.com/jmorganca/ollama/releases/download/v0.1.29#' | sh
51
+
52
+
53
+ # Create the directory and give appropriate permissions
54
+ #RUN mkdir -p /.ollama && chmod 777 /.ollama
55
+
56
+ #WORKDIR /.ollama
57
+
58
+ # Set the entry point script as the default command
59
+ #ENTRYPOINT ["/entrypoint.sh"]
60
+ #CMD ["ollama", "serve"]
61
+
62
+ # Set the model as an environment variable (this can be overridden)
63
+ #ENV model=${model}
entrypoint.sh ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ echo "$PWD"
3
+ #export PATH="llama-b6795-bin-ubuntu-x64/build/bin:$PATH"
4
+ #huggingface-cli download unsloth/Qwen3-4B-Thinking-2507-GGUF --include --local-dir . --local-dir-use-symlinks False
5
+ #huggingface-cli download unsloth/Qwen3-4B-Thinking-2507-GGUF Qwen3-4B-Thinking-2507-Q4_K_M.gguf --local-dir . --local-dir-use-symlinks False
6
+ echo "$(ls)"
7
+ # Starting server
8
+ echo "Starting Llama-Cpp-Python server"
9
+ sleep 10
10
+ python3 -m llama_cpp.server --model Qwen3-4B-Thinking-2507-GGUF --host 0.0.0.0 --port 7860
11
+ #cd llama-b6795-bin-ubuntu-x64/build/bin && chmod +x ./llama-server && ./llama-server --model /app/Qwen3-4B-Thinking-2507-Q4_K_M.gguf --host 0.0.0.0 --port 7860 --temp 1.0 --top-k 64 --top-k 0.95 --min-p 0.0 --log-file llama.log &
12
+ wait