Bleak committed on
Commit
c1adcd0
·
1 Parent(s): e5bc96b

initial commit.

Browse files
Files changed (2) hide show
  1. Dockerfile +63 -0
  2. entrypoint.sh +12 -0
Dockerfile ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #Ollama-API By BleakPrestiger
2
+ # Builder stage
3
+ FROM python:latest
4
+
5
+ WORKDIR /app
6
+
7
+ RUN apt-get update && \
8
+ apt-get install -y --no-install-recommends python3 python3-pip && \
9
+ rm -rf /var/lib/apt/lists/*
10
+
11
+ # In your Dockerfile
12
+ RUN pip install huggingface_hub "huggingface_hub[cli]" llama-cpp-python[server] --break-system-packages
13
+ #RUN huggingface-cli download unsloth/Qwen3-4B-Thinking-2507-GGUF Qwen3-4B-Thinking-2507-Q4_K_M.gguf --local-dir . --local-dir-use-symlinks False
14
+
15
+ # Download the model during the build process
16
+ RUN python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='unsloth/Qwen3-4B-Thinking-2507-GGUF', filename='Qwen3-4B-Thinking-2507-Q4_K_M.gguf', local_dir='.')"
17
+
18
+ # Update packages and install curl and gnupg
19
+ RUN apt-get update && apt-get upgrade -y && apt-get install -y \
20
+ curl \
21
+ wget \
22
+ gnupg
23
+
24
+ #RUN cd llama-b6795-bin-ubuntu-x64/build/bin && chmod +x ./llama-server && ./llama-server --model Qwen3-1.7B-Q8_0.gguf --ctx-size-draft 32767 --ctx-size 32767 --temp 1.0 --top-k 64 --top-k 0.95 --min-p 0.0 --log-file llama.log &
25
+
26
+ COPY .. /app
27
+
28
+ RUN ls
29
+
30
+ # Copy the entry point script
31
+ COPY entrypoint.sh /entrypoint.sh
32
+ RUN chmod +x /entrypoint.sh
33
+ # Set the entry point script as the default command
34
+ ENTRYPOINT ["/entrypoint.sh"]
35
+ #CMD ["ollama", "serve"]
36
+
37
+ # Expose the server port
38
+ EXPOSE 7860
39
+
40
+ # Add NVIDIA package repositories
41
+ #RUN curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
42
+ #&& echo "deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://nvidia.github.io/libnvidia-container/stable/deb/ $(. /etc/os-release; echo $UBUNTU_CODENAME) main" > /etc/apt/sources.list.d/nvidia-container-toolkit.list
43
+
44
+ # Install NVIDIA container toolkit (Check for any updated methods or URLs for Ubuntu jammy)
45
+ #RUN apt-get update && apt-get install -y nvidia-container-toolkit || true
46
+
47
+ # Install application
48
+ #RUN curl https://ollama.ai/install.sh | sh
49
+ # Below is to fix embedding bug as per
50
+ # RUN curl -fsSL https://ollama.com/install.sh | sed 's#https://ollama.com/download#https://github.com/jmorganca/ollama/releases/download/v0.1.29#' | sh
51
+
52
+
53
+ # Create the directory and give appropriate permissions
54
+ #RUN mkdir -p /.ollama && chmod 777 /.ollama
55
+
56
+ #WORKDIR /.ollama
57
+
58
+ # Set the entry point script as the default command
59
+ #ENTRYPOINT ["/entrypoint.sh"]
60
+ #CMD ["ollama", "serve"]
61
+
62
+ # Set the model as an environment variable (this can be overridden)
63
+ #ENV model=${model}
entrypoint.sh ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ echo "$PWD"
3
+ #export PATH="llama-b6795-bin-ubuntu-x64/build/bin:$PATH"
4
+ #huggingface-cli download unsloth/Qwen3-4B-Thinking-2507-GGUF --include --local-dir . --local-dir-use-symlinks False
5
+ #huggingface-cli download unsloth/Qwen3-4B-Thinking-2507-GGUF Qwen3-4B-Thinking-2507-Q4_K_M.gguf --local-dir . --local-dir-use-symlinks False
6
+ echo "$(ls)"
7
+ # Starting server
8
+ echo "Starting Llama-Cpp-Python server"
9
+ sleep 10
10
+ python3 -m llama_cpp.server --model Qwen3-4B-Thinking-2507-GGUF --host 0.0.0.0 --port 7860
11
+ #cd llama-b6795-bin-ubuntu-x64/build/bin && chmod +x ./llama-server && ./llama-server --model /app/Qwen3-4B-Thinking-2507-Q4_K_M.gguf --host 0.0.0.0 --port 7860 --temp 1.0 --top-k 64 --top-k 0.95 --min-p 0.0 --log-file llama.log &
12
+ wait