File size: 2,500 Bytes
9df9f40
 
 
 
 
 
e05dc99
 
 
 
9df9f40
33560a5
c50952e
9df9f40
 
c50952e
9df9f40
 
 
 
 
 
 
8b598e1
9df9f40
f696b45
 
 
9aecc7c
9df9f40
9aecc7c
9df9f40
 
9aecc7c
9df9f40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a5a3a6c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#Ollama-API By BleakPrestiger
# Base image for this single-stage build: every layer created below ends up
# in the final image.
# The tag is pinned to an explicit release instead of the moving `latest` tag
# so that a new Ubuntu release cannot silently change the base. 24.04 ships a
# pip that understands the --break-system-packages flag passed to
# `pip install` further down in this file.
FROM ubuntu:24.04

# Relative paths in the instructions that follow resolve against /app.
WORKDIR /app

# Install the Python interpreter and pip from the distribution archive.
# `set -e` aborts the layer on the first failing command; the package index is
# removed in the same layer so it does not add to the image size.
RUN set -e; \
    apt-get update; \
    apt-get install --yes --no-install-recommends \
        python3 \
        python3-pip; \
    rm -rf /var/lib/apt/lists/*

# Install the Hugging Face Hub client together with its `cli` extra.
# --break-system-packages allows pip to install into the distro-managed Python
# without a virtual environment; --no-cache-dir keeps pip's download cache out
# of the image layer.
RUN pip install --no-cache-dir --break-system-packages "huggingface_hub[cli]"
# Alternative download through the CLI, kept for reference:
#RUN huggingface-cli download unsloth/Qwen3-4B-Thinking-2507-GGUF Qwen3-4B-Thinking-2507-Q4_K_M.gguf  --local-dir . --local-dir-use-symlinks False

# Download the model during the build process so the GGUF file is baked into
# the image; local_dir='.' places it in the current working directory.
RUN python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='unsloth/Qwen3-4B-Thinking-2507-GGUF', filename='Qwen3-4B-Thinking-2507-Q4_K_M.gguf', local_dir='.')"

# Install curl, wget and gnupg.
# - `apt-get update` and `install` share one layer so the package index used
#   for the install is never a stale cached one.
# - --no-install-recommends keeps optional packages out of the image;
#   ca-certificates is therefore listed explicitly so that HTTPS downloads can
#   still verify server certificates.
# - The package index is deleted in the same layer to keep the image small.
# - There is no blanket `apt-get upgrade`: pick up security fixes by rebuilding
#   on a refreshed base image instead.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        gnupg \
        wget && \
    rm -rf /var/lib/apt/lists/*

#RUN cd llama-b6795-bin-ubuntu-x64/build/bin && chmod +x ./llama-server && ./llama-server --model Qwen3-1.7B-Q8_0.gguf --ctx-size-draft 32767 --ctx-size 32767 --temp 1.0 --top-k 64 --top-k 0.95 --min-p 0.0 --log-file llama.log &

# Copy the build context into /app.
# The source has to be `.`: COPY cannot read above the build-context root, so
# a `..` source is rejected by the legacy builder and silently reduced to the
# context root by BuildKit.
COPY . /app

# Build-time diagnostic: prints the working directory's contents to the build log.
RUN ls

# Document the server port (TCP); EXPOSE does not publish it by itself.
EXPOSE 7860/tcp

# Place the start-up script at the filesystem root and mark it executable.
COPY entrypoint.sh /entrypoint.sh
RUN ["chmod", "+x", "/entrypoint.sh"]

# Run the start-up script as the container's entry point.
ENTRYPOINT ["/entrypoint.sh"]
#CMD ["ollama", "serve"]

# Add NVIDIA package repositories
#RUN curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
    #&& echo "deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://nvidia.github.io/libnvidia-container/stable/deb/ $(. /etc/os-release; echo $UBUNTU_CODENAME) main" > /etc/apt/sources.list.d/nvidia-container-toolkit.list

# Install NVIDIA container toolkit (Check for any updated methods or URLs for Ubuntu jammy)
#RUN apt-get update && apt-get install -y nvidia-container-toolkit || true

# Install application
#RUN curl https://ollama.ai/install.sh | sh
# The variant below works around an embedding bug by rewriting the installer's download URL to the v0.1.29 GitHub release:
# RUN curl -fsSL https://ollama.com/install.sh | sed 's#https://ollama.com/download#https://github.com/jmorganca/ollama/releases/download/v0.1.29#' | sh


# Create the directory and give appropriate permissions
#RUN mkdir -p /.ollama && chmod 777 /.ollama

#WORKDIR /.ollama

# Set the entry point script as the default command
#ENTRYPOINT ["/entrypoint.sh"]
#CMD ["ollama", "serve"]

# Set the model as an environment variable (this can be overridden)
#ENV model=${model}