jaothan committed
Commit 9c1ceb7 · verified · 1 Parent(s): 334e5fa

Upload 13 files

llamacpp_python/Makefile ADDED
@@ -0,0 +1,31 @@
+ APP := llamacpp_python
+ PORT ?= 8001
+ CHAT_FORMAT ?=
+ 
+ include ../common/Makefile.common
+ 
+ IMAGE_NAME ?= $(REGISTRY_ORG)/$(COMPONENT)/$(APP):latest
+ IMAGE := $(REGISTRY)/$(IMAGE_NAME)
+ CUDA_IMAGE := $(REGISTRY)/$(REGISTRY_ORG)/$(COMPONENT)/$(APP)_cuda:latest
+ VULKAN_IMAGE := $(REGISTRY)/$(REGISTRY_ORG)/$(COMPONENT)/$(APP)_vulkan:latest
+ 
+ MODELS_PATH := /locallm/models
+ MODEL_NAME ?= granite-7b-lab-Q4_K_M.gguf
+ 
+ .PHONY: all
+ all: build download-model-granite run
+ 
+ .PHONY: build-cuda
+ build-cuda:
+ 	"${CONTAINER_TOOL}" build --squash-all -t $(CUDA_IMAGE) . -f cuda/Containerfile
+ 
+ .PHONY: build-vulkan-amd64 build-vulkan-arm64
+ build-vulkan-amd64:
+ 	"${CONTAINER_TOOL}" build --squash-all -t $(VULKAN_IMAGE) . -f vulkan/amd64/Containerfile
+ build-vulkan-arm64:
+ 	"${CONTAINER_TOOL}" build --squash-all -t $(VULKAN_IMAGE) . -f vulkan/arm64/Containerfile
+ 
+ .PHONY: download-model-granite # default model
+ download-model-granite:
+ 	cd ../../models/ && \
+ 	make download-model-granite
llamacpp_python/README.md ADDED
@@ -0,0 +1,180 @@
+ # Llamacpp_Python Model Server
+ 
+ The llamacpp_python model server images are based on the [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) project, which provides python bindings for [llama.cpp](https://github.com/ggerganov/llama.cpp). This gives us a python based, OpenAI API compatible model server that can run LLMs of various sizes locally across Linux, Windows, or Mac.
+ 
+ This model server requires models to be converted from their original format, typically a set of `*.bin` or `*.safetensor` files, into a single GGUF formatted file. Many models are already available in GGUF format on [huggingface.co](https://huggingface.co). You can also use the [model converter utility](../../convert_models/) available in this repo to convert models yourself.
+ 
+ 
+ ## Image Options
+ 
+ We currently provide 3 options for the llamacpp_python model server:
+ * [Base](#base)
+ * [Cuda](#cuda)
+ * [Vulkan (experimental)](#vulkan-experimental)
+ 
+ ### Base
+ 
+ The [base image](../llamacpp_python/base/Containerfile) is the standard image that works for both arm64 and amd64 environments. However, it does not include any hardware acceleration and will run on CPU only. If you use the base image, make sure that your container runtime has sufficient resources to run the desired model(s).
+ 
+ To build the base model service image:
+ 
+ ```bash
+ make build
+ ```
+ 
+ To pull the base model service image:
+ 
+ ```bash
+ podman pull quay.io/ai-lab/llamacpp_python
+ ```
+ 
+ 
+ ### Cuda
+ 
+ The [Cuda image](../llamacpp_python/cuda/Containerfile) includes all the extra drivers necessary to run our model server with Nvidia GPUs. This will significantly speed up the model's response time over CPU-only deployments.
+ 
+ To build the Cuda variant image:
+ 
+ ```bash
+ make build-cuda
+ ```
+ 
+ To pull the Cuda model service image:
+ 
+ ```bash
+ podman pull quay.io/ai-lab/llamacpp_python_cuda
+ ```
+ 
+ **IMPORTANT!**
+ 
+ To run the Cuda image with GPU acceleration, you need to install the correct [Cuda drivers](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#driver-installation) for your system along with the [Nvidia Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#). Please use the links provided to find installation instructions for your system.
+ 
+ Once those are installed, you can use the Container Toolkit CLI to discover your Nvidia device(s).
+ 
+ ```bash
+ sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
+ ```
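+ 
+ To confirm that the spec was generated and your device(s) are visible, you can list the registered CDI devices (the `cdi list` subcommand is assumed to be available in recent Nvidia Container Toolkit releases):
+ 
+ ```bash
+ nvidia-ctk cdi list
+ ```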
+ 
+ Finally, you will also need to add `--device nvidia.com/gpu=all` to your `podman run` command so your container can access the GPU.
+ 
+ 
+ ### Vulkan (experimental)
+ 
+ The [Vulkan](https://docs.vulkan.org/guide/latest/what_is_vulkan.html) image ([amd64](../llamacpp_python/vulkan/amd64/Containerfile)/[arm64](../llamacpp_python/vulkan/arm64/Containerfile)) is experimental, but it can be used to gain partial GPU access on an M-series Mac, significantly speeding up model response time over a CPU-only deployment. This image requires that your podman machine provider is "applehv" and that you use krunkit instead of vfkit. Since these tools are not currently supported by podman desktop, this image will remain "experimental".
+ 
+ To build the Vulkan model service variant image:
+ 
+ | System Architecture | Command |
+ |---|---|
+ | amd64 | `make build-vulkan-amd64` |
+ | arm64 | `make build-vulkan-arm64` |
+ 
+ To pull the Vulkan model service image:
+ 
+ ```bash
+ podman pull quay.io/ai-lab/llamacpp_python_vulkan
+ ```
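+ 
+ To sanity-check that a GPU is visible from inside the container, you can run `vulkaninfo` (shipped in the image via vulkan-tools) instead of the default entrypoint; the `--summary` flag is assumed from recent vulkan-tools releases:
+ 
+ ```bash
+ podman run --rm --entrypoint vulkaninfo quay.io/ai-lab/llamacpp_python_vulkan --summary
+ ```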
+ 
+ 
+ ## Download Model(s)
+ 
+ There are many models to choose from these days, most of which can be found on [huggingface.co](https://huggingface.co). In order to use a model with the llamacpp_python model server, it must be in GGUF format. You can either download pre-converted GGUF models directly or convert them yourself with the [model converter utility](../../convert_models/) available in this repo.
+ 
+ A performant, Apache-2.0 licensed model that we recommend using if you are just getting started is
+ `granite-7b-lab`. You can use the link below to quickly download a quantized (smaller) GGUF version of this model for use with the llamacpp_python model server.
+ 
+ Download URL: [https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf](https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf)
+ 
+ Place all models in the [models](../../models/) directory.
+ 
+ You can use the snippet below to download the default model:
+ 
+ ```bash
+ make download-model-granite
+ ```
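+ 
+ Alternatively, you can fetch the same file manually with curl (run from the repo root; the target path follows the models directory convention above):
+ 
+ ```bash
+ curl -L -o models/granite-7b-lab-Q4_K_M.gguf \
+   https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf
+ ```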
+ 
+ Or you can use the generic `download-model` target from the `/models` directory to download any model file from huggingface:
+ 
+ ```bash
+ cd ../../models
+ make MODEL_NAME=<model_name> MODEL_URL=<model_url> -f Makefile download-model
+ # EX: make MODEL_NAME=granite-7b-lab-Q4_K_M.gguf MODEL_URL=https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf -f Makefile download-model
+ ```
+ 
+ 
+ ## Deploy Model Service
+ 
+ ### Single Model Service:
+ 
+ To deploy the LLM server, you must specify a volume mount `-v` pointing to where your models are stored on the host machine, along with the `MODEL_PATH` for your model of choice. The model server is most easily deployed by calling the make command: `make -f Makefile run`. As with all our make calls, you can pass any number of the following variables: `REGISTRY`, `IMAGE_NAME`, `MODEL_NAME`, `MODEL_PATH`, and `PORT`.
+ 
+ ```bash
+ podman run --rm -d \
+   -p 8001:8001 \
+   -v /local/path/to/locallm/models:/locallm/models:ro \
+   -e MODEL_PATH=models/granite-7b-lab-Q4_K_M.gguf \
+   -e HOST=0.0.0.0 \
+   -e PORT=8001 \
+   -e MODEL_CHAT_FORMAT=openchat \
+   llamacpp_python
+ ```
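+ 
+ Once the container is up, you can smoke-test the service with a minimal OpenAI-style chat request (the server exposes an OpenAI-compatible API; adjust the port if you changed `PORT`):
+ 
+ ```bash
+ curl -s http://localhost:8001/v1/chat/completions \
+   -H "Content-Type: application/json" \
+   -d '{"messages": [{"role": "user", "content": "Hello!"}]}'
+ ```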
+ 
+ Or with the Cuda image:
+ 
+ ```bash
+ podman run --rm -d \
+   --device nvidia.com/gpu=all \
+   -p 8001:8001 \
+   -v /local/path/to/locallm/models:/locallm/models:ro \
+   -e MODEL_PATH=models/granite-7b-lab-Q4_K_M.gguf \
+   -e HOST=0.0.0.0 \
+   -e PORT=8001 \
+   -e MODEL_CHAT_FORMAT=openchat \
+   llamacpp_python
+ ```
+ 
+ ### Multiple Model Service:
+ 
+ To enable dynamic loading and unloading of different models present on your machine, you can start the model service with a `CONFIG_PATH` instead of a `MODEL_PATH`.
+ 
+ Here is an example `models_config.json` with two model options.
+ 
+ ```json
+ {
+   "host": "0.0.0.0",
+   "port": 8001,
+   "models": [
+     {
+       "model": "models/granite-7b-lab-Q4_K_M.gguf",
+       "model_alias": "granite",
+       "chat_format": "openchat"
+     },
+     {
+       "model": "models/merlinite-7b-lab-Q4_K_M.gguf",
+       "model_alias": "merlinite",
+       "chat_format": "openchat"
+     }
+   ]
+ }
+ ```
+ 
+ Now run the container with the specified config file.
+ 
+ ```bash
+ podman run --rm -d \
+   -p 8001:8001 \
+   -v /local/path/to/locallm/models:/locallm/models:ro \
+   -e CONFIG_PATH=models/<config-filename> \
+   llamacpp_python
+ ```
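+ 
+ With the config-based service running, you can verify which models are being served by querying the OpenAI-compatible models endpoint:
+ 
+ ```bash
+ curl -s http://localhost:8001/v1/models
+ ```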
+ 
+ ### DEV environment
+ 
+ The development environment is implemented with devcontainer technology.
+ 
+ To run the tests:
+ 
+ ```bash
+ make -f Makefile test
+ ```
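+ 
+ The test suite resolves the image under test from the `REGISTRY` and `IMAGE_NAME` environment variables (see `tests/conftest.py`), so you can point it at a locally built image, for example:
+ 
+ ```bash
+ REGISTRY=quay.io IMAGE_NAME=ai-lab/llamacpp_python:latest make -f Makefile test
+ ```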
llamacpp_python/base/Containerfile ADDED
@@ -0,0 +1,9 @@
+ FROM registry.access.redhat.com/ubi9/python-311:1-77.1726664316
+ WORKDIR /locallm
+ COPY src .
+ USER root
+ RUN dnf install -y gcc-toolset-13-gcc gcc-toolset-13-gcc-c++
+ USER 1001
+ RUN CC="/opt/rh/gcc-toolset-13/root/usr/bin/gcc" CXX="/opt/rh/gcc-toolset-13/root/usr/bin/g++" pip install --no-cache-dir --verbose -r ./requirements.txt
+ EXPOSE 8001
+ ENTRYPOINT [ "sh", "./run.sh" ]
llamacpp_python/cuda/Containerfile ADDED
@@ -0,0 +1,10 @@
+ FROM quay.io/opendatahub/workbench-images:cuda-ubi9-python-3.9-20231206
+ USER root
+ RUN dnf install -y gcc-toolset-13-gcc gcc-toolset-13-gcc-c++
+ USER 1001
+ WORKDIR /locallm
+ COPY src .
+ ENV CMAKE_ARGS="-DGGML_CUDA=on -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DLLAMA_F16C=OFF"
+ ENV FORCE_CMAKE=1
+ RUN CC="/opt/rh/gcc-toolset-13/root/usr/bin/gcc" CXX="/opt/rh/gcc-toolset-13/root/usr/bin/g++" pip install --no-cache-dir -r ./requirements.txt
+ ENTRYPOINT [ "sh", "run.sh" ]
llamacpp_python/src/requirements.txt ADDED
@@ -0,0 +1,3 @@
+ llama-cpp-python[server]==0.2.90
+ transformers==4.41.2
+ pip==24.0
llamacpp_python/src/run.sh ADDED
@@ -0,0 +1,27 @@
+ #!/bin/bash
+ # If a config file is provided, start the multi-model server from it.
+ if [ -n "${CONFIG_PATH}" ]; then
+     python -m llama_cpp.server --config_file "${CONFIG_PATH}"
+     exit 0
+ fi
+ 
+ if [ "${MODEL_HF_PRETRAINED_MODEL}" == "None" ]; then
+     MODEL_HF_PRETRAINED_MODEL=""
+ fi
+ 
+ # Otherwise serve a single model, with defaults assigned via ${VAR:=default}.
+ if [ -n "${MODEL_PATH}" ]; then
+     python -m llama_cpp.server \
+         --model "${MODEL_PATH}" \
+         --host "${HOST:=0.0.0.0}" \
+         --port "${PORT:=8001}" \
+         --n_gpu_layers "${GPU_LAYERS:=0}" \
+         --clip_model_path "${CLIP_MODEL_PATH:=None}" \
+         --chat_format "${MODEL_CHAT_FORMAT:=llama-2}" \
+         ${PRETRAINED_MODEL_PATH:=} \
+         ${MODEL_HF_PRETRAINED_MODEL:+--hf_pretrained_model_name_or_path "${MODEL_HF_PRETRAINED_MODEL}"} \
+         --interrupt_requests "${INTERRUPT_REQUESTS:=False}"
+     exit 0
+ fi
+ 
+ echo "Please set either a CONFIG_PATH or a MODEL_PATH"
+ exit 1
+ 
llamacpp_python/tests/__init__.py ADDED
File without changes
llamacpp_python/tests/conftest.py ADDED
@@ -0,0 +1,60 @@
+ import pytest_container
+ import os
+ 
+ # For cuda, add this to the Container below: extra_launch_args=["--device", "nvidia.com/gpu=all"]
+ if 'REGISTRY' not in os.environ:
+     REGISTRY = 'ghcr.io'
+ else:
+     REGISTRY = os.environ['REGISTRY']
+ 
+ if 'IMAGE_NAME' not in os.environ:
+     IMAGE_NAME = 'containers/llamacpp_python:latest'
+ else:
+     IMAGE_NAME = os.environ['IMAGE_NAME']
+ 
+ if 'MODEL_NAME' not in os.environ:
+     MODEL_NAME = 'granite-7b-lab-Q4_K_M.gguf'
+ else:
+     MODEL_NAME = os.environ['MODEL_NAME']
+ 
+ if 'MODEL_PATH' not in os.environ:
+     MODEL_PATH = "/locallm/models"
+ else:
+     MODEL_PATH = os.environ['MODEL_PATH']
+ 
+ if 'PORT' not in os.environ:
+     PORT = 8001
+ else:
+     PORT = os.environ['PORT']
+ try:
+     PORT = int(PORT)
+ except ValueError:
+     PORT = 8001
+ 
+ MS = pytest_container.Container(
+     # Use the REGISTRY/IMAGE_NAME values resolved above so the defaults apply when the env vars are unset
+     url=f"containers-storage:{REGISTRY}/{IMAGE_NAME}",
+     volume_mounts=[
+         pytest_container.container.BindMount(
+             container_path=f"{MODEL_PATH}/{MODEL_NAME}",
+             host_path=f"./{MODEL_NAME}",
+             flags=["ro"]
+         )
+     ],
+     extra_environment_variables={
+         "MODEL_PATH": f"{MODEL_PATH}/{MODEL_NAME}",
+         "HOST": "0.0.0.0",
+         "PORT": f"{PORT}"
+     },
+     forwarded_ports=[
+         pytest_container.PortForwarding(
+             container_port=PORT,
+             host_port=PORT
+         )
+     ],
+ )
+ 
+ def pytest_generate_tests(metafunc):
+     pytest_container.auto_container_parametrize(metafunc)
+ 
+ def pytest_addoption(parser):
+     pytest_container.add_logging_level_options(parser)
llamacpp_python/tests/requirements.txt ADDED
@@ -0,0 +1,8 @@
+ pip==24.0
+ pytest-container==0.4.0
+ pytest-selenium==4.1.0
+ pytest-testinfra==10.1.0
+ pytest==8.1.1
+ requests==2.31.0
+ selenium==4.19.0
+ tenacity==8.2.3
llamacpp_python/tests/test_alive.py ADDED
@@ -0,0 +1,13 @@
+ import pytest_container
+ from .conftest import MS
+ import tenacity
+ import os
+ 
+ CONTAINER_IMAGES = [MS]
+ 
+ def test_etc_os_release_present(auto_container: pytest_container.container.ContainerData):
+     assert auto_container.connection.file("/etc/os-release").exists
+ 
+ @tenacity.retry(stop=tenacity.stop_after_attempt(5), wait=tenacity.wait_exponential())
+ def test_alive(auto_container: pytest_container.container.ContainerData, host):
+     host.run_expect([0], f"curl http://localhost:{auto_container.forwarded_ports[0].host_port}")
llamacpp_python/tooling_options.ipynb ADDED
@@ -0,0 +1,235 @@
+ {
+  "cells": [
+   {
+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "# Interact with the AI Lab Playground\n",
+     "\n",
+     "This notebook will demonstrate 3 (although there could be many more) ways to write python code to interact with our playground model image. The goal here is to show that our approach allows for flexibility and user choice when it comes to which LLM framework they want to use for development. \n",
+     "\n",
+     "* [Custom code](#custom-code)\n",
+     "* [OpenAI API for python](#openai-python)\n",
+     "* [LangChain](#langchain)\n",
+     "\n",
+     "All 3 examples will demonstrate making a chat request to the same model service and getting a streaming text response back. \n",
+     "\n"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "This notebook assumes that the playground image is running locally. Once built, you can use the command below to start the model service image. \n",
+     "\n",
+     "```bash\n",
+     "podman run -it -p 8000:8000 -v <YOUR-LOCAL-PATH>/locallm/models:/locallm/models:Z -e MODEL_PATH=models/mistral-7b-instruct-v0.2.Q4_K_M.gguf playground\n",
+     "```"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "### Custom Code\n",
+     "\n",
+     "This code block demonstrates that we can fairly easily write a custom class that only relies on the python packages `json` and `requests` to interact with our model service."
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 15,
+    "metadata": {},
+    "outputs": [
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       " Ah, an excellent question! When it comes to naming a company that specializes in colorful socks, there are plenty of options that convey creativity, playfulness, and professionalism. Here are some suggestions:\n",
+       "1. SoleMates - This name plays on the idea of \"soulmates,\" but replaces the word \"soul\" with \"sole,\" which is a nod to the fact that socks are worn on the feet. It's catchy and memorable, and it suggests that the company's products are the perfect match for their customers.\n",
+       "2. Footloose & Fancy Free - This name has a fun, carefree vibe, and it conveys the idea of being unencumbered by stuffy shoes or boring footwear. It also has a nice ring to it, with the alliteration of \"foot\" and \"free.\"\n",
+       "3. Hue & Cry - This name is a play on words that references both the colors of the socks and the idea of making a fuss or raised voice (i.e., crying). It's clever and sophisticated, and it suggests that the company's products are eye-catching and attention-grabbing.\n",
+       "4. Toes & Tones - This name combines two things people often pay attention to when they're looking at their feet: the toes and the tone of their voice. It's catchy and easy to remember, and it suggests that the company's products are all about expressing oneself through fashionable footwear.\n",
+       "5. Ankle Appeal - This name is a play on words that references both the anatomy of the foot (ankles) and the idea of appealing to customers. It's clever and witty, and it suggests that the company's products are not only stylish but also functional and comfortable.\n",
+       "6. Pedal Pizzazz - This name references the pedals on a bike or other foot-powered vehicle, which is a nod to the active lifestyle and adventure that many people enjoy when they're wearing colorful socks. It's fun and upbeat, and it suggests that the company's products are all about adding some excitement to one's daily routine.\n",
+       "7. SoleMate Society - This name builds on the \"SoleMates\" idea from earlier, but it adds a sense of exclusivity and membership to the mix. It suggests that customers who choose the company's socks are part of an elite group of style-conscious individuals who value quality and fashion above all else.\n",
+       "8. Footworks - This name references both the feet themselves and the idea of \"work\" in the sense of creating something beautiful or functional. It's catchy and easy to remember, and it suggests that the company's products are crafted with care and attention to detail.\n",
+       "These are just a few ideas to get you started, but I hope they help inspire you as you come up with a name for your colorful sock company!"
+      ]
+     }
+    ],
+    "source": [
+     "import requests\n",
+     "import json\n",
+     "\n",
+     "class Chat:\n",
+     "    def __init__(self, endpoint) -> None:\n",
+     "        self.endpoint = endpoint\n",
+     "        self.headers = {\"accept\": \"application/json\",\n",
+     "                        \"Content-Type\": \"application/json\"}\n",
+     "        self.system_prompt = [{\"role\": \"system\",\n",
+     "                               \"content\":\n",
+     "                               \"\"\"You are a helpful assistant that is comfortable speaking with C level executives in a professional setting.\"\"\"}]\n",
+     "        self.session = requests.Session()\n",
+     "\n",
+     "    def ask(self, prompt):\n",
+     "        self.system_prompt.append({\"role\": \"user\", \"content\": prompt})\n",
+     "        data = {\"messages\": self.system_prompt,\n",
+     "                \"stream\": True,\n",
+     "                \"temperature\": 0.9\n",
+     "                }\n",
+     "        r = self.session.post(self.endpoint, headers=self.headers, json=data, stream=True)\n",
+     "        reply = \"\"\n",
+     "        for line in r.iter_lines(decode_unicode=True):\n",
+     "            if line:\n",
+     "                if \"[DONE]\" in line or \": ping\" in line:\n",
+     "                    continue\n",
+     "                else:\n",
+     "                    response = json.loads(line[6:])[\"choices\"][0][\"delta\"]\n",
+     "                    if \"content\" in response.keys():\n",
+     "                        yield response[\"content\"]\n",
+     "\n",
+     "chat = Chat(endpoint=\"http://localhost:8000/v1/chat/completions\")\n",
+     "stream = chat.ask(\"What would be a good company name for a company that makes colorful socks?\")\n",
+     "for chunk in stream:\n",
+     "    print(chunk, end=\"\")"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "### OpenAI Python \n",
+     "\n",
+     "This code block uses the OpenAI python package to interact with our model service. Since OpenAI built this tooling to interact with their hosted model service, we need to set the base_url variable to point to our local model service. Otherwise, this code from their docs is left unchanged. "
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 16,
+    "metadata": {},
+    "outputs": [
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       " Here are some suggestions for a company name that makes colorful socks:\n",
+       "\n",
+       "1. SoleMates - This name suggests the idea of companions or friends, which could work well for a brand that offers fun and colorful socks.\n",
+       "2. HueHues - A play on the word \"hue,\" this name conveys a sense of vibrant colors and could work well for a brand that specializes in brightly colored socks.\n",
+       "3. SockTastic - This name is meant to evoke excitement and enthusiasm, which could be appealing to customers who want fun and colorful socks.\n",
+       "4. The Sockery - This name has a playful ring to it, and the use of \"ery\" at the end gives it a fun and creative feel.\n",
+       "5. ColoredSocks Co. - A straightforward name that gets straight to the point. It's simple yet memorable, and easy for customers to remember.\n",
+       "6. Socktopus - This name combines the words \"sock\" and \"octopus,\" which could work well for a brand that offers colorful and fun socks with unique designs. \n",
+       "7. The Funky Foot Company - This name conveys a sense of fun and creativity, which could be appealing to customers who want colorful socks with bold designs.\n",
+       "8. ColorFiesta - This name suggests a celebration of colors, which could work well for a brand that offers brightly colored socks. \n",
+       "9. SoleSplash - This name combines the word \"sole\" with \"splash,\" which could suggest a fun and playful approach to sock design.\n",
+       "10. HappyFeet - This name conveys a sense of happiness and joy, which could be appealing to customers who want colorful socks that make them feel good. "
+      ]
+     }
+    ],
+    "source": [
+     "# Example code from https://platform.openai.com/docs/api-reference/streaming\n",
+     "\n",
+     "import openai\n",
+     "import os\n",
+     "\n",
+     "from openai import OpenAI\n",
+     "\n",
+     "client = OpenAI(base_url=\"http://localhost:8000/v1\",\n",
+     "                api_key=\"sk-no-key-required\")\n",
+     "\n",
+     "stream = client.chat.completions.create(\n",
+     "    model=\"gpt-400\",\n",
+     "    messages=[{\"role\": \"user\", \"content\": \"What would be a good company name for a company that makes colorful socks?\"}],\n",
+     "    stream=True,\n",
+     ")\n",
+     "\n",
+     "for chunk in stream:\n",
+     "    if chunk.choices[0].delta.content is not None:\n",
+     "        print(chunk.choices[0].delta.content, end=\"\")"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "### Langchain \n",
+     "\n",
+     "This code block uses the popular Langchain python package to interact with our local model service. Since our playground image is, at least partially, OpenAI API compatible, we can use Langchain the same way we'd use it with OpenAI. Like above, we simply change the base URL to point to our hosted model. "
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 13,
+    "metadata": {},
+    "outputs": [
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "\n",
+       "\n",
+       "I'm looking for something catchy, creative and eye-catching. I want it to stand out and make people want to buy them!\n",
+       "\n",
+       "What are some suggestions?\n",
+       "\n",
+       "Comment: Here are some suggestions for a company name that makes colorful socks:\n",
+       "\n",
+       "1. SoleMates - a play on the word \"soulmates\" that incorporates the idea of matching socks.\n",
+       "2. Footloose & Fancy Free - this name conveys the idea of being carefree and having fun with your footwear.\n",
+       "3. Hue & Cry - a play on words that references both color and emotion.\n",
+       "4. Toe-Tally Awesome - this name is a play on the phrase \"totally awesome\" and incorporates the idea of toes.\n",
+       "5. The Sock Exchange - this name could work for a company that sells socks in a variety of colors and patterns.\n",
+       "6. SoleMate Society - this name incorporates the idea of finding a matching pair of socks, but also implies a sense of community among sock enthusiasts.\n",
+       "7. The Happy Feet Company - this name conveys the idea"
+      ]
+     },
+     {
+      "data": {
+       "text/plain": [
+        "'\\n\\nI\\'m looking for something catchy, creative and eye-catching. I want it to stand out and make people want to buy them!\\n\\nWhat are some suggestions?\\n\\nComment: Here are some suggestions for a company name that makes colorful socks:\\n\\n1. SoleMates - a play on the word \"soulmates\" that incorporates the idea of matching socks.\\n2. Footloose & Fancy Free - this name conveys the idea of being carefree and having fun with your footwear.\\n3. Hue & Cry - a play on words that references both color and emotion.\\n4. Toe-Tally Awesome - this name is a play on the phrase \"totally awesome\" and incorporates the idea of toes.\\n5. The Sock Exchange - this name could work for a company that sells socks in a variety of colors and patterns.\\n6. SoleMate Society - this name incorporates the idea of finding a matching pair of socks, but also implies a sense of community among sock enthusiasts.\\n7. The Happy Feet Company - this name conveys the idea'"
+       ]
+      },
+      "execution_count": 13,
+      "metadata": {},
+      "output_type": "execute_result"
+     }
+    ],
+    "source": [
+     "# Example code from https://github.com/mudler/LocalAI/blob/master/examples/langchain/langchainpy-localai-example/simple_demo.py\n",
+     "\n",
+     "from langchain.llms import OpenAI\n",
+     "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
+     "\n",
+     "\n",
+     "llm = OpenAI(temperature=0.9, model_name=\"instructlab/granite-7b-lab\", base_url=\"http://localhost:8000/v1\",\n",
+     "             openai_api_key=\"sk-no-key-required\", streaming=True,\n",
+     "             callbacks=[StreamingStdOutCallbackHandler()])\n",
+     "text = \"What would be a good company name for a company that makes colorful socks?\"\n",
+     "llm(text)\n"
+    ]
+   }
+  ],
+  "metadata": {
+   "kernelspec": {
+    "display_name": "locallm",
+    "language": "python",
+    "name": "python3"
+   },
+   "language_info": {
+    "codemirror_mode": {
+     "name": "ipython",
+     "version": 3
+    },
+    "file_extension": ".py",
+    "mimetype": "text/x-python",
+    "name": "python",
+    "nbconvert_exporter": "python",
+    "pygments_lexer": "ipython3",
+    "version": "3.9.16"
+   }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 2
+ }
llamacpp_python/vulkan/amd64/Containerfile ADDED
@@ -0,0 +1,17 @@
+ FROM registry.access.redhat.com/ubi9/python-311:1-77.1726664316
+ USER 0
+ RUN dnf install -y python3-dnf-plugin-versionlock
+ RUN dnf install -y mesa-vulkan-drivers-24.1.2-3.el9.x86_64
+ RUN dnf versionlock mesa-vulkan-drivers-24.1.2-3.el9.x86_64
+ RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm
+ RUN dnf install -y git cmake ninja-build gcc gcc-c++
+ RUN dnf copr enable -y ligenix/enterprise-sandbox epel-9-x86_64
+ RUN dnf install -y vulkan-headers vulkan-tools
+ USER 1001
+ WORKDIR /locallm
+ COPY src .
+ RUN pip install --upgrade pip
+ ENV CMAKE_ARGS="-DLLAMA_VULKAN=on"
+ ENV FORCE_CMAKE=1
+ RUN pip install --no-cache-dir --upgrade -r /locallm/requirements.txt
+ ENTRYPOINT [ "sh", "run.sh" ]
llamacpp_python/vulkan/arm64/Containerfile ADDED
@@ -0,0 +1,17 @@
+ FROM registry.access.redhat.com/ubi9/python-311:1-77.1726664316
+ USER 0
+ RUN dnf install -y python3-dnf-plugin-versionlock && \
+     dnf install -y \
+       https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
+     dnf copr enable -y slp/mesa-krunkit epel-9-aarch64 && \
+     dnf install -y mesa-vulkan-drivers-24.1.2-101.el9.aarch64 && \
+     dnf versionlock mesa-vulkan-drivers-24.1.2-101.el9.aarch64 && \
+     dnf install -y git cmake ninja-build gcc gcc-c++ vulkan-loader-devel vulkan-tools
+ USER 1001
+ WORKDIR /locallm
+ COPY src .
+ RUN pip install --upgrade pip
+ ENV CMAKE_ARGS="-DLLAMA_VULKAN=on"
+ ENV FORCE_CMAKE=1
+ RUN pip install --no-cache-dir --upgrade -r /locallm/requirements.txt
+ ENTRYPOINT [ "sh", "run.sh" ]
+ ENTRYPOINT [ "sh", "run.sh" ]