Spaces:
Sleeping
Sleeping
Ved Gupta
committed on
Commit
·
8afc3f4
0
Parent(s):
initial
Browse files- .gitignore +3 -0
- Dockerfile +5 -0
- README.md +27 -0
- file.txt +14 -0
- models/gpt-3.5-turbo.yaml +30 -0
- models/luna-ai-llama2.tmpl +2 -0
- setup.sh +6 -0
.gitignore
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.gguf
|
| 2 |
+
Modelfile
|
| 3 |
+
models/*.gguf
|
Dockerfile
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM quay.io/go-skynet/local-ai:v2.7.0-ffmpeg-core
|
| 2 |
+
|
| 3 |
+
EXPOSE 8080
|
| 4 |
+
|
| 5 |
+
CMD ["phi-2"]
|
README.md
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: llm-api
|
| 3 |
+
emoji: ⚡
|
| 4 |
+
colorFrom: indigo
|
| 5 |
+
colorTo: yellow
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
app_port: 8080
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
```bash
|
| 13 |
+
curl https://innovatorved-llm-api.hf.space/v1/models
|
| 14 |
+
|
| 15 |
+
curl https://innovatorved-llm-api.hf.space/v1/chat/completions -H "Content-Type: application/json" -d '{
|
| 16 |
+
"model": "gpt-3.5-turbo",
|
| 17 |
+
"messages": [{"role": "user", "content": "How are you?"}],
|
| 18 |
+
"temperature": 0.9
|
| 19 |
+
}'
|
| 20 |
+
|
| 21 |
+
curl https://innovatorved-llm-api.hf.space/v1/chat/completions -H "Content-Type: application/json" -d '{
|
| 22 |
+
"model": "luna-ai-llama2",
|
| 23 |
+
"messages": [{"role": "user", "content": "How are you?"}],
|
| 24 |
+
"temperature": 0.9
|
| 25 |
+
}'
|
| 26 |
+
|
| 27 |
+
```
|
file.txt
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dockerfile
|
| 2 |
+
FROM quay.io/go-skynet/local-ai:latest
|
| 3 |
+
|
| 4 |
+
WORKDIR /app
|
| 5 |
+
|
| 6 |
+
RUN mkdir models
|
| 7 |
+
|
| 8 |
+
RUN wget -q "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf" -O models/mistral-7b-instruct-v0.2.Q4_0.gguf
|
| 9 |
+
RUN wget -q "https://huggingface.co/TheBloke/Luna-AI-Llama2-Uncensored-GGUF/resolve/main/luna-ai-llama2-uncensored.Q4_0.gguf" -O models/luna-ai-llama2
|
| 10 |
+
COPY models/* models/
|
| 11 |
+
|
| 12 |
+
EXPOSE 8080
|
| 13 |
+
|
| 14 |
+
CMD ["--models-path", "/app/models", "--context-size", "700", "--threads", "4"]
|
models/gpt-3.5-turbo.yaml
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: gpt-3.5-turbo
|
| 2 |
+
# Default model parameters
|
| 3 |
+
parameters:
|
| 4 |
+
# Relative to the models path
|
| 5 |
+
model: mistral-7b-instruct-v0.2.Q4_0.gguf
|
| 6 |
+
# temperature
|
| 7 |
+
temperature: 0.3
|
| 8 |
+
# all the OpenAI request options here..
|
| 9 |
+
|
| 10 |
+
# Default context size
|
| 11 |
+
context_size: 512
|
| 12 |
+
threads: 10
|
| 13 |
+
|
| 14 |
+
# Enable prompt caching
|
| 15 |
+
prompt_cache_path: "alpaca-cache"
|
| 16 |
+
prompt_cache_all: true
|
| 17 |
+
|
| 18 |
+
# stopwords (if supported by the backend)
|
| 19 |
+
stopwords:
|
| 20 |
+
- "HUMAN:"
|
| 21 |
+
- "### Response:"
|
| 22 |
+
# define chat roles
|
| 23 |
+
roles:
|
| 24 |
+
assistant: '### Response:'
|
| 25 |
+
system: '### System Instruction:'
|
| 26 |
+
user: '### Instruction:'
|
| 27 |
+
template:
|
| 28 |
+
# template file ".tmpl" with the prompt template to use by default on the endpoint call. Note there is no extension in the files
|
| 29 |
+
completion: completion
|
| 30 |
+
chat: chat
|
models/luna-ai-llama2.tmpl
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{{.Input}}
|
| 2 |
+
### Response:
|
setup.sh
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
mkdir models
|
| 2 |
+
|
| 3 |
+
wget "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf" -O models/mistral-7b-instruct-v0.2.Q4_0.gguf
|
| 4 |
+
|
| 5 |
+
docker run -p 8080:8080 -v $PWD/models:/models -ti --rm quay.io/go-skynet/local-ai:latest --models-path /models --context-size 700 --threads 4
|
| 6 |
+
|