Spaces:
Sleeping
Sleeping
Ved Gupta
committed on
Commit
·
8afc3f4
0
Parent(s):
initial
Browse files- .gitignore +3 -0
- Dockerfile +5 -0
- README.md +27 -0
- file.txt +14 -0
- models/gpt-3.5-turbo.yaml +30 -0
- models/luna-ai-llama2.tmpl +2 -0
- setup.sh +6 -0
.gitignore
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.gguf
|
| 2 |
+
Modelfile
|
| 3 |
+
models/*.gguf
|
Dockerfile
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM quay.io/go-skynet/local-ai:v2.7.0-ffmpeg-core
|
| 2 |
+
|
| 3 |
+
EXPOSE 8080
|
| 4 |
+
|
| 5 |
+
CMD ["phi-2"]
|
README.md
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: llm-api
|
| 3 |
+
emoji: ⚡
|
| 4 |
+
colorFrom: indigo
|
| 5 |
+
colorTo: yellow
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
app_port: 8080
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
```bash
|
| 13 |
+
curl https://innovatorved-llm-api.hf.space/v1/models
|
| 14 |
+
|
| 15 |
+
curl https://innovatorved-llm-api.hf.space/v1/chat/completions -H "Content-Type: application/json" -d '{
|
| 16 |
+
"model": "gpt-3.5-turbo",
|
| 17 |
+
"messages": [{"role": "user", "content": "How are you?"}],
|
| 18 |
+
"temperature": 0.9
|
| 19 |
+
}'
|
| 20 |
+
|
| 21 |
+
curl https://innovatorved-llm-api.hf.space/v1/chat/completions -H "Content-Type: application/json" -d '{
|
| 22 |
+
"model": "luna-ai-llama2",
|
| 23 |
+
"messages": [{"role": "user", "content": "How are you?"}],
|
| 24 |
+
"temperature": 0.9
|
| 25 |
+
}'
|
| 26 |
+
|
| 27 |
+
```
|
file.txt
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dockerfile
|
| 2 |
+
FROM quay.io/go-skynet/local-ai:latest
|
| 3 |
+
|
| 4 |
+
WORKDIR /app
|
| 5 |
+
|
| 6 |
+
RUN mkdir models
|
| 7 |
+
|
| 8 |
+
RUN wget -q "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf" -O models/mistral-7b-instruct-v0.2.Q4_0.gguf
|
| 9 |
+
RUN wget -q "https://huggingface.co/TheBloke/Luna-AI-Llama2-Uncensored-GGUF/resolve/main/luna-ai-llama2-uncensored.Q4_0.gguf" -O models/luna-ai-llama2
|
| 10 |
+
COPY models/* models/
|
| 11 |
+
|
| 12 |
+
EXPOSE 8080
|
| 13 |
+
|
| 14 |
+
CMD ["--models-path", "/app/models", "--context-size", "700", "--threads", "4"]
|
models/gpt-3.5-turbo.yaml
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: gpt-3.5-turbo
|
| 2 |
+
# Default model parameters
|
| 3 |
+
parameters:
|
| 4 |
+
# Relative to the models path
|
| 5 |
+
model: mistral-7b-instruct-v0.2.Q4_0.gguf
|
| 6 |
+
# temperature
|
| 7 |
+
temperature: 0.3
|
| 8 |
+
# all the OpenAI request options here..
|
| 9 |
+
|
| 10 |
+
# Default context size
|
| 11 |
+
context_size: 512
|
| 12 |
+
threads: 10
|
| 13 |
+
|
| 14 |
+
# Enable prompt caching
|
| 15 |
+
prompt_cache_path: "alpaca-cache"
|
| 16 |
+
prompt_cache_all: true
|
| 17 |
+
|
| 18 |
+
# stopwords (if supported by the backend)
|
| 19 |
+
stopwords:
|
| 20 |
+
- "HUMAN:"
|
| 21 |
+
- "### Response:"
|
| 22 |
+
# define chat roles
|
| 23 |
+
roles:
|
| 24 |
+
assistant: '### Response:'
|
| 25 |
+
system: '### System Instruction:'
|
| 26 |
+
user: '### Instruction:'
|
| 27 |
+
template:
|
| 28 |
+
# template file ".tmpl" with the prompt template to use by default on the endpoint call. Note there is no extension in the files
|
| 29 |
+
completion: completion
|
| 30 |
+
chat: chat
|
models/luna-ai-llama2.tmpl
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{{.Input}}
|
| 2 |
+
### Response:
|
setup.sh
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
mkdir models
|
| 2 |
+
|
| 3 |
+
wget "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf" -O models/mistral-7b-instruct-v0.2.Q4_0.gguf
|
| 4 |
+
|
| 5 |
+
docker run -p 8080:8080 -v $PWD/models:/models -ti --rm quay.io/go-skynet/local-ai:latest --models-path /models --context-size 700 --threads 4
|
| 6 |
+
|