Spaces:
Sleeping
Sleeping
Commit · 17aa59f
1
Parent(s): 03d9166
migrated to groq -- suuuuuper fast!
Browse files
- .gitignore +2 -1
- Dockerfile +23 -6
- main.py +25 -10
- requirements.txt +4 -0
.gitignore
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
__pycache__
|
| 2 |
venv
|
| 3 |
.vscode
|
|
|
|
| 4 |
# script for some housekeeping
|
| 5 |
-
f.py
|
|
|
|
| 1 |
__pycache__
|
| 2 |
venv
|
| 3 |
.vscode
|
| 4 |
+
.env
|
| 5 |
# script for some housekeeping
|
| 6 |
+
f.py
|
Dockerfile
CHANGED
|
@@ -1,9 +1,26 @@
|
|
| 1 |
-
FROM ollama/ollama
|
| 2 |
|
| 3 |
-
RUN mkdir -p /.ollama && chmod 777 /.ollama
|
| 4 |
|
| 5 |
-
ENV OLLAMA_MAX_LOADED_MODELS=20 --OLLAMA_NUM_PARALLEL=20
|
| 6 |
-
ENV OLLAMA_HOST "0.0.0.0:7860"
|
| 7 |
|
| 8 |
-
CMD ["serve"]
|
| 9 |
-
EXPOSE 7860
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# FROM ollama/ollama
|
| 2 |
|
| 3 |
+
# RUN mkdir -p /.ollama && chmod 777 /.ollama
|
| 4 |
|
| 5 |
+
# ENV OLLAMA_MAX_LOADED_MODELS=20 --OLLAMA_NUM_PARALLEL=20
|
| 6 |
+
# ENV OLLAMA_HOST "0.0.0.0:7860"
|
| 7 |
|
| 8 |
+
# CMD ["serve"]
|
| 9 |
+
# EXPOSE 7860
|
| 10 |
+
|
| 11 |
+
FROM python:3
|
| 12 |
+
|
| 13 |
+
RUN useradd -m -u 1000 user
|
| 14 |
+
USER user
|
| 15 |
+
ENV HOME=/home/user \
|
| 16 |
+
PATH=/home/user/.local/bin:$PATH
|
| 17 |
+
|
| 18 |
+
COPY --chown=user . $HOME/LLM_API
|
| 19 |
+
|
| 20 |
+
WORKDIR $HOME/LLM_API
|
| 21 |
+
|
| 22 |
+
RUN mkdir $HOME/.cache
|
| 23 |
+
|
| 24 |
+
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 25 |
+
|
| 26 |
+
CMD ["gunicorn", "-w", "5", "-b", "0.0.0.0:7860","main:app"]
|
main.py
CHANGED
|
@@ -1,24 +1,39 @@
|
|
| 1 |
from flask import Flask
|
| 2 |
from flask import request
|
| 3 |
-
from
|
|
|
|
|
|
|
| 4 |
|
| 5 |
app = Flask(__name__)
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
llm = Ollama(model="phi3")
|
| 8 |
|
| 9 |
-
@app.route(
|
| 10 |
def completion():
|
| 11 |
"""
|
| 12 |
{
|
| 13 |
-
|
| 14 |
-
|
| 15 |
}
|
| 16 |
"""
|
| 17 |
-
|
| 18 |
message = request.get_json()
|
| 19 |
-
llm_output = llm.invoke(message['text'])
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
-
# curl -v -X POST 'http://127.0.0.1:8000/
|
|
|
|
| 1 |
from flask import Flask
|
| 2 |
from flask import request
|
| 3 |
+
from groq import Groq
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
|
| 7 |
app = Flask(__name__)
|
| 8 |
+
client = Groq(
|
| 9 |
+
api_key=os.environ.get("GROQ_API_KEY")
|
| 10 |
+
)
|
| 11 |
|
|
|
|
| 12 |
|
| 13 |
+
@app.route("/api/generate", methods=['POST'])
|
| 14 |
def completion():
|
| 15 |
"""
|
| 16 |
{
|
| 17 |
+
"model": "llama3-70b-8192",
|
| 18 |
+
"prompt": "why is the sky blue?"
|
| 19 |
}
|
| 20 |
"""
|
| 21 |
+
|
| 22 |
message = request.get_json()
|
|
|
|
| 23 |
|
| 24 |
+
model = message['model']
|
| 25 |
+
prompt = message['prompt']
|
| 26 |
+
|
| 27 |
+
chat_completion = client.chat.completions.create(
|
| 28 |
+
messages=[
|
| 29 |
+
{
|
| 30 |
+
"role": "user",
|
| 31 |
+
"content": prompt,
|
| 32 |
+
}
|
| 33 |
+
],
|
| 34 |
+
model=model,
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
+
return chat_completion.choices[0].message.content
|
| 38 |
|
| 39 |
+
# curl -v -X POST 'http://127.0.0.1:8000/api/generate' --header 'Content-Type: application/json' --data '{"model": "llama3-70b-8192", "prompt": "why is sky blue?"}'
|
requirements.txt
CHANGED
|
@@ -9,6 +9,7 @@ charset-normalizer==3.3.2
|
|
| 9 |
click==8.1.7
|
| 10 |
dataclasses-json==0.6.6
|
| 11 |
diskcache==5.6.3
|
|
|
|
| 12 |
dnspython==2.6.1
|
| 13 |
email_validator==2.1.1
|
| 14 |
fastapi==0.111.0
|
|
@@ -18,6 +19,7 @@ Flask==3.0.3
|
|
| 18 |
frozenlist==1.4.1
|
| 19 |
fsspec==2024.5.0
|
| 20 |
greenlet==3.0.3
|
|
|
|
| 21 |
gunicorn==22.0.0
|
| 22 |
h11==0.14.0
|
| 23 |
httpcore==1.0.5
|
|
@@ -58,6 +60,7 @@ referencing==0.35.1
|
|
| 58 |
requests==2.31.0
|
| 59 |
rich==13.7.1
|
| 60 |
rpds-py==0.18.1
|
|
|
|
| 61 |
shellingham==1.5.4
|
| 62 |
sniffio==1.3.1
|
| 63 |
SQLAlchemy==2.0.30
|
|
@@ -76,4 +79,5 @@ uvloop==0.19.0
|
|
| 76 |
watchfiles==0.21.0
|
| 77 |
websockets==12.0
|
| 78 |
Werkzeug==3.0.3
|
|
|
|
| 79 |
yarl==1.9.4
|
|
|
|
| 9 |
click==8.1.7
|
| 10 |
dataclasses-json==0.6.6
|
| 11 |
diskcache==5.6.3
|
| 12 |
+
distro==1.9.0
|
| 13 |
dnspython==2.6.1
|
| 14 |
email_validator==2.1.1
|
| 15 |
fastapi==0.111.0
|
|
|
|
| 19 |
frozenlist==1.4.1
|
| 20 |
fsspec==2024.5.0
|
| 21 |
greenlet==3.0.3
|
| 22 |
+
groq==0.8.0
|
| 23 |
gunicorn==22.0.0
|
| 24 |
h11==0.14.0
|
| 25 |
httpcore==1.0.5
|
|
|
|
| 60 |
requests==2.31.0
|
| 61 |
rich==13.7.1
|
| 62 |
rpds-py==0.18.1
|
| 63 |
+
setuptools==70.0.0
|
| 64 |
shellingham==1.5.4
|
| 65 |
sniffio==1.3.1
|
| 66 |
SQLAlchemy==2.0.30
|
|
|
|
| 79 |
watchfiles==0.21.0
|
| 80 |
websockets==12.0
|
| 81 |
Werkzeug==3.0.3
|
| 82 |
+
wheel==0.43.0
|
| 83 |
yarl==1.9.4
|