feat(API): implement basic API functionality
Browse files- .dockerignore +0 -0
- .gitattributes +2 -0
- .gitignore +2 -1
- Dockerfile +14 -0
- requirements.txt +4 -0
- src/api_run.py +13 -0
- src/handlers.py +39 -0
- src/modules/data_models.py +9 -0
- src/modules/dialog_system.py +46 -0
.dockerignore
ADDED
|
File without changes
|
.gitattributes
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
models/zephyr-7b-beta.Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
file-path filter=lfs diff=lfs merge=lfs -text
|
.gitignore
CHANGED
|
@@ -14,6 +14,7 @@ dist/
|
|
| 14 |
downloads/
|
| 15 |
eggs/
|
| 16 |
.eggs/
|
|
|
|
| 17 |
lib/
|
| 18 |
lib64/
|
| 19 |
parts/
|
|
@@ -120,7 +121,7 @@ celerybeat.pid
|
|
| 120 |
*.sage.py
|
| 121 |
|
| 122 |
# Environments
|
| 123 |
-
.
|
| 124 |
.venv
|
| 125 |
env/
|
| 126 |
venv/
|
|
|
|
| 14 |
downloads/
|
| 15 |
eggs/
|
| 16 |
.eggs/
|
| 17 |
+
.idea/
|
| 18 |
lib/
|
| 19 |
lib64/
|
| 20 |
parts/
|
|
|
|
| 121 |
*.sage.py
|
| 122 |
|
| 123 |
# Environments
|
| 124 |
+
config.yaml
|
| 125 |
.venv
|
| 126 |
env/
|
| 127 |
venv/
|
FROM python:3.9

# Install dependencies first so the layer is cached independently of source changes.
COPY requirements.txt ./requirements.txt

RUN python -m pip install -U pip && \
    python -m pip install -r requirements.txt && \
    python -m pip cache purge

COPY ./src /app/src
COPY ./models /app/models

WORKDIR /app

# BUG FIX: the FastAPI `app` object is defined in src/api_run.py, not in a
# top-level main.py — "main:app" would fail with ModuleNotFoundError when the
# container starts. With WORKDIR /app the import path is src.api_run.
CMD ["uvicorn", "src.api_run:app", "--host", "0.0.0.0", "--port", "7860"]
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.85.2
|
| 2 |
+
uvicorn==0.20.0
|
| 3 |
+
llama-cpp-python==0.2.53
|
| 4 |
+
PyYAML==6.0.1
|
src/api_run.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from fastapi import FastAPI

from src.handlers import router


def get_application() -> FastAPI:
    """Create the FastAPI application and attach all API routes."""
    api = FastAPI()
    api.include_router(router)
    return api


# Module-level ASGI entry point, e.g. `uvicorn src.api_run:app`.
app = get_application()
src/handlers.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import yaml

from llama_cpp import Llama
from fastapi import APIRouter, status
from fastapi.responses import JSONResponse

from src.modules.dialog_system import ConversationHandler, MessageRole
from src.modules.data_models import UserMessage, AnswerMessage

router = APIRouter()

# NOTE(review): .gitignore in this commit ignores 'config.yaml' but this code
# reads 'config.yml' — confirm which filename is actually intended.
with open('config.yml', 'r') as file:
    router.config = yaml.safe_load(file)

# Load the GGUF model once at import time; it is shared across all requests.
router.llm = Llama(
    model_path=router.config['model_path'],
    n_ctx=int(router.config['context_tokens']),
    # NOTE(review): 'max_answer_len' is not a documented Llama.__init__
    # parameter; reply length is capped by max_tokens inside
    # ConversationHandler.generate_reply. Confirm this kwarg has any effect.
    max_answer_len=int(router.config['max_answer_tokens'])
)

# Single global conversation: all clients share one chat history.
router.conversation = ConversationHandler(
    model=router.llm,
    message_role=MessageRole
)


# BUG FIX: routed paths must start with '/' — Starlette asserts
# path.startswith('/') when the route is registered, so "v1/..." made the
# application crash at import time.
@router.get("/v1/service/status", status_code=status.HTTP_200_OK)
async def health() -> AnswerMessage:
    """Liveness probe: always returns a static OK payload."""
    return AnswerMessage(message="OK")


# NOTE(review): a GET carrying a JSON request body is non-standard (many
# clients/proxies drop GET bodies); POST would match the OpenAI-style
# /v1/chat/completions convention. Kept as GET to preserve the interface.
@router.get("/v1/chat/completions", response_model=AnswerMessage)
async def chat_completions(user_message: UserMessage) -> AnswerMessage:
    """Append the user's prompt to the shared conversation and return the
    model's reply.

    On any failure, respond with HTTP 500 carrying the error text.
    """
    try:
        router.conversation.send_message(user_message.prompt)
        response = router.conversation.generate_reply()
        return AnswerMessage(message=response)
    except Exception as e:
        # Returning a Response object directly bypasses response_model
        # validation, so the 500 payload is delivered as-is.
        return JSONResponse(status_code=500, content={"message": str(e)})
src/modules/data_models.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from pydantic import BaseModel


class UserMessage(BaseModel):
    """Request body for the chat endpoint: the user's raw prompt text."""

    prompt: str


class AnswerMessage(BaseModel):
    """Response body carrying the service's reply (or status) text."""

    message: str
src/modules/dialog_system.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dataclasses import dataclass
|
| 2 |
+
|
| 3 |
+
from llama_cpp import Llama
|
| 4 |
+
from llama_cpp import ChatCompletionRequestMessage as Message
|
| 5 |
+
from llama_cpp import ChatCompletionRequestSystemMessage as SystemMessage
|
| 6 |
+
from llama_cpp import ChatCompletionRequestAssistantMessage as AssistantMessage
|
| 7 |
+
from llama_cpp import ChatCompletionRequestUserMessage as UserMessage
|
| 8 |
+
|
| 9 |
+
|
@dataclass
class MessageRole:
    """String constants naming the speakers in a chat exchange.

    Values match the role names used by llama.cpp chat-completion messages;
    EXIT is an application-level sentinel, not a model role.
    """

    ASSISTANT: str = "assistant"
    SYSTEM: str = "system"
    USER: str = "user"
    EXIT: str = "exit"
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class ConversationHandler:
|
| 19 |
+
def __init__(self, model: Llama, message_role: MessageRole) -> None:
|
| 20 |
+
self.model: Llama = model
|
| 21 |
+
self.message_role = message_role
|
| 22 |
+
self.messages: list[Message] = [
|
| 23 |
+
SystemMessage(
|
| 24 |
+
role=self.message_role.SYSTEM,
|
| 25 |
+
content='You are a helpful developer assistant, answer all the questions correctly and concisely.'
|
| 26 |
+
),
|
| 27 |
+
AssistantMessage(role=self.message_role.ASSISTANT, content='Hello, do you have any question?'),
|
| 28 |
+
]
|
| 29 |
+
|
| 30 |
+
def send_message(self, content: str):
|
| 31 |
+
new_message = UserMessage(role=self.message_role.USER, content=content)
|
| 32 |
+
self.messages.append(new_message)
|
| 33 |
+
|
| 34 |
+
def generate_reply(self) -> str:
|
| 35 |
+
response = self.model.create_chat_completion(
|
| 36 |
+
messages=self.messages,
|
| 37 |
+
temperature=0.7,
|
| 38 |
+
top_p=0.9,
|
| 39 |
+
top_k=20,
|
| 40 |
+
max_tokens=128
|
| 41 |
+
)
|
| 42 |
+
|
| 43 |
+
response_content = response['choices'][0]['message']
|
| 44 |
+
self.messages.append(AssistantMessage(role=self.message_role.ASSISTANT, content=response_content))
|
| 45 |
+
|
| 46 |
+
return response_content
|