askvoxllama-api / requirements.txt
cakebut's picture
Create requirements.txt
c62b4e7 verified
raw
history blame contribute delete
446 Bytes
from fastapi import FastAPI
from llama_cpp import Llama
# FastAPI application serving the model over HTTP.
app = FastAPI()

# Load the GGUF model from a local file.
# FIX: `Llama.from_pretrained()` expects a Hugging Face `repo_id` as its
# first argument plus a required `filename=` keyword; passing a bare local
# .gguf path positionally binds it to `repo_id` and the load fails.
# For a local model file, construct `Llama` directly with `model_path=`.
# n_ctx=1024 caps the context window at 1024 tokens.
llm = Llama(model_path="llama-2-7b-chat.Q4_K_M.gguf", n_ctx=1024)
@app.get("/")
def root():
    """Health-check endpoint: report that the service is running."""
    payload = {"status": "ok"}
    return payload
@app.post("/chat")
def chat(message: str):
    """Send one user message to the model and return its reply.

    `message` arrives as a query parameter (FastAPI's default for a bare
    `str` argument on a POST route). Generation is capped at 512 tokens.
    """
    conversation = [{"role": "user", "content": message}]
    result = llm.create_chat_completion(messages=conversation, max_tokens=512)
    reply = result["choices"][0]["message"]["content"]
    return {"answer": reply}