dolphin-phi / app.py
casperbankerson's picture
Add chat completion endpoint and initialize Llama model in FastAPI app; update requirements to include llama-cpp-python
1e7d5af
raw
history blame contribute delete
577 Bytes
# !pip install llama-cpp-python
from fastapi import FastAPI
from llama_cpp import Llama
# Application object that the route decorators below register handlers on.
app = FastAPI()
# Initialize the model
# Runs at import time: Llama.from_pretrained fetches (and caches) the quantized
# Q3_K_S GGUF build of dolphin-2.6-phi-2 from the Hugging Face Hub, so the
# first startup blocks until the download completes. NOTE(review): no
# n_ctx/n_threads are passed, so llama-cpp defaults apply — confirm they are
# adequate for the deployment host.
llm = Llama.from_pretrained(
    repo_id="TheBloke/dolphin-2_6-phi-2-GGUF",
    filename="dolphin-2_6-phi-2.Q3_K_S.gguf",
)
@app.get("/")
def greet_json():
    """Root endpoint: return a static greeting payload (simple liveness check)."""
    greeting = {"Hello": "World!"}
    return greeting
@app.post("/chat")
def chat_completion(prompt: str = "No input example has been defined for this model task."):
    """Run a single-turn chat completion against the local Llama model.

    The caller's text is wrapped as one user message and handed to
    llm.create_chat_completion; the raw completion dict from llama-cpp
    is returned to the client unmodified.
    """
    conversation = [{"role": "user", "content": prompt}]
    result = llm.create_chat_completion(messages=conversation)
    return result