# !pip install llama-cpp-python
from fastapi import FastAPI
from llama_cpp import Llama

app = FastAPI()

# Load the quantized GGUF weights from the Hugging Face Hub at import time,
# so the model is resident before the first request is served.
llm = Llama.from_pretrained(
    repo_id="TheBloke/dolphin-2_6-phi-2-GGUF",
    filename="dolphin-2_6-phi-2.Q3_K_S.gguf",
)


@app.get("/")
def greet_json():
    """Health-check style root endpoint: return a static JSON greeting."""
    return {"Hello": "World!"}


@app.post("/chat")
def chat_completion(prompt: str = "No input example has been defined for this model task."):
    """Run a single-turn chat completion against the loaded model.

    The ``prompt`` string (a query parameter) becomes the sole user
    message; the raw completion payload from llama-cpp is returned
    unmodified as the HTTP response body.
    """
    completion = llm.create_chat_completion(
        messages=[{"role": "user", "content": prompt}]
    )
    return completion