|
|
from fastapi import FastAPI |
|
|
from pydantic import BaseModel |
|
|
from huggingface_hub import hf_hub_download |
|
|
from llama_cpp import Llama |
|
|
|
|
|
|
|
|
class Question(BaseModel):
    # Request body accepted by the /ask endpoint.
    # `text` carries the customer's question as a plain string.
    text: str
|
|
|
|
|
|
|
|
# Application object; the route below is registered on it with a decorator,
# and the __main__ guard hands it to uvicorn.
app = FastAPI()
|
|
|
|
|
|
|
|
# Hugging Face Hub repository and the GGUF file inside it to download.
model_name_or_path = "FabioSantos/llama3_1_fn"
model_basename = "unsloth.Q8_0.gguf"

# Runs at import time; the returned value is passed to Llama(model_path=...).
model_path = hf_hub_download(
    filename=model_basename,
    repo_id=model_name_or_path,
)
print(f"Model path: {model_path}")
|
|
|
|
|
|
|
|
# Single module-level model instance, built once at import time and called
# by get_response for every request.
lcpp_llm = Llama(
    model_path=model_path,
    # Context window and prompt batch size, in tokens.
    n_ctx=4096,
    n_batch=512,
    # CPU threads; -1 asks llama.cpp to offload all layers to the GPU.
    n_threads=2,
    n_gpu_layers=-1,
)
|
|
|
|
|
|
|
|
# Alpaca-style prompt template with three positional slots, filled via
# str.format in this order: instruction, input, response.
alpaca_prompt = (
    "Below is an instruction that describes a task, paired with an input "
    "that provides further context. Write a response that appropriately "
    "completes the request.\n"
    "\n"
    "### Instruction:\n"
    "{}\n"
    "\n"
    "### Input:\n"
    "{}\n"
    "\n"
    "### Response:\n"
    "{}"
)
|
|
|
|
|
def get_response(text: str) -> str:
    """Generate the model's answer to a customer question.

    The question fills the "Input" slot of ``alpaca_prompt``; the
    "Instruction" slot gets a fixed Portuguese customer-service instruction
    and the "Response" slot is left empty for the model to complete.

    Args:
        text: The customer's question.

    Returns:
        The generated answer with leading and trailing whitespace removed.
    """
    formatted_prompt = alpaca_prompt.format(
        "Você é um assistente do serviço de atendimento ao cliente que deve responder as perguntas dos clientes",
        text,
        "",
    )

    completion = lcpp_llm(
        prompt=formatted_prompt,
        max_tokens=256,
        temperature=0.5,
        top_p=0.95,
        top_k=50,
        # Stop if the model starts emitting another response header.
        stop=["### Response:"],
        # Ask for the generated text only. Echoing the prompt and then
        # splitting on "### Response:" returned a slice of the prompt
        # whenever the caller's own text contained that marker.
        echo=False,
    )

    answer = completion["choices"][0]["text"].strip()

    print(f"Final Answer: {answer}")
    return answer
|
|
|
|
|
|
|
|
|
|
|
# POST /ask: run the submitted question through the model and return the
# answer under the "response" key.
@app.post("/ask")
def ask_question(question: Question):
    return {"response": get_response(question.text)}
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when the file runs as a script.
    import uvicorn

    # Serve on all interfaces, port 8000.
    uvicorn.run(app, port=8000, host="0.0.0.0")