erlanio committed on
Commit
b5cb0d7
·
verified ·
1 Parent(s): f7a250a

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -7
app.py CHANGED
@@ -1,7 +1,79 @@
1
- from fastapi import FastAPI
2
-
3
- app = FastAPI()
4
-
5
- @app.get("/")
6
- def greet_json():
7
- return {"Hello": "World!"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
+ from huggingface_hub import hf_hub_download
4
+ from llama_cpp import Llama
5
+
6
# Input data model for the /ask endpoint.
class Question(BaseModel):
    # text: the customer's question, sent as JSON {"text": "..."}.
    text: str
9
+
10
# Initialize the FastAPI application (routes are registered below).
app = FastAPI()
12
+
13
# Download the GGUF model weights from the Hugging Face Hub (cached locally
# by hf_hub_download on repeat runs).
model_name_or_path = "FabioSantos/llama3_1_fn"
model_basename = "unsloth.Q8_0.gguf"
model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)
print(f"Model path: {model_path}")

# Load the model with llama_cpp.
lcpp_llm = Llama(
    model_path=model_path,
    n_threads=2,      # CPU threads used for inference
    n_batch=512,      # prompt-processing batch size
    n_gpu_layers=-1,  # NOTE(review): -1 requests GPU offload of all layers — confirm a GPU build of llama-cpp is installed, otherwise this runs on CPU
    n_ctx=4096,       # context window (tokens)
)
27
+
28
# Alpaca-style prompt template used during fine-tuning; the three "{}"
# placeholders are filled with (instruction, input, response) in that order.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""
39
+
40
def get_response(text: str) -> str:
    """Generate an answer for *text* with the fine-tuned Llama model.

    The question is wrapped in the same Alpaca template used for
    fine-tuning; since the model echoes the prompt back, only the text
    following the "### Response:" marker is returned.
    """
    # Build the prompt exactly as during fine-tuning: fixed instruction,
    # the user's question as input, and an empty response slot.
    prompt = alpaca_prompt.format(
        "Você é um assistente do serviço de atendimento ao cliente que deve responder as perguntas dos clientes",
        text,
        ""
    )

    completion = lcpp_llm(
        prompt=prompt,
        max_tokens=256,
        temperature=0.5,
        top_p=0.95,
        top_k=50,
        stop=['### Response:'],  # halt if the model starts a new section
        echo=True                # prompt is echoed back; stripped below
    )
    raw_text = completion['choices'][0]['text']

    # Keep only what follows the "### Response:" marker; fall back to the
    # whole output if the marker is somehow absent.
    _, marker, tail = raw_text.partition("### Response:")
    answer = tail.strip() if marker else raw_text.strip()

    print(f"Final Answer: {answer}")
    return answer
66
+
67
+
68
# Endpoint: receive a question and return the model's answer.
@app.post("/ask")
def ask_question(question: Question):
    """Answer a customer question posted as JSON ({"text": "..."})."""
    answer = get_response(question.text)
    return {"response": answer}
73
+
74
# Run the application with uvicorn when executed as a script.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
78
+
79
+