FabioSantos commited on
Commit
f0c7042
·
verified ·
1 Parent(s): 9d4cd80

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +14 -0
  2. api.py +78 -0
  3. requirements.txt +0 -0
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.9
2
+
3
+ RUN useradd -m -u 1000 user
4
+ USER user
5
+ ENV PATH="/home/user/.local/bin:$PATH"
6
+
7
+ WORKDIR /app
8
+
9
+ COPY --chown=user ./requirements.txt requirements.txt
10
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
11
+
12
+ COPY --chown=user . /app
13
+ CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "7860"]
14
+
api.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
+ from huggingface_hub import hf_hub_download
4
+ from llama_cpp import Llama
5
+
6
+ # Definição do modelo de dados de entrada
7
+ class Question(BaseModel):
8
+ text: str
9
+
10
+ # Inicializando o FastAPI
11
+ app = FastAPI()
12
+
13
+ # Download e configuração do modelo
14
+ model_name_or_path = "FabioSantos/curso_Finetune_Llama3.2_v1"
15
+ model_basename = "unsloth.Q8_0.gguf"
16
+ model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)
17
+ print(f"Model path: {model_path}")
18
+
19
+ # Configuração do modelo com llama_cpp
20
+ lcpp_llm = Llama(
21
+ model_path=model_path,
22
+ n_threads=2,
23
+ n_batch=512,
24
+ n_gpu_layers=-1,
25
+ n_ctx=4096,
26
+ )
27
+
28
+ # Formato de prompt utilizado no fine-tuning
29
+ alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
30
+
31
+ ### Instruction:
32
+ {}
33
+
34
+ ### Input:
35
+ {}
36
+
37
+ ### Response:
38
+ {}"""
39
+
40
+ def get_response(text: str) -> str:
41
+ # Formatar o prompt usando o mesmo template utilizado no fine-tuning
42
+ formatted_prompt = alpaca_prompt.format(
43
+ "Você é um assistente do serviço de atendimento ao cliente que deve responder as perguntas dos clientes",
44
+ text,
45
+ ""
46
+ )
47
+ response = lcpp_llm(
48
+ prompt=formatted_prompt,
49
+ max_tokens=64,
50
+ temperature=0.4,
51
+ top_p=0.95,
52
+ top_k=50,
53
+ stop=['### Response:'], # Usar "### Response:" como token de parada
54
+ echo=True
55
+ )
56
+ response_text = response['choices'][0]['text']
57
+
58
+ # Extrair a resposta após "### Response:"
59
+ if "### Response:" in response_text:
60
+ answer = response_text.split("### Response:")[1].strip()
61
+ else:
62
+ answer = response_text.strip()
63
+
64
+ print(f"Final Answer: {answer}")
65
+ return answer
66
+
67
+
68
+ # Endpoint para receber uma questão e retornar a resposta
69
+ @app.post("/ask")
70
+ def ask_question(question: Question):
71
+ response = get_response(question.text)
72
+ return {"response": response}
73
+
74
+ # Executa a aplicação
75
+ if __name__ == "__main__":
76
+ import uvicorn
77
+ uvicorn.run(app, host="0.0.0.0", port=8000)
78
+
requirements.txt ADDED
Binary file (140 Bytes). View file