erlanio committed (verified)
Commit f62231f · 1 parent: b5cb0d7

Update app.py

Files changed (1):
  1. app.py +79 -79
app.py CHANGED
@@ -1,79 +1,79 @@
- from fastapi import FastAPI
- from pydantic import BaseModel
- from huggingface_hub import hf_hub_download
- from llama_cpp import Llama
-
- # Input data model definition
- class Question(BaseModel):
-     text: str
-
- # Initialize FastAPI
- app = FastAPI()
-
- # Download and set up the model
- model_name_or_path = "FabioSantos/llama3_1_fn"
- model_basename = "unsloth.Q8_0.gguf"
- model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)
- print(f"Model path: {model_path}")
-
- # Model configuration with llama_cpp
- lcpp_llm = Llama(
-     model_path=model_path,
-     n_threads=2,
-     n_batch=512,
-     n_gpu_layers=-1,
-     n_ctx=4096,
- )
-
- # Prompt format used during fine-tuning
- alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
-
- ### Instruction:
- {}
-
- ### Input:
- {}
-
- ### Response:
- {}"""
-
- def get_response(text: str) -> str:
-     # Format the prompt with the same template used during fine-tuning
-     formatted_prompt = alpaca_prompt.format(
-         "Você é um assistente do serviço de atendimento ao cliente que deve responder as perguntas dos clientes",  # i.e., "You are a customer-service assistant who must answer customers' questions"
-         text,
-         ""
-     )
-     response = lcpp_llm(
-         prompt=formatted_prompt,
-         max_tokens=256,
-         temperature=0.5,
-         top_p=0.95,
-         top_k=50,
-         stop=['### Response:'],  # use "### Response:" as the stop sequence
-         echo=True
-     )
-     response_text = response['choices'][0]['text']
-
-     # Extract the answer after "### Response:"
-     if "### Response:" in response_text:
-         answer = response_text.split("### Response:")[1].strip()
-     else:
-         answer = response_text.strip()
-
-     print(f"Final Answer: {answer}")
-     return answer
-
-
- # Endpoint that receives a question and returns the answer
- @app.post("/ask")
- def ask_question(question: Question):
-     response = get_response(question.text)
-     return {"response": response}
-
- # Run the application
- if __name__ == "__main__":
-     import uvicorn
-     uvicorn.run(app, host="0.0.0.0", port=8000)
-
-
 
+ from fastapi import FastAPI
+ from pydantic import BaseModel
+ from huggingface_hub import hf_hub_download
+ from llama_cpp import Llama
+
+ # Input data model definition
+ class Question(BaseModel):
+     text: str
+
+ # Initialize FastAPI
+ app = FastAPI()
+
+ # Download and set up the model
+ model_name_or_path = "erlanio/prisebot"
+ model_basename = "unsloth.Q4_K_M.gguf"
+ model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)
+ print(f"Model path: {model_path}")
+
+ # Model configuration with llama_cpp
+ lcpp_llm = Llama(
+     model_path=model_path,
+     n_threads=2,
+     n_batch=512,
+     n_gpu_layers=-1,
+     n_ctx=4096,
+ )
+
+ # Prompt format used during fine-tuning
+ alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+
+ ### Instruction:
+ {}
+
+ ### Input:
+ {}
+
+ ### Response:
+ {}"""
+
+ def get_response(text: str) -> str:
+     # Format the prompt with the same template used during fine-tuning
+     formatted_prompt = alpaca_prompt.format(
+         "Você é um assistente do serviço de atendimento ao cliente que deve responder as perguntas dos clientes",  # i.e., "You are a customer-service assistant who must answer customers' questions"
+         text,
+         ""
+     )
+     response = lcpp_llm(
+         prompt=formatted_prompt,
+         max_tokens=256,
+         temperature=0.5,
+         top_p=0.95,
+         top_k=50,
+         stop=['### Response:'],  # use "### Response:" as the stop sequence
+         echo=True
+     )
+     response_text = response['choices'][0]['text']
+
+     # Extract the answer after "### Response:"
+     if "### Response:" in response_text:
+         answer = response_text.split("### Response:")[1].strip()
+     else:
+         answer = response_text.strip()
+
+     print(f"Final Answer: {answer}")
+     return answer
+
+
+ # Endpoint that receives a question and returns the answer
+ @app.post("/ask")
+ def ask_question(question: Question):
+     response = get_response(question.text)
+     return {"response": response}
+
+ # Run the application
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=8000)
+
+
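For reference, a minimal client-side sketch of how the /ask endpoint can be exercised once this app is running. It assumes the default host/port from the uvicorn.run call above and uses the requests library; the question text is a hypothetical example, not from the commit.

    import requests

    # Assumes the server from app.py is running locally on the port set
    # in uvicorn.run (8000); the question below is a hypothetical example.
    resp = requests.post(
        "http://localhost:8000/ask",
        json={"text": "Qual é o horário de atendimento?"},
    )
    resp.raise_for_status()
    print(resp.json()["response"])  # the answer extracted after "### Response:"

The JSON body must use the "text" key so that it validates against the Question Pydantic model.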