FabioSantos committed
Commit b0d65ea · verified · 1 Parent(s): 5da4949

Update app.py

Files changed (1)
  1. app.py +40 -14
app.py CHANGED
@@ -2,6 +2,7 @@ from fastapi import FastAPI
 from pydantic import BaseModel
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
+from transformers import AutoTokenizer
 
 # Input data model definition
 class Question(BaseModel):
@@ -14,8 +15,12 @@ app = FastAPI()
 model_name_or_path = "FabioSantos/llama3_1_fn"
 model_basename = "unsloth.Q8_0.gguf"
 model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)
-print(model_path)
+print(f"Model path: {model_path}")
 
+# Load the tokenizer
+tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
+
+# Configure the model
 lcpp_llm = Llama(
     model_path=model_path,
     n_threads=2,
@@ -24,23 +29,43 @@ lcpp_llm = Llama(
     n_ctx=4096,
 )
 
-prompt_template = "Responda as questões.\nHuman: {prompt}\nAssistant:\n"
+# Prompt format used during fine-tuning
+alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+
+### Instruction:
+{}
+
+### Input:
+{}
+
+### Response:
+{}"""
+
+EOS_TOKEN = tokenizer.eos_token  # End-of-response token
 
 def get_response(text: str) -> str:
-    prompt = prompt_template.format(prompt=text)
+    # Format the prompt with the same template used during fine-tuning
+    formatted_prompt = alpaca_prompt.format("Answer the question", text, "") + EOS_TOKEN
     response = lcpp_llm(
-        prompt=prompt,
-        max_tokens=256,
-        temperature=0.5,
-        top_p=0.95,
-        top_k=50,
-        stop = ['<|end_of_text|>'], # Dynamic stopping when such token is detected.
-        echo=True # return the prompt
+        prompt=formatted_prompt,
+        max_tokens=256,
+        temperature=0.5,
+        top_p=0.95,
+        top_k=50,
+        stop=[EOS_TOKEN],  # Use EOS_TOKEN as the stop token
+        echo=True
     )
-    print(response)
-    return response['choices'][0]['text'].split('Assistant:\n')[1]
-
-
+    print(f"Raw Response: {response}")
+    try:
+        response_text = response['choices'][0]['text']
+        print(f"Response Text: {response_text}")
+        answer = response_text.split("### Response:\n")[1].strip()
+    except (KeyError, IndexError) as e:
+        print(f"Error parsing response: {e}")
+        answer = "Desculpe, não consegui entender a resposta."
+
+    print(f"Final Answer: {answer}")
+    return answer
 
 # Endpoint that receives a question and returns the answer
 @app.post("/ask")
@@ -52,3 +77,4 @@ def ask_question(question: Question):
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=8000)
+
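
For quick verification of the new prompt handling, a minimal client sketch follows. The Question model's fields are not visible in this diff, so the "text" field name below is an assumption; the host and port come from the uvicorn.run call above.

# Hypothetical client for the /ask endpoint; the "text" field name is assumed,
# since the Question schema is not shown in this diff.
import requests

reply = requests.post(
    "http://localhost:8000/ask",
    json={"text": "What is the capital of Brazil?"},
)
print(reply.status_code)
print(reply.json())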