Update app.py
app.py CHANGED
@@ -2,6 +2,7 @@ from fastapi import FastAPI
 from pydantic import BaseModel
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
+from transformers import AutoTokenizer
 
 # Input data model definition
 class Question(BaseModel):
@@ -14,8 +15,12 @@ app = FastAPI()
 model_name_or_path = "FabioSantos/llama3_1_fn"
 model_basename = "unsloth.Q8_0.gguf"
 model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)
-print(model_path)
+print(f"Model path: {model_path}")
 
+# Load the tokenizer
+tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
+
+# Configure the model
 lcpp_llm = Llama(
     model_path=model_path,
     n_threads=2,
@@ -24,23 +29,43 @@ lcpp_llm = Llama(
     n_ctx=4096,
 )
 
-
+# Prompt format used during fine-tuning
+alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+
+### Instruction:
+{}
+
+### Input:
+{}
+
+### Response:
+{}"""
+
+EOS_TOKEN = tokenizer.eos_token  # End-of-response token
 
 def get_response(text: str) -> str:
-    prompt
+    # Format the prompt using the same template used during fine-tuning
+    formatted_prompt = alpaca_prompt.format("Answer the question", text, "") + EOS_TOKEN
     response = lcpp_llm(
-
-
-
-
-
-
-
+        prompt=formatted_prompt,
+        max_tokens=256,
+        temperature=0.5,
+        top_p=0.95,
+        top_k=50,
+        stop=[EOS_TOKEN],  # Use EOS_TOKEN as the stop token
+        echo=True
     )
-    print(response)
-
-
-
+    print(f"Raw Response: {response}")
+    try:
+        response_text = response['choices'][0]['text']
+        print(f"Response Text: {response_text}")
+        answer = response_text.split("### Response:\n")[1].strip()
+    except (KeyError, IndexError) as e:
+        print(f"Error parsing response: {e}")
+        answer = "Desculpe, não consegui entender a resposta."
+
+    print(f"Final Answer: {answer}")
+    return answer
 
 # Endpoint to receive a question and return the answer
 @app.post("/ask")
@@ -52,3 +77,4 @@ def ask_question(question: Question):
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=8000)
+
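The added template mirrors the Alpaca instruction format. Below is a standalone sketch of how it expands; the EOS string is a stub here (the app takes it from tokenizer.eos_token, so the actual value depends on the model's tokenizer):

```python
# Standalone illustration of the prompt construction added in this commit.
# The EOS string below is a stand-in; app.py reads it from tokenizer.eos_token.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = "</s>"  # stub value; the real token comes from the tokenizer

formatted_prompt = alpaca_prompt.format("Answer the question", "What is FastAPI?", "") + EOS_TOKEN
print(formatted_prompt)
```

Because the call passes echo=True, choices[0]['text'] begins with this same prompt; that is why get_response recovers the answer by splitting on "### Response:\n" and falls back to an error message when the split fails.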
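Once the Space is running, the endpoint can be exercised with a small client. A minimal sketch, assuming the Question model declares a single `text: str` field (its class body falls outside the hunks above, so the field name is an assumption):

```python
# Hypothetical client for the /ask endpoint; the "text" field name is an
# assumption, since the Question model's body is not shown in this diff.
import requests

resp = requests.post(
    "http://localhost:8000/ask",
    json={"text": "What is the capital of France?"},
)
print(resp.status_code)
print(resp.json())
```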