FabioSantos committed
Commit d07d2b8 · verified · 1 Parent(s): fe4adcb

Create app.py

Files changed (1)
  1. app.py +51 -0
app.py ADDED
@@ -0,0 +1,51 @@
+ from fastapi import FastAPI
+ from pydantic import BaseModel
+ from huggingface_hub import hf_hub_download
+ from llama_cpp import Llama
+
+ # Input data model
+ class Question(BaseModel):
+     text: str
+
+ # Initialize the FastAPI app
+ app = FastAPI()
+
+ # Download and configure the model
+ model_name_or_path = "FabioSantos/llama3_1_fn"
+ model_basename = "unsloth.Q8_0.gguf"
+ model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)
+ print(model_path)
+
+ lcpp_llm = Llama(
+     model_path=model_path,
+     n_threads=2,
+     n_batch=512,
+     n_gpu_layers=-1,  # offload all layers to the GPU
+     n_ctx=4096,
+ )
+
+ prompt_template = "Responda as questões.\nHuman: {prompt}\nAssistant:\n"  # "Answer the questions."
+
+ def get_response(text: str) -> str:
+     prompt = prompt_template.format(prompt=text)
+     response = lcpp_llm(
+         prompt=prompt,
+         max_tokens=256,
+         temperature=0.5,
+         top_p=0.95,
+         top_k=50,
+         stop=["Human:"],  # stop when this token is detected (assumed value; the original token did not survive extraction)
+         echo=True  # include the prompt in the returned text
+     )
+     return response['choices'][0]['text'].split('Assistant:\n')[1]  # keep only the completion
+
+ # Endpoint that receives a question and returns the answer
+ @app.post("/ask")
+ def ask_question(question: Question):
+     response = get_response(question.text)
+     return {"response": response}
+
+ # Run the application
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=8000)
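
Not part of the commit, but for context: a minimal client sketch for exercising the /ask endpoint once the app is running. The URL, port, and sample question below are assumptions based on the defaults in the code above; the requests package is an extra dependency.

import requests

# Hypothetical client for the /ask endpoint defined in app.py.
# Assumes the server above is running locally on its default
# host/port (0.0.0.0:8000).
resp = requests.post(
    "http://localhost:8000/ask",
    json={"text": "Qual é a capital do Brasil?"},  # sample payload matching the Question model
)
print(resp.json()["response"])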