Anna154 committed on
Commit
5d8a3f9
·
verified ·
1 Parent(s): 1ac76d7

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +10 -0
  2. app.py +76 -0
  3. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# Point the Hugging Face cache at a writable location. Setting it here (rather
# than only inside app.py) guarantees it is in effect before any Python import
# reads it — Spaces containers run as a non-root user whose home may not be
# writable.
ENV HF_HOME=/tmp/.cache

WORKDIR /app

# Copy and install requirements in their own layer so dependency installation
# is cached unless requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# Serve the FastAPI app with uvicorn on port 7860 (the port Hugging Face
# Spaces expects a Docker Space to listen on).
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

# BUGFIX: HF_HOME must be exported BEFORE transformers / huggingface_hub are
# imported — those libraries resolve the cache directory at import time, so
# the original placement (after the imports) had no effect on where model
# weights were cached.
os.environ['HF_HOME'] = '/tmp/.cache'

import torch
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
# Duplicate `from transformers import pipeline` removed; both names imported once.
from transformers import AutoTokenizer, pipeline

# Single source of truth for the served model; previously the id was also
# hardcoded as a string literal in the pipeline() call below.
MODEL_NAME = "Open-Orca/Mistral-7B-OpenOrca"

# NOTE(review): the original computed `device = 0 if torch.cuda.is_available()
# else -1` but never used it — placement is governed by device_map="auto".
# The unused variable has been dropped.

# Text-generation pipeline, 4-bit quantized (bitsandbytes) with fp16 compute
# to fit a 7B model in limited GPU memory. device_map="auto" lets accelerate
# shard/place the weights.
pipe = pipeline(
    "text-generation",
    model=MODEL_NAME,
    device_map="auto",
    model_kwargs={
        "load_in_4bit": True,
        "torch_dtype": torch.float16,
    },
)
# Tokenizer loaded separately so apply_chat_template() can be used directly.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# System prompt (Portuguese, user-facing): an empathetic autism-support
# assistant for parents and teachers. Content is runtime behavior — do not
# translate or edit casually.
SYSTEM_MESSAGE = {
    "role": "system",
    "content": (
        "Você é um chatbot especialista em autismo. "
        "Seu papel é orientar pais e professores com empatia, linguagem simples e respeitosa. "
        "Evite termos técnicos, a não ser que a pessoa peça. "
        "Valide as emoções de quem pergunta e ofereça exemplos práticos e acolhedores. "
        "Fale sempre em português claro e humano."
    )
}

app = FastAPI()
37
+
38
class ChatRequest(BaseModel):
    """Request body for POST /chat."""

    # The new user utterance to answer.
    message: str
    # Prior conversation turns, as chat-format dicts with "role"/"content"
    # keys (the same shape this API returns in "updated_history"). The
    # mutable [] default is safe here: pydantic deep-copies field defaults
    # per instance, so requests do not share state.
    chat_history: list = []
41
+
42
@app.post("/chat")
async def chat(request: ChatRequest):
    """Generate an assistant reply for `request.message` given prior history.

    Builds a chat-format message list (system prompt + client-supplied
    history + new user turn), renders it with the model's chat template,
    samples a completion, and returns both the reply and the updated
    history so the client can round-trip conversation state.

    Raises:
        HTTPException: 500 with the underlying error message on any failure.
    """
    try:
        full_history = [SYSTEM_MESSAGE] + request.chat_history + [
            {"role": "user", "content": request.message}
        ]

        prompt = tokenizer.apply_chat_template(
            full_history,
            tokenize=False,
            add_generation_prompt=True,
        )

        outputs = pipe(
            prompt,
            max_new_tokens=512,
            temperature=0.7,
            top_k=50,
            top_p=0.9,
            do_sample=True,
        )

        # The pipeline returns prompt + completion in 'generated_text'.
        # BUGFIX: the original used .replace(prompt, ""), which deletes EVERY
        # occurrence of the prompt text anywhere in the output and could
        # mangle a reply that echoes part of it. Strip the prompt strictly as
        # a prefix instead.
        generated = outputs[0]['generated_text']
        if generated.startswith(prompt):
            resposta = generated[len(prompt):].strip()
        else:
            resposta = generated.strip()

        return {
            "response": resposta,
            "updated_history": full_history + [{"role": "assistant", "content": resposta}],
        }

    except Exception as e:
        # Surface the failure to the client; detail carries the original message.
        raise HTTPException(status_code=500, detail=str(e))
73
+
74
@app.get("/")
def health_check():
    """Liveness probe: confirm the API is up and report which model it serves."""
    payload = {"status": "online", "model": MODEL_NAME}
    return payload
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
# Quantization backend required by load_in_4bit in app.py.
bitsandbytes>=0.43.0
# Enables device_map="auto" weight placement for the transformers pipeline.
accelerate>=0.30.0
# Model/tokenizer loading, text-generation pipeline, chat templating.
transformers>=4.40.0
torch>=2.2.0
# Web framework and ASGI server (see CMD in Dockerfile).
fastapi>=0.110.0
uvicorn>=0.29.0