GTee2 committed on
Commit
240f866
·
verified ·
1 Parent(s): dd95682

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -7
app.py CHANGED
@@ -6,13 +6,18 @@ from collections import defaultdict
6
 
7
  app = FastAPI(title="Mariza + Qwen3-0.6B CPU Free")
8
 
9
- print("Carregando Qwen3-0.6B em fp16 puro na CPU... (2-4 min na primeira vez)")
10
 
11
- tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B", trust_remote_code=True)
 
 
 
 
 
12
 
13
  model = AutoModelForCausalLM.from_pretrained(
14
  "Qwen/Qwen3-0.6B",
15
- torch_dtype="auto", # deixa o transformers escolher fp16/float16
16
  device_map="cpu",
17
  trust_remote_code=True,
18
  low_cpu_mem_usage=True
@@ -23,7 +28,7 @@ MAX_CONTEXT_TOKENS = 3800
23
 
24
  @app.get("/")
25
  async def root():
26
- return {"message": "Qwen3-0.6B tá vivo e quente na CPU free, chefe! Sem quantização, sem dor de cabeça 😈"}
27
 
28
  @app.post("/chat")
29
  async def chat(request: Request):
@@ -35,9 +40,8 @@ async def chat(request: Request):
35
  stream = data.get("stream", False)
36
 
37
  if not prompt:
38
- return JSONResponse({"error": "prompt vazio, seu safado"})
39
 
40
- # Monta histórico
41
  messages = history_db[user_id]
42
  full_prompt = ""
43
  for role, content in messages:
@@ -82,4 +86,4 @@ async def chat(request: Request):
82
 
83
  return JSONResponse({"response": resposta})
84
 
85
- print("Qwen3-0.6B carregado! Pode mandar o zap que Mariza tá pronta pra responder 24/7 😏")
 
6
 
7
  app = FastAPI(title="Mariza + Qwen3-0.6B CPU Free")
8
 
9
+ print("Carregando Qwen3-0.6B (forçando slow tokenizer pra burlar o bug do tokenizers)")
10
 
11
+ # <<< AS DUAS LINHAS MÁGICAS >>>
12
+ tokenizer = AutoTokenizer.from_pretrained(
13
+ "Qwen/Qwen3-0.6B",
14
+ trust_remote_code=True,
15
+ use_fast=False # <─ ESSA LINHA SALVA TUDO
16
+ )
17
 
18
  model = AutoModelForCausalLM.from_pretrained(
19
  "Qwen/Qwen3-0.6B",
20
+ torch_dtype="auto",
21
  device_map="cpu",
22
  trust_remote_code=True,
23
  low_cpu_mem_usage=True
 
28
 
29
  @app.get("/")
30
  async def root():
31
+ return {"message": "Qwen3-0.6B tá vivo e foda na CPU free, chefe! (sem bug do tokenizers) 😈"}
32
 
33
  @app.post("/chat")
34
  async def chat(request: Request):
 
40
  stream = data.get("stream", False)
41
 
42
  if not prompt:
43
+ return JSONResponse({"error": "prompt vazio, safado"})
44
 
 
45
  messages = history_db[user_id]
46
  full_prompt = ""
47
  for role, content in messages:
 
86
 
87
  return JSONResponse({"response": resposta})
88
 
89
+ print("Qwen3-0.6B carregado e pronto pra dominar o WhatsApp 24/7 de graça, chefe! 🔥")