aryo100 committed
Commit 5383485 · 1 Parent(s): e4b129b

update app

Files changed (1):
  app.py (+18 -17)
app.py CHANGED
@@ -5,11 +5,10 @@ import torch
 import os
 import uvicorn
 
-
 app = FastAPI()
 
 # Load model & tokenizer once at startup
-MODEL_NAME = "Qwen/Qwen-1_8B-Chat"
+MODEL_NAME = "Qwen/Qwen-1.8B-Chat"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
@@ -18,33 +17,32 @@ model = AutoModelForCausalLM.from_pretrained(
     trust_remote_code=True,
 )
 model.config.use_cache = True
-# if not tokenizer.chat_template:
-#     tokenizer.chat_template = """{% for message in messages %}{{ message['role'] }}: {{ message['content'] }}
-# {% endfor %}Assistant:"""
+
+# fallback if chat_template is empty
+if not tokenizer.chat_template:
+    tokenizer.chat_template = """{% for message in messages %}
+{{ message['role'] }}: {{ message['content'] }}
+{% endfor %}
+assistant:"""
 
 # Request schema
+class Message(BaseModel):
+    role: str
+    content: str
+
 class ChatRequest(BaseModel):
-    messages: list  # format [{"role": "user", "content": "hello"}]
+    messages: list[Message]
     max_new_tokens: int = 128
 
 @app.post("/chat")
 def chat(req: ChatRequest):
-    # Format input using Qwen's chat template
     text = tokenizer.apply_chat_template(
-        req.messages,
+        [m.dict() for m in req.messages],
         tokenize=False,
         add_generation_prompt=True
     )
     inputs = tokenizer(text, return_tensors="pt").to(model.device)
 
-    # prompt = ""
-    # for msg in req.messages:
-    #     prompt += f"{msg['role']}: {msg['content']}\n"
-    # prompt += "assistant:"
-
-    # inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-
-    # Generate
     outputs = model.generate(
         **inputs,
         max_new_tokens=req.max_new_tokens,
@@ -53,7 +51,10 @@ def chat(req: ChatRequest):
         temperature=0.7
     )
 
-    response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
+    response = tokenizer.decode(
+        outputs[0][inputs["input_ids"].shape[1]:],
+        skip_special_tokens=True
+    )
 
     return {"response": response}
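
The fallback assigned to tokenizer.chat_template is plain Jinja, so its output can be previewed without loading the model. A minimal sketch of the rendering, using the jinja2 package directly and assuming transformers' usual chat-template settings (trim_blocks and lstrip_blocks enabled, which drop the newlines after the {% ... %} tags):

from jinja2 import Environment

# Same template string the commit assigns to tokenizer.chat_template
FALLBACK = """{% for message in messages %}
{{ message['role'] }}: {{ message['content'] }}
{% endfor %}
assistant:"""

env = Environment(trim_blocks=True, lstrip_blocks=True)
rendered = env.from_string(FALLBACK).render(
    messages=[{"role": "user", "content": "hello"}]
)
print(rendered)
# user: hello
# assistant: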
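
The reshaped decode call relies on model.generate returning the prompt tokens followed by the completion; slicing from inputs["input_ids"].shape[1] keeps only the newly generated tokens. A toy illustration of that slice with stand-in tensors:

import torch

prompt_ids = torch.tensor([[101, 102, 103]])           # stand-in for inputs["input_ids"]
generated  = torch.tensor([[101, 102, 103, 7, 8, 9]])  # stand-in for model.generate output

# Drop the echoed prompt; keep only the completion tokens
new_tokens = generated[0][prompt_ids.shape[1]:]
print(new_tokens.tolist())  # [7, 8, 9]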
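
With the app served by uvicorn, the updated endpoint can be exercised with a short client. A sketch using requests; the host and port are assumptions (uvicorn defaults to 127.0.0.1:8000 unless the app passes something else):

import requests

# Hypothetical local address; adjust to wherever uvicorn serves the app
URL = "http://127.0.0.1:8000/chat"

payload = {
    "messages": [
        {"role": "user", "content": "hello"},
    ],
    "max_new_tokens": 64,  # optional; ChatRequest defaults to 128
}

resp = requests.post(URL, json=payload)
resp.raise_for_status()
print(resp.json()["response"])

Because messages is now typed as list[Message], a request whose items are missing role or content is rejected by FastAPI with a 422 validation error instead of failing later inside apply_chat_template.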