aryo100 committed on
Commit
4ec3486
·
1 Parent(s): 53ee96a

update app

Browse files
Files changed (1) hide show
  1. app.py +13 -3
app.py CHANGED
@@ -5,8 +5,19 @@ import torch, os, uvicorn
5
 
6
  app = FastAPI()
7
 
8
- model_name = "Qwen/Qwen-1_8B-Chat" # ganti sesuai ukuran
 
9
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 
 
 
 
 
 
 
 
 
 
10
  model = AutoModelForCausalLM.from_pretrained(
11
  model_name,
12
  trust_remote_code=True,
@@ -20,15 +31,14 @@ class ChatRequest(BaseModel):
20
 
21
  @app.post("/chat")
22
  def chat(req: ChatRequest):
23
- # Format percakapan sesuai template Qwen
24
  messages = [
25
  {"role": "system", "content": "You are a helpful AI assistant."},
26
  {"role": "user", "content": req.prompt},
27
  ]
28
 
29
  text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
30
-
31
  inputs = tokenizer(text, return_tensors="pt").to(model.device)
 
32
  outputs = model.generate(**inputs, max_new_tokens=req.max_new_tokens)
33
  reply = tokenizer.decode(outputs[0], skip_special_tokens=True)
34
 
 
5
 
6
  app = FastAPI()
7
 
8
+ model_name = "Qwen/Qwen-1_8B-Chat"
9
+
10
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
11
+ # Pasang template manual kalau tidak tersedia
12
+ if tokenizer.chat_template is None:
13
+ tokenizer.chat_template = """{% for message in messages %}
14
+ {% if message['role'] == 'system' %}{{ '<|system|>\n' + message['content'] + '\n' }}
15
+ {% elif message['role'] == 'user' %}{{ '<|user|>\n' + message['content'] + '\n' }}
16
+ {% elif message['role'] == 'assistant' %}{{ '<|assistant|>\n' + message['content'] + '\n' }}
17
+ {% endif %}
18
+ {% endfor %}<|assistant|>
19
+ """
20
+
21
  model = AutoModelForCausalLM.from_pretrained(
22
  model_name,
23
  trust_remote_code=True,
 
31
 
32
  @app.post("/chat")
33
  def chat(req: ChatRequest):
 
34
  messages = [
35
  {"role": "system", "content": "You are a helpful AI assistant."},
36
  {"role": "user", "content": req.prompt},
37
  ]
38
 
39
  text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
40
  inputs = tokenizer(text, return_tensors="pt").to(model.device)
41
+
42
  outputs = model.generate(**inputs, max_new_tokens=req.max_new_tokens)
43
  reply = tokenizer.decode(outputs[0], skip_special_tokens=True)
44