aryo100 committed on
Commit
e4b129b
·
1 Parent(s): 4d9abbf

update app

Browse files
Files changed (1) hide show
  1. app.py +15 -14
app.py CHANGED
@@ -17,9 +17,10 @@ model = AutoModelForCausalLM.from_pretrained(
17
  device_map="cpu",
18
  trust_remote_code=True,
19
  )
20
- if not tokenizer.chat_template:
21
- tokenizer.chat_template = """{% for message in messages %}{{ message['role'] }}: {{ message['content'] }}
22
- {% endfor %}Assistant:"""
 
23
 
24
  # Request schema
25
  class ChatRequest(BaseModel):
@@ -29,19 +30,19 @@ class ChatRequest(BaseModel):
29
  @app.post("/chat")
30
  def chat(req: ChatRequest):
31
  # Format input sesuai template Qwen
32
- # text = tokenizer.apply_chat_template(
33
- # req.messages,
34
- # tokenize=False,
35
- # add_generation_prompt=True
36
- # )
37
- # inputs = tokenizer(text, return_tensors="pt").to(model.device)
38
 
39
- prompt = ""
40
- for msg in req.messages:
41
- prompt += f"{msg['role']}: {msg['content']}\n"
42
- prompt += "assistant:"
43
 
44
- inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
45
 
46
  # Generate
47
  outputs = model.generate(
 
17
  device_map="cpu",
18
  trust_remote_code=True,
19
  )
20
+ model.config.use_cache = True
21
+ # if not tokenizer.chat_template:
22
+ # tokenizer.chat_template = """{% for message in messages %}{{ message['role'] }}: {{ message['content'] }}
23
+ # {% endfor %}Assistant:"""
24
 
25
  # Request schema
26
  class ChatRequest(BaseModel):
 
30
  @app.post("/chat")
31
  def chat(req: ChatRequest):
32
  # Format input sesuai template Qwen
33
+ text = tokenizer.apply_chat_template(
34
+ req.messages,
35
+ tokenize=False,
36
+ add_generation_prompt=True
37
+ )
38
+ inputs = tokenizer(text, return_tensors="pt").to(model.device)
39
 
40
+ # prompt = ""
41
+ # for msg in req.messages:
42
+ # prompt += f"{msg['role']}: {msg['content']}\n"
43
+ # prompt += "assistant:"
44
 
45
+ # inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
46
 
47
  # Generate
48
  outputs = model.generate(