lehungquangminh committed on
Commit
54d1587
·
verified ·
1 Parent(s): 9b70af2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -24
app.py CHANGED
@@ -4,13 +4,11 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
4
 
5
  MODEL_ID = "vietrix/viena-60m"
6
 
7
- # ==== LOAD MODEL 1 LẦN LÚC START SPACE ====
8
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
9
-
10
  model = AutoModelForCausalLM.from_pretrained(
11
  MODEL_ID,
12
- torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
13
- device_map="auto", # cho nó tự nhét lên GPU nếu có
14
  )
15
 
16
 
@@ -21,14 +19,12 @@ def respond(
21
  max_tokens,
22
  temperature,
23
  top_p,
24
- hf_token: gr.OAuthToken, # vẫn giữ cho đẹp, thực ra không xài
25
  ):
26
- # build messages theo format chat
27
  messages = [{"role": "system", "content": system_message}]
28
- messages.extend(history) # history đã là list[{"role","content"}]
29
  messages.append({"role": "user", "content": message})
30
 
31
- # dùng chat template nếu model có
32
  if hasattr(tokenizer, "apply_chat_template"):
33
  prompt = tokenizer.apply_chat_template(
34
  messages,
@@ -36,17 +32,9 @@ def respond(
36
  add_generation_prompt=True,
37
  )
38
  else:
39
- # fallback tự ráp prompt đơn giản
40
- parts = [f"System: {system_message}\n"]
41
- for m in history:
42
- if m["role"] == "user":
43
- parts.append(f"User: {m['content']}\n")
44
- elif m["role"] == "assistant":
45
- parts.append(f"Assistant: {m['content']}\n")
46
- parts.append(f"User: {message}\nAssistant:")
47
- prompt = "".join(parts)
48
 
49
- inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
50
 
51
  outputs = model.generate(
52
  **inputs,
@@ -57,12 +45,11 @@ def respond(
57
  pad_token_id=tokenizer.eos_token_id,
58
  )
59
 
60
- generated_ids = outputs[0, inputs.input_ids.shape[1]:]
61
- full_text = tokenizer.decode(generated_ids, skip_special_tokens=True)
62
 
63
- # stream từng ký tự cho hợp với ChatInterface stream
64
  resp = ""
65
- for ch in full_text:
66
  resp += ch
67
  yield resp
68
 
@@ -86,9 +73,8 @@ chatbot = gr.ChatInterface(
86
 
87
  with gr.Blocks() as demo:
88
  with gr.Sidebar():
89
- gr.LoginButton() # nếu m muốn bắt user login HF mới xài
90
  chatbot.render()
91
 
92
-
93
  if __name__ == "__main__":
94
  demo.launch()
 
4
 
5
  MODEL_ID = "vietrix/viena-60m"
6
 
 
7
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
8
  model = AutoModelForCausalLM.from_pretrained(
9
  MODEL_ID,
10
+ torch_dtype=torch.float32,
11
+ device_map="cpu",
12
  )
13
 
14
 
 
19
  max_tokens,
20
  temperature,
21
  top_p,
22
+ hf_token: gr.OAuthToken, # giữ cho hợp với template, nhưng không dùng
23
  ):
 
24
  messages = [{"role": "system", "content": system_message}]
25
+ messages.extend(history)
26
  messages.append({"role": "user", "content": message})
27
 
 
28
  if hasattr(tokenizer, "apply_chat_template"):
29
  prompt = tokenizer.apply_chat_template(
30
  messages,
 
32
  add_generation_prompt=True,
33
  )
34
  else:
35
+ prompt = message
 
 
 
 
 
 
 
 
36
 
37
+ inputs = tokenizer(prompt, return_tensors="pt")
38
 
39
  outputs = model.generate(
40
  **inputs,
 
45
  pad_token_id=tokenizer.eos_token_id,
46
  )
47
 
48
+ gen_ids = outputs[0, inputs.input_ids.shape[1]:]
49
+ text = tokenizer.decode(gen_ids, skip_special_tokens=True)
50
 
 
51
  resp = ""
52
+ for ch in text:
53
  resp += ch
54
  yield resp
55
 
 
73
 
74
  with gr.Blocks() as demo:
75
  with gr.Sidebar():
76
+ gr.LoginButton()
77
  chatbot.render()
78
 
 
79
  if __name__ == "__main__":
80
  demo.launch()