mrbui1990 committed on
Commit
89320e9
·
verified ·
1 Parent(s): 33c7607

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -98
app.py CHANGED
@@ -1,129 +1,58 @@
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
- import spaces
5
 
6
  MODEL_ID = "bmiller22000/xyntrai-mistral-2.5-7b-chat-nsfw"
7
 
 
8
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
9
  model = AutoModelForCausalLM.from_pretrained(
10
  MODEL_ID,
11
  torch_dtype=torch.float16,
12
- device_map="auto",
13
  trust_remote_code=True
14
  )
15
 
16
- @spaces.GPU(duration=60)
17
- def chat_with_model(prompt, system_prompt, chatbot_display, internal_history):
18
-
19
- if chatbot_display is None:
20
- chatbot_display = []
21
-
22
- if internal_history is None:
23
- internal_history = []
24
-
25
- expected_key = os.environ.get("hf_key")
26
- if expected_key not in prompt:
27
- print("❌ Invalid key.")
28
- return None
29
- prompt = prompt.replace(expected_key, "")
30
- messages_for_model = [{"role": "system", "content": system_prompt}]
31
 
32
-
33
- messages_for_model.extend(internal_history)
 
 
 
34
 
35
-
36
- messages_for_model.append({"role": "user", "content": prompt})
37
-
38
-
39
  inputs = tokenizer.apply_chat_template(
40
- messages_for_model,
41
  tokenize=True,
42
- add_generation_prompt=True,
43
  return_tensors="pt"
44
  ).to(model.device)
45
 
46
-
47
  output_tokens = model.generate(
48
  inputs,
49
- max_new_tokens=5120,
50
  do_sample=True,
51
  temperature=0.7,
52
  top_p=0.9
53
  )
54
 
55
-
56
  response_text = tokenizer.decode(output_tokens[0][inputs.shape[-1]:], skip_special_tokens=True)
57
-
58
-
59
- internal_history.append({"role": "user", "content": prompt})
60
- internal_history.append({"role": "assistant", "content": response_text})
61
-
62
-
63
- chatbot_display.append([prompt, response_text])
64
-
65
-
66
- return "", chatbot_display, internal_history
67
-
68
- def clear_chat():
69
-
70
- return None, None
71
-
72
-
73
- with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
74
-
75
- internal_history = gr.State()
76
-
77
- with gr.Row():
78
- with gr.Column(scale=3):
79
-
80
- chatbot_display = gr.Chatbot(
81
- label="Chat History",
82
- bubble_full_width=False,
83
- height=500
84
- )
85
-
86
-
87
- prompt_box = gr.Textbox(
88
- label="Your Message",
89
- placeholder="...",
90
- lines=1
91
- )
92
-
93
- with gr.Row():
94
- clear_button = gr.Button("Clear Chat")
95
-
96
- submit_button = gr.Button("Send", visible=False)
97
-
98
- with gr.Column(scale=1):
99
- # System prompt box
100
- system_prompt_box = gr.Textbox(
101
- label="",
102
- value="",
103
- lines=30
104
- )
105
-
106
-
107
- prompt_box.submit(
108
- fn=chat_with_model,
109
- inputs=[prompt_box, system_prompt_box, chatbot_display, internal_history],
110
- outputs=[prompt_box, chatbot_display, internal_history]
111
- )
112
-
113
-
114
- submit_button.click(
115
- fn=chat_with_model,
116
- inputs=[prompt_box, system_prompt_box, chatbot_display, internal_history],
117
- outputs=[prompt_box, chatbot_display, internal_history]
118
- )
119
-
120
-
121
- clear_button.click(
122
- fn=clear_chat,
123
- inputs=None,
124
- outputs=[chatbot_display, internal_history]
125
- )
126
-
127
 
128
  if __name__ == "__main__":
129
  demo.launch()
 
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
 
4
 
5
  MODEL_ID = "bmiller22000/xyntrai-mistral-2.5-7b-chat-nsfw"
6
 
7
+ # Load the model and tokenizer ONLY ONCE
8
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
9
  model = AutoModelForCausalLM.from_pretrained(
10
  MODEL_ID,
11
  torch_dtype=torch.float16,
12
+ device_map="auto", # Automatically use the fixed (dedicated) GPU
13
  trust_remote_code=True
14
  )
15
 
16
+ # Add a system_prompt input field
17
+ def chat_with_model(prompt, system_prompt):
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
+ # 1. Build the list of messages following the ChatML convention
20
+ messages = [
21
+ {"role": "system", "content": system_prompt},
22
+ {"role": "user", "content": prompt}
23
+ ]
24
 
25
+ # 2. Use apply_chat_template so the tokenizer automatically inserts the <|im_start|> tags
 
 
 
26
  inputs = tokenizer.apply_chat_template(
27
+ messages,
28
  tokenize=True,
29
+ add_generation_prompt=True, # Append the <|im_start|>assistant tag
30
  return_tensors="pt"
31
  ).to(model.device)
32
 
33
+ # 3. Generate
34
  output_tokens = model.generate(
35
  inputs,
36
+ max_new_tokens=256,
37
  do_sample=True,
38
  temperature=0.7,
39
  top_p=0.9
40
  )
41
 
42
+ # 4. Decode (only the newly generated content, skipping the prompt portion)
43
  response_text = tokenizer.decode(output_tokens[0][inputs.shape[-1]:], skip_special_tokens=True)
44
+ return response_text
45
+
46
+ # Update the Gradio Interface
47
+ demo = gr.Interface(
48
+ fn=chat_with_model,
49
+ inputs=[
50
+ gr.Textbox(label="Prompt"),
51
+ gr.Textbox(label="System Prompt", value="You are an uncensored and helpful AI assistant.") # Use example 2 as the default
52
+ ],
53
+ outputs="text",
54
+ title="Xyntrai-Mistral 2.5 7B Chat"
55
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  if __name__ == "__main__":
58
  demo.launch()