cochi1706 committed on
Commit
cf9addc
·
1 Parent(s): 8657f49

Implement coding assistant chatbot using Qwen3 model with PEFT adapter and Gradio interface

Browse files
Files changed (1) hide show
  1. app.py +108 -31
app.py CHANGED
@@ -1,5 +1,33 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
 
5
  def respond(
@@ -9,47 +37,99 @@ def respond(
9
  max_tokens,
10
  temperature,
11
  top_p,
12
- hf_token: gr.OAuthToken,
13
  ):
14
  """
15
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
16
  """
17
- client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
18
-
19
  messages = [{"role": "system", "content": system_message}]
20
-
21
  messages.extend(history)
22
-
23
  messages.append({"role": "user", "content": message})
24
-
25
- response = ""
26
-
27
- for message in client.chat_completion(
28
  messages,
29
- max_tokens=max_tokens,
30
- stream=True,
31
- temperature=temperature,
32
- top_p=top_p,
33
- ):
34
- choices = message.choices
35
- token = ""
36
- if len(choices) and choices[0].delta.content:
37
- token = choices[0].delta.content
38
-
39
- response += token
40
- yield response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
 
43
  """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
  """
46
  chatbot = gr.ChatInterface(
47
  respond,
 
 
48
  type="messages",
49
  additional_inputs=[
50
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
51
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
52
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  gr.Slider(
54
  minimum=0.1,
55
  maximum=1.0,
@@ -60,10 +140,7 @@ chatbot = gr.ChatInterface(
60
  ],
61
  )
62
 
63
- with gr.Blocks() as demo:
64
- with gr.Sidebar():
65
- gr.LoginButton()
66
- chatbot.render()
67
 
68
 
69
  if __name__ == "__main__":
 
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel


# One-time model setup, executed at import time.
print("Đang tải model...")
base_model_name = "Qwen/Qwen3-0.6B"
adapter_repo = "cochi1706/coding-assistant"

# Pick precision/placement once: fp16 + auto device map on GPU, fp32 on CPU.
use_cuda = torch.cuda.is_available()

# Base weights for the causal LM.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float16 if use_cuda else torch.float32,
    device_map="auto" if use_cuda else None,
)

# Attach the fine-tuned PEFT adapter on top of the frozen base model.
model = PeftModel.from_pretrained(base_model, adapter_repo)

# Tokenizer comes from the adapter repo so it matches the fine-tune.
tokenizer = AutoTokenizer.from_pretrained(adapter_repo)

# Some causal-LM tokenizers ship without a pad token; fall back to EOS.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Inference only — disable dropout etc.
model.eval()
print("Model đã sẵn sàng!")
31
 
32
 
33
def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a reply from the fine-tuned coding-assistant model.

    Args:
        message: Latest user message text.
        history: Prior turns as ``{"role": ..., "content": ...}`` dicts
            (``gr.ChatInterface`` with ``type="messages"``).
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Softmax temperature applied to the logits (> 0).
        top_p: Nucleus-sampling threshold in (0, 1].

    Yields:
        The accumulated response text after each newly generated token.
    """
    # Build the conversation and render it with the model's chat template.
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    inputs = tokenizer(prompt, return_tensors="pt")
    # Move tensors to wherever the (possibly device-mapped) model lives;
    # a no-op on CPU.
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    response = ""

    with torch.no_grad():
        # Incremental decoding with a KV cache: the prompt is run through
        # the model once, then each step feeds ONLY the newly sampled token
        # plus past_key_values. The previous version re-ran the entire
        # sequence every step, making generation O(n^2) in sequence length.
        next_input = inputs["input_ids"]
        past_key_values = None

        for _ in range(max_tokens):
            outputs = model(
                next_input,
                past_key_values=past_key_values,
                use_cache=True,
            )
            past_key_values = outputs.past_key_values
            logits = outputs.logits[:, -1, :]

            # Temperature scaling.
            if temperature != 1.0:
                logits = logits / temperature

            # Nucleus (top-p) filtering: drop the probability tail whose
            # cumulative mass exceeds top_p.
            if top_p < 1.0:
                sorted_logits, sorted_indices = torch.sort(logits, descending=True)
                cumulative_probs = torch.cumsum(
                    torch.softmax(sorted_logits, dim=-1), dim=-1
                )
                sorted_indices_to_remove = cumulative_probs > top_p
                # Shift right so the first token crossing the threshold
                # is still kept.
                sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
                sorted_indices_to_remove[..., 0] = 0
                indices_to_remove = sorted_indices_to_remove.scatter(
                    1, sorted_indices, sorted_indices_to_remove
                )
                logits[indices_to_remove] = float('-inf')

            # Sample the next token from the filtered distribution.
            probs = torch.softmax(logits, dim=-1)
            next_token = torch.multinomial(probs, num_samples=1)

            # Stop on end-of-sequence.
            # NOTE(review): Qwen chat models typically end a turn with
            # <|im_end|>; this assumes the tokenizer's eos_token_id is that
            # token — confirm, otherwise generation may overrun the turn.
            if next_token.item() == tokenizer.eos_token_id:
                break

            # Next step consumes only the new token (cache holds the rest).
            next_input = next_token

            # Decode and stream the accumulated text.
            new_text = tokenizer.decode([next_token.item()], skip_special_tokens=True)
            response += new_text
            yield response
103
 
104
 
105
# Chatbot UI: coding assistant backed by the fine-tuned Qwen3 model.
# (Was a bare no-op string literal; a comment expresses the same intent
# without emitting a dead statement.)
chatbot = gr.ChatInterface(
    respond,
    title="🤖 Coding Assistant",
    description="Chatbot hỗ trợ lập trình",
    type="messages",
    additional_inputs=[
        gr.Textbox(
            value="You are a helpful coding assistant. Provide clear, concise, and accurate code solutions and explanations.",
            label="System message",
            lines=3,
        ),
        gr.Slider(
            minimum=1,
            maximum=2048,
            value=512,
            step=1,
            label="Max new tokens",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=2.0,
            value=0.7,
            step=0.1,
            label="Temperature",
        ),
        # NOTE(review): this slider's value/step/label were not visible in
        # the diff; reconstructed from the standard gradio chatbot template
        # — confirm against the deployed Space.
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)

# Gradio Spaces look for a module-level `demo` to launch.
demo = chatbot
 
 
 
144
 
145
 
146
  if __name__ == "__main__":