michsethowusu committed on
Commit
77bdc4e
·
verified ·
1 Parent(s): b767799

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -132
app.py CHANGED
@@ -4,12 +4,16 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStream
4
  from peft import PeftModel
5
  from threading import Thread
6
 
7
- # Your model configuration
 
 
8
  BASE_MODEL = "unsloth/Qwen3-4B-Instruct-2507"
9
  LORA_MODEL = "michsethowusu/twi_code_assistant"
10
 
11
- print("Loading base model...")
12
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
 
 
13
  base_model = AutoModelForCausalLM.from_pretrained(
14
  BASE_MODEL,
15
  torch_dtype=torch.float16,
@@ -17,131 +21,115 @@ base_model = AutoModelForCausalLM.from_pretrained(
17
  low_cpu_mem_usage=True
18
  )
19
 
20
- print("Loading LoRA adapters...")
21
  model = PeftModel.from_pretrained(base_model, LORA_MODEL)
22
- model = model.merge_and_unload() # Merge for faster inference
23
  print("Model ready!")
24
 
25
- def generate_response(message, history, temperature, top_p, top_k, max_tokens):
26
- """Generate response from the model with streaming"""
27
-
28
- # Build conversation history - history is list of [user_msg, bot_msg] pairs
29
- messages = []
30
- for user_msg, assistant_msg in history:
31
- if user_msg:
32
- messages.append({"role": "user", "content": user_msg})
33
- if assistant_msg:
34
- messages.append({"role": "assistant", "content": assistant_msg})
35
-
36
- # Add current message
37
- messages.append({"role": "user", "content": message})
38
-
39
- # Apply chat template
40
- text = tokenizer.apply_chat_template(
41
  messages,
42
  tokenize=False,
43
  add_generation_prompt=True
44
  )
45
-
46
- # Tokenize
47
- inputs = tokenizer(text, return_tensors="pt").to(model.device)
48
-
49
- # Setup streaming
50
  streamer = TextIteratorStreamer(
51
  tokenizer,
52
  skip_prompt=True,
53
  skip_special_tokens=True
54
  )
55
-
56
- # Generation kwargs
57
- generation_kwargs = {
58
  **inputs,
59
- "max_new_tokens": max_tokens,
60
- "temperature": temperature,
61
- "top_p": top_p,
62
- "top_k": top_k,
63
- "do_sample": True,
64
- "streamer": streamer,
65
- }
66
-
67
- # Start generation in separate thread
68
- thread = Thread(target=model.generate, kwargs=generation_kwargs)
69
  thread.start()
70
-
71
- # Stream the response
72
- partial_message = ""
73
  for new_text in streamer:
74
- partial_message += new_text
75
- yield partial_message
76
-
77
  thread.join()
78
 
79
- # Create Gradio interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
81
  gr.Markdown(
82
  """
83
  # πŸ‡¬πŸ‡­ Twi Code Assistant
84
-
85
- A fine-tuned Qwen3-4B model specialized for coding assistance in Twi language context.
86
  Ask me anything about programming, and I'll help you out!
87
  """
88
  )
89
-
90
  chatbot = gr.Chatbot(
91
  height=500,
92
  label="Chat History",
93
- avatar_images=(None, "https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png")
 
94
  )
95
-
96
  with gr.Row():
97
  msg = gr.Textbox(
98
  label="Your Message",
99
- placeholder="Ask me a coding question...",
100
  scale=4,
101
- lines=2
102
  )
103
  submit = gr.Button("Send πŸš€", scale=1, variant="primary")
104
-
105
  with gr.Accordion("βš™οΈ Generation Parameters", open=False):
106
  gr.Markdown("*Adjust these settings to control the response style*")
107
-
108
- temperature = gr.Slider(
109
- minimum=0.1,
110
- maximum=2.0,
111
- value=0.7,
112
- step=0.1,
113
- label="Temperature",
114
- info="Higher = more creative, Lower = more focused"
115
- )
116
- top_p = gr.Slider(
117
- minimum=0.1,
118
- maximum=1.0,
119
- value=0.8,
120
- step=0.05,
121
- label="Top P",
122
- info="Nucleus sampling threshold"
123
- )
124
- top_k = gr.Slider(
125
- minimum=1,
126
- maximum=100,
127
- value=20,
128
- step=1,
129
- label="Top K",
130
- info="Number of top tokens to consider"
131
- )
132
- max_tokens = gr.Slider(
133
- minimum=64,
134
- maximum=2048,
135
- value=512,
136
- step=64,
137
- label="Max Tokens",
138
- info="Maximum length of response"
139
- )
140
-
141
- with gr.Row():
142
- clear = gr.Button("πŸ—‘οΈ Clear Chat")
143
-
144
- # Example prompts
145
  gr.Examples(
146
  examples=[
147
  ["How do I create a Python function?"],
@@ -153,66 +141,44 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
153
  inputs=msg,
154
  label="Example Questions"
155
  )
156
-
157
- # Event handlers
158
- def user_submit(user_message, history):
159
- return "", history + [[user_message, None]]
160
-
161
- def bot_respond(history, temperature, top_p, top_k, max_tokens):
162
- user_message = history[-1][0]
163
- history_context = history[:-1]
164
-
165
- history[-1][1] = ""
166
- for response in generate_response(
167
- user_message,
168
- history_context,
169
- temperature,
170
- top_p,
171
- top_k,
172
- max_tokens
173
- ):
174
- history[-1][1] = response
175
- yield history
176
-
177
- # Connect events
178
  msg.submit(
179
- user_submit,
180
- [msg, chatbot],
181
- [msg, chatbot],
182
- queue=False
183
  ).then(
184
  bot_respond,
185
  [chatbot, temperature, top_p, top_k, max_tokens],
186
- chatbot
187
  )
188
-
189
  submit.click(
190
- user_submit,
191
- [msg, chatbot],
192
- [msg, chatbot],
193
- queue=False
194
  ).then(
195
  bot_respond,
196
  [chatbot, temperature, top_p, top_k, max_tokens],
197
- chatbot
198
  )
199
-
200
  clear.click(lambda: None, None, chatbot, queue=False)
201
-
202
  gr.Markdown(
203
  """
204
  ---
205
  ### πŸ’‘ Tips for Best Results:
206
- - **Factual/Technical questions**: Use temperature 0.3-0.5
207
- - **Creative coding solutions**: Use temperature 0.7-1.0
208
- - **Code generation**: Use temperature 0.5-0.7
209
-
210
  ### πŸ“ About This Model
211
- This is a fine-tuned Qwen3-4B model trained with Unsloth for efficient coding assistance.
212
-
213
  **Model**: [michsethowusu/twi_code_assistant](https://huggingface.co/michsethowusu/twi_code_assistant)
214
  """
215
  )
216
 
 
 
 
217
  if __name__ == "__main__":
218
  demo.queue().launch()
 
4
  from peft import PeftModel
5
  from threading import Thread
6
 
7
+ # ------------------------------------------------------------------
8
+ # 1. Model setup
9
+ # ------------------------------------------------------------------
10
  BASE_MODEL = "unsloth/Qwen3-4B-Instruct-2507"
11
  LORA_MODEL = "michsethowusu/twi_code_assistant"
12
 
13
+ print("Loading tokenizer…")
14
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
15
+
16
+ print("Loading base model…")
17
  base_model = AutoModelForCausalLM.from_pretrained(
18
  BASE_MODEL,
19
  torch_dtype=torch.float16,
 
21
  low_cpu_mem_usage=True
22
  )
23
 
24
+ print("Loading LoRA adapters…")
25
  model = PeftModel.from_pretrained(base_model, LORA_MODEL)
26
+ model = model.merge_and_unload()
27
  print("Model ready!")
28
 
29
+ # ------------------------------------------------------------------
30
+ # 2. Generation helper
31
+ # ------------------------------------------------------------------
32
def generate_response(message: str, history: list[dict], temperature, top_p, top_k, max_tokens):
    """Stream the assistant's reply to ``message`` given the prior conversation.

    Args:
        message: The newest user message.
        history: Previous turns as ``{"role": ..., "content": ...}`` dicts.
            Only ``role`` and ``content`` are forwarded to the chat template;
            any other keys on the dicts are dropped. ``None`` is treated as
            an empty conversation.
        temperature: Sampling temperature passed to ``model.generate``.
        top_p: Nucleus-sampling threshold passed to ``model.generate``.
        top_k: Top-k cutoff passed to ``model.generate`` (coerced to ``int``).
        max_tokens: Upper bound on newly generated tokens (coerced to ``int``).

    Yields:
        The reply accumulated so far, growing with each streamed chunk.

    Raises:
        Exception: Whatever ``model.generate`` raised in the worker thread,
            re-raised here after the stream has been drained.
    """
    messages = [
        {"role": turn["role"], "content": turn["content"]}
        for turn in (history or [])
    ]
    messages.append({"role": "user", "content": message})

    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True
    )

    gen_kwargs = dict(
        **inputs,
        # UI sliders may hand over floats; generate expects integers here.
        max_new_tokens=int(max_tokens),
        temperature=temperature,
        top_p=top_p,
        top_k=int(top_k),
        do_sample=True,
        streamer=streamer,
    )

    errors = []

    def _run_generation():
        # Runs in the worker thread. If generate() fails, the streamer would
        # never receive its stop signal and the loop below would block
        # forever, so end the stream explicitly and keep the error so it can
        # be re-raised in the caller's thread.
        try:
            model.generate(**gen_kwargs)
        except Exception as exc:
            errors.append(exc)
            streamer.end()

    thread = Thread(target=_run_generation)
    thread.start()

    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial

    thread.join()

    if errors:
        raise errors[0]
72
 
73
+ # ------------------------------------------------------------------
74
+ # 3. Gradio event helpers
75
+ # ------------------------------------------------------------------
76
def user_submit(user_message, history):
    """Clear the textbox and append the user's message to the chat history.

    Args:
        user_message: Text the user just submitted.
        history: Existing turns as ``{"role": ..., "content": ...}`` dicts;
            ``None`` is treated as an empty conversation.

    Returns:
        A ``(textbox_value, history)`` tuple: an empty string to reset the
        input box, and a new list with the user turn appended. The incoming
        list is not mutated.
    """
    previous_turns = history or []
    return "", previous_turns + [{"role": "user", "content": user_message}]
79
+
80
+
81
def bot_respond(history, temperature, top_p, top_k, max_tokens):
    """Stream the assistant's answer to the latest user turn into the history.

    Args:
        history: Chat turns as ``{"role": ..., "content": ...}`` dicts, with
            the user's newest message expected as the last entry.
        temperature: Sampling temperature forwarded to ``generate_response``.
        top_p: Nucleus-sampling threshold forwarded to ``generate_response``.
        top_k: Top-k cutoff forwarded to ``generate_response``.
        max_tokens: Generation length limit forwarded to ``generate_response``.

    Yields:
        The full history after each streamed chunk, with the assistant's
        growing reply as the final entry. If there is no pending user turn,
        the history is yielded once, unchanged (``[]`` when it is empty).
    """
    # Nothing to answer unless the most recent turn came from the user.
    if not history or history[-1].get("role") != "user":
        yield history or []
        return

    user_turn = history[-1]["content"]
    history_before = history[:-1]

    # Add a separate assistant turn to stream into. Writing the reply over
    # history[-1] would erase the user's message from the conversation.
    history = history + [{"role": "assistant", "content": ""}]
    for assistant_text in generate_response(
        user_turn, history_before, temperature, top_p, top_k, max_tokens
    ):
        history[-1] = {"role": "assistant", "content": assistant_text}
        yield history
92
+
93
+ # ------------------------------------------------------------------
94
+ # 4. Gradio UI
95
+ # ------------------------------------------------------------------
96
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
97
  gr.Markdown(
98
  """
99
  # πŸ‡¬πŸ‡­ Twi Code Assistant
100
+ A fine-tuned Qwen3-4B model specialised for coding assistance in Twi language context.
 
101
  Ask me anything about programming, and I'll help you out!
102
  """
103
  )
104
+
105
  chatbot = gr.Chatbot(
106
  height=500,
107
  label="Chat History",
108
+ type="messages", # NEW FORMAT
109
+ avatar_images=(None, "https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png"),
110
  )
111
+
112
  with gr.Row():
113
  msg = gr.Textbox(
114
  label="Your Message",
115
+ placeholder="Ask me a coding question…",
116
  scale=4,
117
+ lines=2,
118
  )
119
  submit = gr.Button("Send πŸš€", scale=1, variant="primary")
120
+
121
  with gr.Accordion("βš™οΈ Generation Parameters", open=False):
122
  gr.Markdown("*Adjust these settings to control the response style*")
123
+ temperature = gr.Slider(0.1, 2.0, 0.7, step=0.1, label="Temperature")
124
+ top_p = gr.Slider(0.1, 1.0, 0.8, step=0.05, label="Top P")
125
+ top_k = gr.Slider(1, 100, 20, step=1, label="Top K")
126
+ max_tokens = gr.Slider(64, 2048, 512, step=64, label="Max Tokens")
127
+
128
+ clear = gr.Button("πŸ—‘οΈ Clear Chat")
129
+
130
+ # ------------------------------------------------------------------
131
+ # 5. Examples
132
+ # ------------------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  gr.Examples(
134
  examples=[
135
  ["How do I create a Python function?"],
 
141
  inputs=msg,
142
  label="Example Questions"
143
  )
144
+
145
+ # ------------------------------------------------------------------
146
+ # 6. Event wiring
147
+ # ------------------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  msg.submit(
149
+ user_submit, [msg, chatbot], [msg, chatbot], queue=False
 
 
 
150
  ).then(
151
  bot_respond,
152
  [chatbot, temperature, top_p, top_k, max_tokens],
153
+ chatbot,
154
  )
155
+
156
  submit.click(
157
+ user_submit, [msg, chatbot], [msg, chatbot], queue=False
 
 
 
158
  ).then(
159
  bot_respond,
160
  [chatbot, temperature, top_p, top_k, max_tokens],
161
+ chatbot,
162
  )
163
+
164
  clear.click(lambda: None, None, chatbot, queue=False)
165
+
166
  gr.Markdown(
167
  """
168
  ---
169
  ### πŸ’‘ Tips for Best Results:
170
+ - **Factual/Technical questions**: temperature 0.3-0.5
171
+ - **Creative coding solutions**: temperature 0.7-1.0
172
+ - **Code generation**: temperature 0.5-0.7
173
+
174
  ### πŸ“ About This Model
175
+ Fine-tuned Qwen3-4B with Unsloth for efficient coding assistance.
 
176
  **Model**: [michsethowusu/twi_code_assistant](https://huggingface.co/michsethowusu/twi_code_assistant)
177
  """
178
  )
179
 
180
+ # ------------------------------------------------------------------
181
+ # 7. Launch
182
+ # ------------------------------------------------------------------
183
  if __name__ == "__main__":
184
  demo.queue().launch()