DevNumb committed on
Commit
3d70cf4
·
verified ·
1 Parent(s): 9a14bb0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -89
app.py CHANGED
@@ -1,83 +1,101 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
  import torch
4
  import time
5
 
6
- # Initialize the pipeline
7
  @torch.no_grad()
8
  def load_model():
9
  print("Loading Qwen3-0.6B model...")
10
- pipe = pipeline(
11
- "text-generation",
12
- model="Qwen/Qwen3-0.6B",
 
 
 
 
 
 
13
  torch_dtype=torch.float16,
14
  device_map="auto",
15
  trust_remote_code=True
16
  )
 
17
  print("Model loaded successfully!")
18
- return pipe
19
 
20
  # Load the model
21
- pipe = load_model()
 
 
 
 
 
 
22
 
23
- def format_chat_template(messages):
24
  """
25
- Format messages using the model's chat template
26
  """
27
- try:
28
- # Use the model's built-in chat template
29
- formatted_prompt = pipe.tokenizer.apply_chat_template(
30
- messages,
31
- tokenize=False,
32
- add_generation_prompt=True
33
- )
34
- return formatted_prompt
35
- except Exception as e:
36
- # Fallback formatting
37
- conversation = ""
38
- for msg in messages:
39
- if msg["role"] == "user":
40
- conversation += f"User: {msg['content']}\n\nAssistant:"
41
- elif msg["role"] == "assistant":
42
- conversation += f" {msg['content']}\n\n"
43
- return conversation
44
 
45
  def generate_response(message, history, temperature=0.7, max_length=512):
46
  """
47
  Generate a response using Qwen3-0.6B
48
  """
 
 
 
49
  try:
50
- # Convert Gradio history to messages format
51
- messages = []
52
- for human_msg, assistant_msg in history:
53
- messages.extend([
54
- {"role": "user", "content": human_msg},
55
- {"role": "assistant", "content": assistant_msg}
56
- ])
 
 
57
 
58
- # Add current message
59
- messages.append({"role": "user", "content": message})
60
 
61
  # Generate response
62
- formatted_prompt = format_chat_template(messages)
 
 
 
 
 
 
 
 
 
 
63
 
64
- outputs = pipe(
65
- formatted_prompt,
66
- max_new_tokens=max_length,
67
- temperature=temperature,
68
- do_sample=True,
69
- top_p=0.9,
70
- repetition_penalty=1.1,
71
- pad_token_id=pipe.tokenizer.eos_token_id,
72
- eos_token_id=pipe.tokenizer.eos_token_id,
73
- return_full_text=False # Only return the generated part
74
- )
75
 
76
- response = outputs[0]['generated_text'].strip()
77
 
78
  # Clean up response
79
- if "User:" in response:
80
- response = response.split("User:")[0].strip()
 
81
 
82
  return response
83
 
@@ -215,6 +233,11 @@ custom_css = """
215
  text-fill-color: transparent;
216
  font-weight: 700 !important;
217
  }
 
 
 
 
 
218
  """
219
 
220
  # Create the Gradio interface
@@ -227,6 +250,18 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
227
  elem_classes="markdown-container"
228
  )
229
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  with gr.Row(equal_height=False):
231
  with gr.Column(scale=3):
232
  chatbot = gr.Chatbot(
@@ -241,13 +276,18 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
241
  with gr.Row():
242
  msg = gr.Textbox(
243
  label="💭 Your message",
244
- placeholder="Ask me anything...",
245
  lines=2,
246
  scale=4,
247
- container=False
 
248
  )
249
  with gr.Column(scale=1):
250
- submit_btn = gr.Button("Send 🚀", size="lg")
 
 
 
 
251
 
252
  with gr.Row():
253
  clear_btn = gr.Button("🗑️ Clear Chat", size="sm")
@@ -292,43 +332,44 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
292
  - Adjust temperature for creativity
293
  """)
294
 
295
- # Event handlers
296
- submit_event = msg.submit(
297
- chat_interface,
298
- inputs=[msg, chatbot, temperature, max_length],
299
- outputs=[msg, chatbot]
300
- )
301
-
302
- submit_btn.click(
303
- chat_interface,
304
- inputs=[msg, chatbot, temperature, max_length],
305
- outputs=[msg, chatbot]
306
- )
307
-
308
- clear_btn.click(
309
- clear_chat,
310
- outputs=[chatbot]
311
- )
312
-
313
- retry_btn.click(
314
- retry_last_response,
315
- inputs=[chatbot, temperature, max_length],
316
- outputs=[chatbot]
317
- )
318
-
319
- # Additional examples
320
- with gr.Accordion("💡 Example Prompts", open=False):
321
- gr.Examples(
322
- examples=[
323
- "Explain quantum computing in simple terms",
324
- "Write a short poem about artificial intelligence",
325
- "What are the benefits of renewable energy?",
326
- "How do I learn programming effectively?",
327
- "Tell me an interesting fact about space"
328
- ],
329
- inputs=msg,
330
- label="Click any example to try it out!"
331
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332
 
333
  if __name__ == "__main__":
334
  demo.launch(
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import torch
4
  import time
5
 
6
# Initialize the model and tokenizer
@torch.no_grad()
def load_model():
    """Download and initialize the Qwen3-0.6B tokenizer and model.

    Returns:
        tuple: ``(tokenizer, model)`` ready for text generation.
    """
    print("Loading Qwen3-0.6B model...")

    repo_id = "Qwen/Qwen3-0.6B"

    # Tokenizer first, then the weights; both opt in to the repo's custom code.
    tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)

    model = AutoModelForCausalLM.from_pretrained(
        repo_id,
        torch_dtype=torch.float16,  # half precision keeps the footprint small
        device_map="auto",          # let accelerate choose CPU/GPU placement
        trust_remote_code=True,
    )

    print("Model loaded successfully!")
    return tokenizer, model
26
 
27
# Load the model once at import time. On any failure, park the pair as
# (None, None) so the UI can detect the problem and degrade gracefully
# instead of crashing the whole app.
try:
    tokenizer, model = load_model()
    print(f"Model device: {model.device}")
    print(f"Model dtype: {model.dtype}")
except Exception as e:
    print(f"Error loading model: {e}")
    tokenizer, model = None, None
35
 
36
def format_messages(history, new_message):
    """Convert Gradio chat history plus the latest prompt into a message list.

    Args:
        history: iterable of ``(user_text, assistant_text)`` pairs from the
            Gradio Chatbot component.
        new_message: the user's newest prompt.

    Returns:
        list[dict]: ``{"role", "content"}`` dicts, ending with the new
        user turn, ready for ``tokenizer.apply_chat_template``.
    """
    # Flatten each (user, assistant) pair into two role-tagged entries.
    past_turns = [
        {"role": role, "content": text}
        for user_text, assistant_text in history
        for role, text in (("user", user_text), ("assistant", assistant_text))
    ]

    # The current prompt always closes the list as an open user turn.
    return past_turns + [{"role": "user", "content": new_message}]
 
 
 
 
53
 
54
  def generate_response(message, history, temperature=0.7, max_length=512):
55
  """
56
  Generate a response using Qwen3-0.6B
57
  """
58
+ if tokenizer is None or model is None:
59
+ return "Model is not loaded properly. Please check the logs."
60
+
61
  try:
62
+ # Format messages
63
+ messages = format_messages(history, message)
64
+
65
+ # Apply chat template
66
+ text = tokenizer.apply_chat_template(
67
+ messages,
68
+ tokenize=False,
69
+ add_generation_prompt=True
70
+ )
71
 
72
+ # Tokenize
73
+ model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
74
 
75
  # Generate response
76
+ with torch.no_grad():
77
+ generated_ids = model.generate(
78
+ **model_inputs,
79
+ max_new_tokens=max_length,
80
+ temperature=temperature,
81
+ do_sample=True,
82
+ top_p=0.9,
83
+ repetition_penalty=1.1,
84
+ eos_token_id=tokenizer.eos_token_id,
85
+ pad_token_id=tokenizer.eos_token_id
86
+ )
87
 
88
+ # Decode response
89
+ generated_ids = [
90
+ output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
91
+ ]
 
 
 
 
 
 
 
92
 
93
+ response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
94
 
95
  # Clean up response
96
+ response = response.strip()
97
+ if "<|im_end|>" in response:
98
+ response = response.split("<|im_end|>")[0].strip()
99
 
100
  return response
101
 
 
233
  text-fill-color: transparent;
234
  font-weight: 700 !important;
235
  }
236
+
237
+ .loading {
238
+ opacity: 0.7;
239
+ pointer-events: none;
240
+ }
241
  """
242
 
243
  # Create the Gradio interface
 
250
  elem_classes="markdown-container"
251
  )
252
 
253
+ # Show loading status
254
+ if tokenizer is None or model is None:
255
+ gr.Markdown("""
256
+ ## ⚠️ Model Loading Issue
257
+ The model is taking longer than expected to load. This might be due to:
258
+ - Large model size download
259
+ - Hugging Face API limitations
260
+ - Insufficient resources
261
+
262
+ Please wait a few minutes and refresh the page.
263
+ """)
264
+
265
  with gr.Row(equal_height=False):
266
  with gr.Column(scale=3):
267
  chatbot = gr.Chatbot(
 
276
  with gr.Row():
277
  msg = gr.Textbox(
278
  label="💭 Your message",
279
+ placeholder="Ask me anything..." if tokenizer and model else "Model is loading...",
280
  lines=2,
281
  scale=4,
282
+ container=False,
283
+ interactive=tokenizer is not None and model is not None
284
  )
285
  with gr.Column(scale=1):
286
+ submit_btn = gr.Button(
287
+ "Send 🚀" if tokenizer and model else "Loading...",
288
+ size="lg",
289
+ interactive=tokenizer is not None and model is not None
290
+ )
291
 
292
  with gr.Row():
293
  clear_btn = gr.Button("🗑️ Clear Chat", size="sm")
 
332
  - Adjust temperature for creativity
333
  """)
334
 
335
+ # Event handlers (only if model is loaded)
336
+ if tokenizer is not None and model is not None:
337
+ submit_event = msg.submit(
338
+ chat_interface,
339
+ inputs=[msg, chatbot, temperature, max_length],
340
+ outputs=[msg, chatbot]
341
+ )
342
+
343
+ submit_btn.click(
344
+ chat_interface,
345
+ inputs=[msg, chatbot, temperature, max_length],
346
+ outputs=[msg, chatbot]
347
+ )
348
+
349
+ clear_btn.click(
350
+ clear_chat,
351
+ outputs=[chatbot]
352
+ )
353
+
354
+ retry_btn.click(
355
+ retry_last_response,
356
+ inputs=[chatbot, temperature, max_length],
357
+ outputs=[chatbot]
 
 
 
 
 
 
 
 
 
 
 
 
 
358
  )
359
+
360
+ # Additional examples
361
+ with gr.Accordion("💡 Example Prompts", open=False):
362
+ gr.Examples(
363
+ examples=[
364
+ "Explain quantum computing in simple terms",
365
+ "Write a short poem about artificial intelligence",
366
+ "What are the benefits of renewable energy?",
367
+ "How do I learn programming effectively?",
368
+ "Tell me an interesting fact about space"
369
+ ],
370
+ inputs=msg,
371
+ label="Click any example to try it out!"
372
+ )
373
 
374
  if __name__ == "__main__":
375
  demo.launch(