bricksandbot committed on
Commit
aa0d41c
·
verified ·
1 Parent(s): a29c8be

Deploy Buildsnpper chatbot Gradio interface

Browse files

- Add Gradio chat interface for Buildsnpper platform
- Uses bricksandbotltd/buildsnpper-chatbot-Q4_K_M model
- Includes 8 example questions
- Supports conversation history
- Clean, simple UI for customer support

Files changed (1) hide show
  1. app.py +34 -22
app.py CHANGED
@@ -35,14 +35,14 @@ print("Model loaded successfully!")
35
  @spaces.GPU
36
  def chat(message, history):
37
  """
38
- Process user message and generate response using ZeroGPU.
39
 
40
  Args:
41
  message: User's input message
42
  history: List of [user_msg, bot_msg] pairs
43
 
44
- Returns:
45
- str: Bot's response
46
  """
47
  # Build conversation history
48
  messages = []
@@ -63,25 +63,37 @@ def chat(message, history):
63
  # Tokenize
64
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
65
 
66
- # Generate response
67
- with torch.no_grad():
68
- outputs = model.generate(
69
- **inputs,
70
- max_new_tokens=300,
71
- temperature=0.1,
72
- do_sample=True,
73
- top_p=0.9,
74
- pad_token_id=tokenizer.eos_token_id,
75
- )
76
-
77
- # Decode response
78
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
79
-
80
- # Extract just the assistant's response
81
- if "<|assistant|>" in response:
82
- response = response.split("<|assistant|>")[-1].strip()
83
-
84
- return response
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
 
87
  # Example questions
 
35
  @spaces.GPU
36
  def chat(message, history):
37
  """
38
+ Process user message and generate streaming response using ZeroGPU.
39
 
40
  Args:
41
  message: User's input message
42
  history: List of [user_msg, bot_msg] pairs
43
 
44
+ Yields:
45
+ str: Streaming bot's response
46
  """
47
  # Build conversation history
48
  messages = []
 
63
  # Tokenize
64
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
65
 
66
+ # Generate response with streaming
67
+ from transformers import TextIteratorStreamer
68
+ from threading import Thread
69
+
70
+ streamer = TextIteratorStreamer(
71
+ tokenizer,
72
+ skip_prompt=True,
73
+ skip_special_tokens=True
74
+ )
75
+
76
+ generation_kwargs = dict(
77
+ inputs,
78
+ max_new_tokens=300,
79
+ temperature=0.1,
80
+ do_sample=True,
81
+ top_p=0.9,
82
+ pad_token_id=tokenizer.eos_token_id,
83
+ streamer=streamer,
84
+ )
85
+
86
+ # Start generation in separate thread
87
+ thread = Thread(target=model.generate, kwargs=generation_kwargs)
88
+ thread.start()
89
+
90
+ # Stream the response
91
+ partial_response = ""
92
+ for new_text in streamer:
93
+ partial_response += new_text
94
+ yield partial_response
95
+
96
+ thread.join()
97
 
98
 
99
  # Example questions