pszemraj committed (verified)
Commit 9dd80f6 · 1 Parent(s): ee4117d

role fix attempt

Correct the left/right roles by switching to Chatbot(type="messages") and mapping UserLM's turns to role='assistant' (rendered on the left) and your replies to role='user' (rendered on the right), per Gradio's message schema.

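For reference, a minimal sketch of that mapping (the example message contents here are made up):

```python
import gradio as gr

# Gradio's "messages" format: each entry is {"role": ..., "content": ...}.
# The roles are deliberately inverted relative to a normal chat UI:
#   UserLM's turns -> role="assistant" (rendered on the LEFT)
#   your replies   -> role="user"      (rendered on the RIGHT)
ui_history = [
    {"role": "assistant", "content": "hi, i need a sequence function in python"},  # UserLM
    {"role": "user", "content": "Sure - should it be iterative or recursive?"},    # you
]
chatbot = gr.Chatbot(value=ui_history, type="messages")
```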
Guardrail 1 is now actually applied, via a custom LogitsProcessor that forbids the six problematic first tokens ("I", "You", "Here" and their lowercase variants) on the first generated token only; the other three guardrails are enforced as in Appendix C.1 (3–25 word length, blocking <|endconversation|>, verbatim-repetition filtering).

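The first-token restriction works because generate() calls every logits processor once per decoding step with the sequence so far, and the sequence length equals the prompt length only at the first step. A standalone sketch of that gating check, with placeholder token ids and vocabulary size:

```python
import torch

forbid_ids = [40, 2675, 8586]  # placeholder ids standing in for "I", "You", "Here"
prompt_len = 12                # length of the tokenized prompt

def filter_first_step(input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
    # Only the first decoding step sees a sequence of exactly prompt_len tokens,
    # so the mask fires once and all later steps pass through untouched.
    if input_ids.shape[1] == prompt_len:
        scores[:, forbid_ids] = float("-inf")
    return scores

vocab_size = 128256  # placeholder
first = filter_first_step(torch.zeros((1, prompt_len), dtype=torch.long), torch.zeros((1, vocab_size)))
later = filter_first_step(torch.zeros((1, prompt_len + 1), dtype=torch.long), torch.zeros((1, vocab_size)))
assert torch.isinf(first[0, 40]).item() and not torch.isinf(later[0, 40]).item()
```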
Defaults align with the model card/paper: temperature=1.0, top_p=0.8, stop on <|eot_id|>, block <|endconversation|>. All of these remain tunable via the sliders.

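A condensed sketch of how those defaults are wired up, mirroring the logic in the diff below (only the tokenizer download is needed to run it):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("microsoft/UserLM-8b", trust_remote_code=True)

# Stop criterion: the id of <|eot_id|>; banned continuation: <|endconversation|>.
eos_id = tok.encode("<|eot_id|>", add_special_tokens=False)[0]
bad_words = [[tid] for tid in tok.encode("<|endconversation|>", add_special_tokens=False)]

gen_kwargs = dict(
    do_sample=True,
    temperature=1.0,          # model-card default, exposed as a slider
    top_p=0.8,                # model-card default, exposed as a slider
    eos_token_id=eos_id,      # generation stops at <|eot_id|>
    bad_words_ids=bad_words,  # transformers' built-in token ban
)
```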
Files changed (1)
  app.py  (+191 -213)
app.py CHANGED
@@ -1,16 +1,17 @@
 from __future__ import annotations
 
 import os
-from typing import Any, Dict, List, Tuple
+from typing import Any, Dict, List, Optional, Tuple
 
 import gradio as gr
 import spaces
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers.generation.logits_process import LogitsProcessor, LogitsProcessorList
 
-# ----------------------
+# ======================
 # Config
-# ----------------------
+# ======================
 MODEL_ID = os.getenv("MODEL_ID", "microsoft/UserLM-8b")
 DEFAULT_SYSTEM_PROMPT = (
     "You are a user who wants to implement a special type of sequence. "
@@ -18,139 +19,140 @@ DEFAULT_SYSTEM_PROMPT = (
     "The first two numbers in the sequence are 1 and 1."
 )
 
-
+# ======================
+# Load model
+# ======================
 def load_model(model_id: str = MODEL_ID):
-    """Load tokenizer and model, with a reasonable dtype and device fallback."""
-    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
-
-    model = AutoModelForCausalLM.from_pretrained(
+    tok = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+    mdl = AutoModelForCausalLM.from_pretrained(
         model_id,
         trust_remote_code=True,
         torch_dtype="auto",
         device_map="auto",
     )
 
-    # Special tokens for stopping / filtering
-    end_token = "<|eot_id|>"
-    end_conv_token = "<|endconversation|>"
-    end_token_ids = tokenizer.encode(end_token, add_special_tokens=False)
-    end_conv_token_ids = tokenizer.encode(end_conv_token, add_special_tokens=False)
+    # Special tokens
+    eot = "<|eot_id|>"
+    end_conv = "<|endconversation|>"
+    eot_ids = tok.encode(eot, add_special_tokens=False)
+    end_conv_ids = tok.encode(end_conv, add_special_tokens=False)
+    eos_token_id = eot_ids[0] if len(eot_ids) > 0 else tok.eos_token_id
+    bad_words_ids = [[tid] for tid in end_conv_ids] if len(end_conv_ids) > 0 else None
 
-    # Guardrail 1: Problematic first tokens that cause repetition (from Appendix C.1)
-    problematic_tokens = ["I", "You", "Here", "i", "you", "here"]
+    # Guardrail 1: problematic first tokens (Appendix C.1)
+    prob_first_tokens = ["I", "You", "Here", "i", "you", "here"]
     first_token_filter_ids = []
-    for token in problematic_tokens:
-        token_ids = tokenizer.encode(token, add_special_tokens=False)
-        if len(token_ids) > 0:
-            first_token_filter_ids.append(token_ids[0])
-
-    eos_token_id = (
-        end_token_ids[0] if len(end_token_ids) > 0 else tokenizer.eos_token_id
-    )
-    bad_words_ids = (
-        [[tid] for tid in end_conv_token_ids] if len(end_conv_token_ids) > 0 else None
-    )
+    for w in prob_first_tokens:
+        ids = tok.encode(w, add_special_tokens=False)
+        if ids:
+            first_token_filter_ids.append(ids[0])
 
-    return tokenizer, model, eos_token_id, bad_words_ids, first_token_filter_ids
+    return tok, mdl, eos_token_id, bad_words_ids, first_token_filter_ids
 
 
 tokenizer, model, EOS_TOKEN_ID, BAD_WORDS_IDS, FIRST_TOKEN_FILTER_IDS = load_model()
 model.eval()
 
-# ----------------------
-# Generation helper
-# ----------------------
-
-
-def build_messages(
-    system_prompt: str, history: List[Tuple[str, str]]
-) -> List[Dict[str, str]]:
-    """Transform Gradio history into chat template messages.
-
-    History is stored as (model_user, human_assistant) tuples.
-    """
-    messages: List[Dict[str, str]] = []
-    if system_prompt.strip():
-        messages.append({"role": "system", "content": system_prompt.strip()})
-
-    # Each tuple is (model_user, human_assistant)
-    for model_user, human_assistant in history:
-        if model_user:
-            messages.append({"role": "user", "content": model_user})
-        if human_assistant:
-            messages.append({"role": "assistant", "content": human_assistant})
-
-    return messages
-
-
-def apply_first_token_filter(
-    logits: torch.Tensor, filter_ids: List[int]
-) -> torch.Tensor:
-    """Apply logit filter for problematic first tokens (Guardrail 1)."""
-    logits_filtered = logits.clone()
-    for token_id in filter_ids:
-        logits_filtered[0, -1, token_id] = float("-inf")
-    return logits_filtered
-
-
-def is_valid_length(text: str, min_words: int = 3, max_words: int = 50) -> bool:
-    """Check if generated text meets length requirements (Guardrail 3).
-
-    Paper used max_words=25 for their simulation experiments, but we use 50
-    for interactive demo to allow slightly longer responses while still preventing
-    the model from revealing the entire intent at once.
-    """
-    word_count = len(text.split())
-    return min_words <= word_count <= max_words
-
-
-def is_verbatim_repetition(
-    new_text: str, history: List[Tuple[str, str]], system_prompt: str
-) -> bool:
-    """Check if text is exact repetition of prior user turn or system prompt (Guardrail 4)."""
-    new_text_normalized = new_text.strip().lower()
-
-    # Check against system prompt
-    if new_text_normalized == system_prompt.strip().lower():
-        return True
-
-    # Check against previous model user messages (first element in tuple)
-    for model_user, _ in history:
-        if model_user and new_text_normalized == model_user.strip().lower():
-            return True
-
-    return False
+# ======================
+# Guardrail helpers
+# ======================
+def is_valid_length(text: str, min_words: int = 3, max_words: int = 25) -> bool:
+    wc = len(text.split())
+    return min_words <= wc <= max_words
+
+
+def is_verbatim_repetition(
+    new_text: str, history_pairs: List[Tuple[str, Optional[str]]], system_prompt: str
+) -> bool:
+    t = new_text.strip().lower()
+    if t == system_prompt.strip().lower():
+        return True
+    for model_user, _ in history_pairs:
+        if model_user and t == model_user.strip().lower():
+            return True
+    return False
+
+
+class ForbidFirstToken(LogitsProcessor):
+    """Set -inf on a token list for the *first* generated token only."""
+
+    def __init__(self, forbid_ids: List[int], prompt_len: int):
+        self.forbid = list(set(int(x) for x in forbid_ids))
+        self.prompt_len = int(prompt_len)
+
+    def __call__(
+        self, input_ids: torch.LongTensor, scores: torch.FloatTensor
+    ) -> torch.FloatTensor:
+        # Apply only when generating the very first token (seq len == prompt_len)
+        if input_ids.shape[1] == self.prompt_len and self.forbid:
+            scores[:, self.forbid] = float("-inf")
+        return scores
+
+
+# ======================
+# Message utilities
+# ======================
+def build_hf_messages(
+    system_prompt: str, history_pairs: List[Tuple[str, Optional[str]]]
+) -> List[Dict[str, str]]:
+    """
+    Construct messages for tokenizer.apply_chat_template.
+    history_pairs = list of (model_user, human_assistant)
+    """
+    msgs: List[Dict[str, str]] = []
+    if system_prompt.strip():
+        msgs.append({"role": "system", "content": system_prompt.strip()})
+    for model_user, human_assistant in history_pairs:
+        if model_user:
+            msgs.append({"role": "user", "content": model_user})
+        if human_assistant:
+            msgs.append({"role": "assistant", "content": human_assistant})
+    return msgs
+
+
+def pairs_to_ui_messages(
+    history_pairs: List[Tuple[str, Optional[str]]]
+) -> List[Dict[str, str]]:
+    """
+    Convert (model_user, human_assistant) pairs to Gradio Chatbot(type='messages') UI messages.
+    Visual convention:
+      - LEFT (role='assistant'): UserLM's utterances (the simulator)
+      - RIGHT (role='user'): your replies (you play the assistant)
+    """
+    ui: List[Dict[str, str]] = []
+    for model_user, human_assistant in history_pairs:
+        if model_user:
+            ui.append({"role": "assistant", "content": model_user})
+        if human_assistant:
+            ui.append({"role": "user", "content": human_assistant})
+    return ui
 
 
+# ======================
+# Generation
+# ======================
 @spaces.GPU
 def generate_reply(
-    messages: List[Dict[str, str]],
-    history: List[Tuple[str, str]],
     system_prompt: str,
-    max_new_tokens: int = 256,
+    history_pairs: List[Tuple[str, Optional[str]]],
+    max_new_tokens: int = 128,
     temperature: float = 1.0,
     top_p: float = 0.8,
     max_retries: int = 5,
 ) -> str:
-    """Run generation with guardrails from Appendix C.1.
-
-    Implements all 4 guardrails from the paper:
-    1. Filter problematic first tokens
-    2. Optionally avoid dialogue termination (disabled by default for demo)
-    3. Enforce length thresholds with retry
-    4. Filter verbatim repetitions with retry
-    """
-
-    for attempt in range(max_retries):
-        # Prepare input ids using the model's chat template
-        inputs = tokenizer.apply_chat_template(
-            messages,
-            return_tensors="pt",
-            add_generation_prompt=True,
-        ).to(model.device)
+    """Implements the 4 guardrails from Appendix C.1."""
+    messages = build_hf_messages(system_prompt, history_pairs)
+    inputs = tokenizer.apply_chat_template(
+        messages, return_tensors="pt", add_generation_prompt=True
+    ).to(model.device)
+
+    for _ in range(max_retries):
+        lp = LogitsProcessorList(
+            [ForbidFirstToken(FIRST_TOKEN_FILTER_IDS, prompt_len=inputs.shape[1])]
+        )
 
         with torch.no_grad():
-            outputs = model.generate(
+            out = model.generate(
                 input_ids=inputs,
                 do_sample=True,
                 top_p=top_p,
@@ -158,139 +160,109 @@ def generate_reply(
                 max_new_tokens=max_new_tokens,
                 eos_token_id=EOS_TOKEN_ID,
                 pad_token_id=tokenizer.eos_token_id,
-                bad_words_ids=BAD_WORDS_IDS,  # Prevents <|endconversation|>
+                bad_words_ids=BAD_WORDS_IDS,  # Guardrail 2: block <|endconversation|>
+                logits_processor=lp,  # Guardrail 1
             )
 
-        # Slice off the prompt tokens to get only the new text
-        generated = outputs[0][inputs.shape[1] :]
-        text = tokenizer.decode(generated, skip_special_tokens=True).strip()
+        gen = out[0][inputs.shape[1] :]
+        text = tokenizer.decode(gen, skip_special_tokens=True).strip()
 
-        # Apply guardrails - retry if checks fail
-        if not is_valid_length(text):
+        # Guardrails 3 & 4
+        if not is_valid_length(text, min_words=3, max_words=25):
             continue
-
-        if is_verbatim_repetition(text, history, system_prompt):
+        if is_verbatim_repetition(text, history_pairs, system_prompt):
             continue
-
-        # Success - return the valid text
         return text
 
-    # If all retries failed, raise an error
-    raise RuntimeError(
-        f"Failed to generate valid response after {max_retries} attempts"
-    )
-
-
-# ----------------------
-# Gradio UI callbacks
-# ----------------------
+    raise RuntimeError("Failed to generate a valid user utterance after retries.")
 
 
+# ======================
+# Gradio UI
+# ======================
 def respond(
-    assistant_message: str,
-    chat_history: List[Tuple[str, str]],
+    your_reply: str,
+    history_pairs: List[Tuple[str, Optional[str]]],
     system_prompt: str,
     max_new_tokens: int,
    temperature: float,
     top_p: float,
 ):
-    """Generate next user turn.
-
-    Flow:
-    - If history empty: Generate first user message (ignores assistant_message input)
-    - If history exists: Add assistant response and generate next user turn
-
-    History format: (model_user, human_assistant)
-    """
-
-    # First message generation - ignore any text in the assistant box
-    if len(chat_history) == 0:
-        # Generate initial user message from system prompt alone
-        messages = build_messages(system_prompt, [])
-
-        user_reply = generate_reply(
-            messages,
-            chat_history,
+    # First turn: ignore your_reply and generate the initial UserLM utterance
+    if not history_pairs:
+        userlm = generate_reply(
             system_prompt,
+            [],
             max_new_tokens=max_new_tokens,
             temperature=temperature,
            top_p=top_p,
         )
+        history_pairs = [(userlm, None)]
+        return pairs_to_ui_messages(history_pairs), history_pairs, ""
 
-        # Start conversation with first user message (empty assistant slot)
-        chat_history = [(user_reply, None)]
-        return chat_history, chat_history
-
-    # Subsequent messages - require assistant response
-    if not assistant_message.strip():
-        # User clicked generate without providing assistant response
-        gr.Info(
-            "Please type your assistant response before generating the next user message."
-        )
-        return chat_history, chat_history
-
-    # Update the last tuple with the assistant response
-    last_model_user, _ = chat_history[-1]
-    chat_history[-1] = (last_model_user, assistant_message.strip())
+    # Subsequent turns require your reply
+    if not your_reply.strip():
+        gr.Info("Type your (assistant) reply on the right, then click Generate.")
+        return pairs_to_ui_messages(history_pairs), history_pairs, ""
 
-    # Build messages for next user turn generation
-    messages = build_messages(system_prompt, chat_history)
+    # Close the last pair with your reply
+    last_userlm, _ = history_pairs[-1]
+    history_pairs[-1] = (last_userlm, your_reply.strip())
 
-    user_reply = generate_reply(
-        messages,
-        chat_history,
+    # Generate the next UserLM utterance
+    userlm = generate_reply(
         system_prompt,
+        history_pairs,
         max_new_tokens=max_new_tokens,
        temperature=temperature,
         top_p=top_p,
     )
+    history_pairs.append((userlm, None))
 
-    # Add new model user message (with empty assistant slot)
-    chat_history.append((user_reply, None))
-
-    return chat_history, chat_history
+    return pairs_to_ui_messages(history_pairs), history_pairs, ""
 
 
-def clear_state():
-    return [], DEFAULT_SYSTEM_PROMPT
+def _clear():
+    return [], [], DEFAULT_SYSTEM_PROMPT, ""
 
 
-# ----------------------
-# Build the Gradio App
-# ----------------------
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         f"""
-        # UserLM-8b: User Language Model Demo
-
-        **Model:** `{MODEL_ID}`
-
-        The AI plays the user, you play the assistant.
-        """
+        # UserLM-8b: User Language Model Demo
+        **Model:** `{MODEL_ID}`
+
+        The AI plays the **user**, you play the **assistant**. Your messages appear on the **right**.
+        """
     )
 
-    with gr.Row():
-        system_box = gr.Textbox(
-            label="User Intent",
-            value=DEFAULT_SYSTEM_PROMPT,
-            lines=3,
-            placeholder="Enter the user's goal or intent",
-        )
+    system_box = gr.Textbox(
+        label="User Intent",
+        value=DEFAULT_SYSTEM_PROMPT,
+        lines=3,
+        placeholder="Enter the user's goal or intent",
+    )
 
+    # Use messages format so we can control left/right explicitly
     chatbot = gr.Chatbot(
-        height=420,
         label="Conversation",
+        height=420,
+        type="messages",  # modern format; tuples are deprecated
+        render_markdown=True,
+        autoscroll=True,
+        show_copy_button=True,
+        # You can set avatar images like: avatar_images=("assets/you.png", "assets/userlm.png")
    )
 
-    with gr.Row():
-        msg = gr.Textbox(
-            label="Assistant Response",
-            placeholder="Leave empty for first generation, then type your responses",
-            lines=2,
-        )
+    # Your reply box (you play the assistant)
+    msg = gr.Textbox(
+        label="Your Reply (assistant)",
+        placeholder="Type your assistant response here…",
+        lines=2,
+    )
 
     with gr.Accordion("Generation Settings", open=False):
-        max_new_tokens = gr.Slider(16, 512, value=256, step=16, label="max_new_tokens")
+        max_new_tokens = gr.Slider(16, 512, value=128, step=16, label="max_new_tokens")
         temperature = gr.Slider(0.0, 2.0, value=1.0, step=0.05, label="temperature")
         top_p = gr.Slider(0.0, 1.0, value=0.8, step=0.01, label="top_p")
 
@@ -298,46 +270,52 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         submit_btn = gr.Button("Generate", variant="primary")
         clear_btn = gr.Button("Clear")
 
-    state = gr.State([])  # chat history: List[Tuple[model_user, human_assistant]]
+    # Internal state keeps the compact (userLM, you) pairs used for decoding
+    history_pairs_state = gr.State([])  # List[Tuple[str, Optional[str]]]
 
     with gr.Accordion("Implementation Details", open=False):
         gr.Markdown(
             """
-            Based on Appendix C.1 of the UserLM paper:
-            - Sampling: temp=1.0, top_p=0.8
-            - First token filtering for problematic tokens
-            - Length constraints: 3-50 words
-            - Repetition filtering
+            - Decoding defaults from the model card: `temperature=1.0`, `top_p=0.8`, stop on `<|eot_id|>`, and block `<|endconversation|>`.
+            - Guardrails from Appendix C.1: (1) first-token logit filter, (2) block endconversation, (3) 3–25 word length, (4) verbatim repetition filter.
            """
        )
 
-    def _submit(asst_text, history, system_prompt, mnt, temp, tp):
-        new_history, visible = respond(asst_text, history, system_prompt, mnt, temp, tp)
-        # Clear input box after submission
-        return "", visible
+    def _submit(your_text, pairs, sys_prompt, mnt, temp, tp):
+        ui_msgs, new_pairs, cleared_text = respond(
+            your_text, pairs, sys_prompt, mnt, temp, tp
+        )
+        return ui_msgs, new_pairs, cleared_text
 
     submit_btn.click(
         fn=_submit,
-        inputs=[msg, state, system_box, max_new_tokens, temperature, top_p],
-        outputs=[msg, chatbot],
+        inputs=[
+            msg,
+            history_pairs_state,
+            system_box,
+            max_new_tokens,
+            temperature,
+            top_p,
+        ],
+        outputs=[chatbot, history_pairs_state, msg],
    )
     msg.submit(
         fn=_submit,
-        inputs=[msg, state, system_box, max_new_tokens, temperature, top_p],
-        outputs=[msg, chatbot],
+        inputs=[
+            msg,
+            history_pairs_state,
+            system_box,
+            max_new_tokens,
+            temperature,
+            top_p,
+        ],
+        outputs=[chatbot, history_pairs_state, msg],
    )
 
-    # Keep state in sync with the visible Chatbot
-    def _sync_state(chat):
-        return chat
-
-    chatbot.change(_sync_state, inputs=[chatbot], outputs=[state])
-
-    def _clear():
-        history, sys = clear_state()
-        return history, sys, history, ""
-
-    clear_btn.click(_clear, outputs=[state, system_box, chatbot, msg])
+    clear_btn.click(
+        fn=_clear,
+        outputs=[chatbot, history_pairs_state, system_box, msg],
+    )
 
 if __name__ == "__main__":
-    demo.queue().launch()
+    demo.queue().launch()