fix turn representation
app.py (CHANGED)
@@ -65,38 +65,37 @@ model.eval()
 # ----------------------
 
 
-def
+def build_messages_for_userlm(
     system_prompt: str, history: List[Tuple[str, str]]
 ) -> List[Dict[str, str]]:
-    """
+    """Build messages for UserLM generation.
+
+    In history tuples: (user_msg, assistant_msg) where:
+    - user_msg: what UserLM previously generated
+    - assistant_msg: what the human (playing assistant) said
+
+    For UserLM training, these roles were flipped, so we need to reconstruct
+    the conversation as UserLM saw it during training.
+    """
     messages: List[Dict[str, str]] = []
+
+    # System prompt defines the user's intent
     if system_prompt.strip():
         messages.append({"role": "system", "content": system_prompt.strip()})
+
+    # Add conversation history in the format UserLM expects
+    # UserLM was trained to generate "user" role messages given prior context
     for user_msg, assistant_msg in history:
         if user_msg:
             messages.append({"role": "user", "content": user_msg})
         if assistant_msg:
             messages.append({"role": "assistant", "content": assistant_msg})
-    return messages
-
 
-
-    logits: torch.Tensor, filter_ids: List[int]
-) -> torch.Tensor:
-    """Apply logit filter for problematic first tokens (Guardrail 1)."""
-    logits_filtered = logits.clone()
-    for token_id in filter_ids:
-        logits_filtered[0, -1, token_id] = float("-inf")
-    return logits_filtered
+    return messages
 
 
 def is_valid_length(text: str, min_words: int = 3, max_words: int = 50) -> bool:
-    """Check if generated text meets length requirements (Guardrail 3).
-
-    Paper used max_words=25 for their simulation experiments, but we use 50
-    for interactive demo to allow slightly longer responses while still preventing
-    the model from revealing the entire intent at once.
-    """
+    """Check if generated text meets length requirements (Guardrail 3)."""
     word_count = len(text.split())
     return min_words <= word_count <= max_words
 
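For reference, a minimal usage sketch of the new helper; the intent string and history contents below are invented for illustration:

```python
# Invented example values; shows how history tuples serialize into messages.
history = [
    ("How do I renew my passport?", "You can renew online or by mail."),
    ("Which option is faster?", ""),  # empty assistant slot is skipped
]
messages = build_messages_for_userlm("User wants to renew a passport", history)
# messages ->
# [{"role": "system", "content": "User wants to renew a passport"},
#  {"role": "user", "content": "How do I renew my passport?"},
#  {"role": "assistant", "content": "You can renew online or by mail."},
#  {"role": "user", "content": "Which option is faster?"}]
```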
@@ -111,7 +110,7 @@ def is_verbatim_repetition(
     if new_text_normalized == system_prompt.strip().lower():
         return True
 
-    # Check against previous user messages
+    # Check against previous user messages (UserLM's prior outputs)
     for user_msg, _ in history:
         if user_msg and new_text_normalized == user_msg.strip().lower():
             return True
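The visible comparisons suggest both sides are normalized with `strip().lower()`, so case and whitespace variants count as repeats. A quick illustration with invented values:

```python
# Invented values; mirrors the comparison pattern visible in the hunk above.
history = [("Please book a flight to Tokyo.", "Done!")]
new_text = "  please BOOK a flight to Tokyo.  "
new_text_normalized = new_text.strip().lower()
any(user_msg and new_text_normalized == user_msg.strip().lower()
    for user_msg, _ in history)  # -> True: treated as a verbatim repetition
```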
@@ -120,7 +119,7 @@ def is_verbatim_repetition(
 
 
 @spaces.GPU
-def generate_reply(
+def generate_user_message(
     messages: List[Dict[str, str]],
     history: List[Tuple[str, str]],
     system_prompt: str,
@@ -129,17 +128,9 @@ def generate_reply(
     top_p: float = 0.8,
     max_retries: int = 5,
 ) -> str:
-    """
-
-    Implements all 4 guardrails from the paper:
-    1. Filter problematic first tokens
-    2. Optionally avoid dialogue termination (disabled by default for demo)
-    3. Enforce length thresholds with retry
-    4. Filter verbatim repetitions with retry
-    """
+    """Generate a user message with guardrails from Appendix C.1."""
 
     for attempt in range(max_retries):
-        # Prepare input ids using the model's chat template
         inputs = tokenizer.apply_chat_template(
             messages,
             return_tensors="pt",
@@ -155,10 +146,9 @@ def generate_reply(
             max_new_tokens=max_new_tokens,
             eos_token_id=EOS_TOKEN_ID,
             pad_token_id=tokenizer.eos_token_id,
-            bad_words_ids=BAD_WORDS_IDS,
+            bad_words_ids=BAD_WORDS_IDS,
         )
 
-        # Slice off the prompt tokens to get only the new text
         generated = outputs[0][inputs.shape[1] :]
         text = tokenizer.decode(generated, skip_special_tokens=True).strip()
 
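`BAD_WORDS_IDS` is defined outside this diff. A plausible construction for the Hugging Face `generate()` argument `bad_words_ids` (a list of token-id sequences) is sketched below; note that `bad_words_ids` bans these sequences anywhere in the output, not only as the first token, so it is a stricter filter than the deleted logit-masking helper.

```python
# Hypothetical sketch; the real BAD_WORDS_IDS definition is not shown in this diff.
# The word list follows the "Blocks I/You/Here" note in the demo's accordion text.
FILTERED_FIRST_WORDS = ["I", "You", "Here"]
BAD_WORDS_IDS = [
    tokenizer(word, add_special_tokens=False).input_ids
    for word in FILTERED_FIRST_WORDS
]
```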
@@ -169,10 +159,9 @@ def generate_reply(
         if is_verbatim_repetition(text, history, system_prompt):
             continue
 
-        # Success - return the valid text
         return text
 
-    # If all retries failed
+    # If all retries failed
     return "(Unable to generate valid response after multiple attempts)"
 
 
@@ -181,32 +170,39 @@ def generate_reply(
 # ----------------------
 
 
-def respond(
-
+def generate_next_turn(
+    assistant_response: str,
     chat_history: List[Tuple[str, str]],
     system_prompt: str,
     max_new_tokens: int,
     temperature: float,
     top_p: float,
 ):
-    """
+    """
+    Generate the next user message from UserLM.
 
     Flow:
-    - If
-    - If
+    - If chat_history is empty: Generate first user message
+    - If chat_history exists:
+        1. Add assistant's response to last turn
+        2. Generate next user message
+
+    Tuple structure: (user_message_from_userlm, assistant_response_from_human)
+    - Position 0 (left): UserLM's messages
+    - Position 1 (right): Human's assistant responses
     """
 
-    #
-    if
-        # Fill in the assistant response slot for the last turn
+    # If we have an assistant response, add it to the last turn
+    if assistant_response.strip() and len(chat_history) > 0:
         last_user_msg, _ = chat_history[-1]
-        chat_history[
+        chat_history = chat_history[:-1] + [(last_user_msg, assistant_response.strip())]
 
-    # Build messages for
-    messages =
+    # Build messages for UserLM
+    messages = build_messages_for_userlm(system_prompt, chat_history)
 
+    # Generate next user message
     try:
-
+        user_msg = generate_user_message(
             messages,
             chat_history,
             system_prompt,
@@ -215,16 +211,20 @@ def respond(
             top_p=top_p,
         )
     except Exception as e:
-
+        user_msg = f"(Generation error: {e})"
 
     # Add new user message to history (with empty assistant slot)
-
+    new_history = chat_history + [(user_msg, "")]
+
+    # Determine button text for next action
+    needs_assistant_response = True
+    button_text = "Generate Next User Message"
 
-    return
+    return "", new_history, button_text
 
 
-def clear_state():
-    return [], DEFAULT_SYSTEM_PROMPT
+def clear_conversation():
+    return [], DEFAULT_SYSTEM_PROMPT, "Generate First User Message"
 
 
 # ----------------------
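Since the commit is about turn representation, here is a worked trace of the bookkeeping above (all strings invented): the human's assistant reply first fills slot 1 of the last tuple, then the newly generated user message opens a fresh tuple with an empty assistant slot.

```python
# Invented strings; traces generate_next_turn's history updates for one round.
chat_history = [("Can you help me plan a trip?", "")]  # awaiting assistant reply
assistant_response = "Sure! Where would you like to go?"

# Step 1: complete the last turn with the human's assistant reply.
last_user_msg, _ = chat_history[-1]
chat_history = chat_history[:-1] + [(last_user_msg, assistant_response.strip())]

# Step 2: append the next UserLM message with an empty assistant slot.
user_msg = "Somewhere warm in December."  # stands in for generate_user_message(...)
new_history = chat_history + [(user_msg, "")]
# [("Can you help me plan a trip?", "Sure! Where would you like to go?"),
#  ("Somewhere warm in December.", "")]
```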
@@ -236,9 +236,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     # UserLM-8b: User Language Model Demo
 
     **How to use:**
-    1. Set the user's intent
-    2. Click
-    3. Type assistant
+    1. Set the user's intent below
+    2. Click "Generate First User Message"
+    3. Type your assistant response and click "Generate Next User Message"
+    4. Repeat step 3 to continue the conversation
 
     **Model:** `{MODEL_ID}` on **{device}**
     """
@@ -249,15 +250,20 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         label="User Intent",
         value=DEFAULT_SYSTEM_PROMPT,
         lines=3,
-        placeholder="Enter
+        placeholder="Enter what the user wants to accomplish",
    )
 
-    chatbot = gr.Chatbot(
+    chatbot = gr.Chatbot(
+        height=420,
+        label="Conversation",
+        type="tuples",
+        # Left side = UserLM (simulated user), Right side = You (playing assistant)
+    )
 
     with gr.Row():
         msg = gr.Textbox(
-            label="Assistant Response
-            placeholder="
+            label="Your Assistant Response",
+            placeholder="Type your assistant response here (leave empty for first turn)",
             lines=2,
         )
 
@@ -267,54 +273,44 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         top_p = gr.Slider(0.0, 1.0, value=0.8, step=0.01, label="top_p")
 
     with gr.Row():
-        submit_btn = gr.Button("Generate User Message", variant="primary")
+        submit_btn = gr.Button("Generate First User Message", variant="primary")
         clear_btn = gr.Button("Clear")
 
-    state = gr.State([])
+    state = gr.State([])
 
     with gr.Accordion("Implementation Details", open=False):
         gr.Markdown(
             """
             ### Generation Strategy
 
-            Based on [Appendix C.1](https://arxiv.org/abs/2510.06552)
-            - **
-            - **First token filtering:** Blocks
-            - **Length constraints:** 3-50 words
+            Based on [Appendix C.1](https://arxiv.org/abs/2510.06552), this implements:
+            - **Sampling:** temp=1.0, top_p=0.8 (paper recommendations)
+            - **First token filtering:** Blocks I/You/Here to prevent repetition
+            - **Length constraints:** 3-50 words to avoid revealing entire intent at once
             - **Repetition filtering:** Prevents verbatim copies of prior turns
 
-
-
-            **Note:** Unlike assistant LMs, UserLM simulates human *users* in conversations.
+            **Note:** UserLM simulates human users, not assistants. You play the assistant role.
             """
         )
 
     def _submit(asst_text, history, system_prompt, mnt, temp, tp):
-
-        return "", visible
+        return generate_next_turn(asst_text, history, system_prompt, mnt, temp, tp)
 
     submit_btn.click(
         fn=_submit,
         inputs=[msg, state, system_box, max_new_tokens, temperature, top_p],
-        outputs=[msg,
+        outputs=[msg, state, submit_btn],
     )
     msg.submit(
         fn=_submit,
         inputs=[msg, state, system_box, max_new_tokens, temperature, top_p],
-        outputs=[msg,
+        outputs=[msg, state, submit_btn],
     )
 
-    # Keep
-
-        return chat
-
-    chatbot.change(_sync_state, inputs=[chatbot], outputs=[state])
-
-    def _clear():
-        history, sys = clear_state()
-        return history, sys, history, ""
+    # Keep chatbot display in sync with state
+    state.change(lambda x: x, inputs=[state], outputs=[chatbot])
 
-    clear_btn.click(
+    clear_btn.click(fn=clear_conversation, outputs=[state, system_box, submit_btn])
 
 if __name__ == "__main__":
-    demo.queue().launch()
+    demo.queue().launch()
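The rewired event graph routes every update through `state` and mirrors it into the visible chatbot with an identity `state.change` listener, replacing the old `chatbot.change` to `_sync_state` round trip. A self-contained sketch of that pattern, with invented component names and turn content:

```python
import gradio as gr

with gr.Blocks() as sketch:
    state = gr.State([])                 # single source of truth
    chatbot = gr.Chatbot(type="tuples")  # display only
    box = gr.Textbox()

    def add_turn(text, history):
        # Write to state only; the chatbot is refreshed by the listener below.
        return "", history + [(text, "(assistant reply)")]

    box.submit(add_turn, inputs=[box, state], outputs=[box, state])
    # Identity listener mirrors state into the chatbot after every change.
    state.change(lambda x: x, inputs=[state], outputs=[chatbot])

sketch.launch()
```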