UserLM

Running on Zero

App Files Files Community

pszemraj committed on Oct 11

Commit

b84bed8

verified ·

1 Parent(s): d9a9b75

Update app.py

Browse files

Files changed (1) hide show

app.py +146 -75

app.py CHANGED Viewed

@@ -65,43 +65,88 @@ model.eval()
 # ----------------------
 def is_valid_length(text: str, min_words: int = 3, max_words: int = 50) -> bool:
-    """Check if generated text meets length requirements (Guardrail 3)."""
     word_count = len(text.split())
     return min_words <= word_count <= max_words
 def is_verbatim_repetition(
-    new_text: str, history: List[Dict], system_prompt: str
 ) -> bool:
-    """Check if text is exact repetition. History is now list of message dicts."""
     new_text_normalized = new_text.strip().lower()
     if new_text_normalized == system_prompt.strip().lower():
         return True
-    # Check against previous user messages
-    for msg in history:
-        if msg.get("role") == "user" and msg.get("content"):
-            if new_text_normalized == msg["content"].strip().lower():
-                return True
     return False
 @spaces.GPU
-def generate_user_message(
     messages: List[Dict[str, str]],
-    history: List[Dict],
     system_prompt: str,
     max_new_tokens: int = 256,
     temperature: float = 1.0,
     top_p: float = 0.8,
     max_retries: int = 5,
 ) -> str:
-    """Generate a user message with guardrails from Appendix C.1."""
     for attempt in range(max_retries):
         inputs = tokenizer.apply_chat_template(
             messages,
             return_tensors="pt",
@@ -117,9 +162,10 @@ def generate_user_message(
                 max_new_tokens=max_new_tokens,
                 eos_token_id=EOS_TOKEN_ID,
                 pad_token_id=tokenizer.eos_token_id,
-                bad_words_ids=BAD_WORDS_IDS,
             )
         generated = outputs[0][inputs.shape[1] :]
         text = tokenizer.decode(generated, skip_special_tokens=True).strip()
@@ -130,10 +176,13 @@ def generate_user_message(
         if is_verbatim_repetition(text, history, system_prompt):
             continue
         return text
-    # If all retries failed
-    return "(Unable to generate valid response after multiple attempts)"
 # ----------------------
@@ -141,35 +190,30 @@ def generate_user_message(
 # ----------------------
-def generate_next_turn(
-    assistant_response: str,
-    chat_history: List[Dict],
     system_prompt: str,
     max_new_tokens: int,
     temperature: float,
     top_p: float,
 ):
-    """
-    History format: List of {"role": "user"/"assistant", "content": "..."}
-    - "user" role = UserLM (displays LEFT)
-    - "assistant" role = Human (displays RIGHT)
-    """
-    # If we have an assistant response, add it to history
-    if assistant_response.strip():
-        chat_history.append(
-            {"role": "assistant", "content": assistant_response.strip()}
-        )
-    # Build messages for UserLM from history
-    messages = []
-    if system_prompt.strip():
-        messages.append({"role": "system", "content": system_prompt.strip()})
-    messages.extend(chat_history)
-    # Generate next user message
-    try:
-        user_msg = generate_user_message(
             messages,
             chat_history,
             system_prompt,
@@ -177,17 +221,46 @@ def generate_next_turn(
             temperature=temperature,
             top_p=top_p,
         )
-    except Exception as e:
-        user_msg = f"(Generation error: {e})"
-    # Add new user message to history
-    new_history = chat_history + [{"role": "user", "content": user_msg}]
-    return "", new_history, "Generate Next User Message"
-def clear_conversation():
-    return [], DEFAULT_SYSTEM_PROMPT, [], "Generate First User Message", []
 # ----------------------
@@ -198,13 +271,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         f"""
     # UserLM-8b: User Language Model Demo
-    **How to use:**
-    1. Set the user's intent below
-    2. Click "Generate First User Message"
-    3. Type your assistant response and click "Generate Next User Message"
-    4. Repeat step 3 to continue the conversation
-    **Model:** `{MODEL_ID}` on **{device}**
     """
     )
@@ -213,20 +282,20 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             label="User Intent",
             value=DEFAULT_SYSTEM_PROMPT,
             lines=3,
-            placeholder="Enter what the user wants to accomplish",
         )
     chatbot = gr.Chatbot(
         height=420,
         label="Conversation",
-        type="messages",  # Changed from tuples to have more control
-        # Will manually format messages with role attribute
     )
     with gr.Row():
         msg = gr.Textbox(
-            label="Your Assistant Response",
-            placeholder="Type your assistant response here (leave empty for first turn)",
             lines=2,
         )
@@ -236,47 +305,49 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         top_p = gr.Slider(0.0, 1.0, value=0.8, step=0.01, label="top_p")
     with gr.Row():
-        submit_btn = gr.Button("Generate User Message", variant="primary")
         clear_btn = gr.Button("Clear")
-    state = gr.State([])
     with gr.Accordion("Implementation Details", open=False):
         gr.Markdown(
             """
-        ### Generation Strategy
-        Based on [Appendix C.1](https://arxiv.org/abs/2510.06552), this implements:
-        - **Sampling:** temp=1.0, top_p=0.8 (paper recommendations)
-        - **First token filtering:** Blocks I/You/Here to prevent repetition
-        - **Length constraints:** 3-50 words to avoid revealing entire intent at once
-        - **Repetition filtering:** Prevents verbatim copies of prior turns
-        **Note:** UserLM simulates human users, not assistants. You play the assistant role.
-        """
         )
     def _submit(asst_text, history, system_prompt, mnt, temp, tp):
-        new_msg, new_history = generate_next_turn(
-            asst_text, history, system_prompt, mnt, temp, tp
-        )
-        return new_msg, new_history, new_history
     submit_btn.click(
         fn=_submit,
         inputs=[msg, state, system_box, max_new_tokens, temperature, top_p],
-        outputs=[msg, state, chatbot],
     )
     msg.submit(
         fn=_submit,
         inputs=[msg, state, system_box, max_new_tokens, temperature, top_p],
-        outputs=[msg, state, chatbot],
     )
-    clear_btn.click(
-        fn=clear_conversation,
-        outputs=[state, system_box, chatbot],
-    )
 if __name__ == "__main__":
     demo.queue().launch()

 # ----------------------
+def build_messages(
+    system_prompt: str, history: List[Tuple[str, str]]
+) -> List[Dict[str, str]]:
+    """Transform Gradio history into chat template messages.
+
+    History is stored as (human_assistant_msg, model_user_msg) pairs for display,
+    so each pair is flipped back into user-then-assistant order for the model's
+    chat template.
+
+    Args:
+        system_prompt: The user intent. Emitted (stripped) as a leading "system"
+            message unless it is empty or whitespace-only.
+        history: Conversation pairs of (human_assistant_msg, model_user_msg).
+            Falsy entries, such as the "" placeholder for an assistant reply
+            that has not been typed yet, are skipped.
+
+    Returns:
+        A list of {"role": ..., "content": ...} dicts: the optional system
+        message, then for every pair the "user" message followed by the
+        "assistant" message.
+    """
+    messages: List[Dict[str, str]] = []
+    if system_prompt.strip():
+        messages.append({"role": "system", "content": system_prompt.strip()})
+    # Flip the roles: history stores (human's assistant msg, model's user msg)
+    for human_assistant, model_user in history:
+        if model_user:  # Model's user message
+            messages.append({"role": "user", "content": model_user})
+        if human_assistant:  # Human's assistant response
+            messages.append({"role": "assistant", "content": human_assistant})
+    return messages
+def apply_first_token_filter(
+    logits: torch.Tensor, filter_ids: List[int]
+) -> torch.Tensor:
+    """Apply logit filter for problematic first tokens (Guardrail 1).
+
+    The input tensor is cloned first, so the caller's logits are not modified.
+    For every id in filter_ids the entry at [0, -1, token_id] is set to -inf.
+    NOTE(review): the indexing assumes a 3-D (batch, seq, vocab) layout and only
+    masks batch index 0 - confirm against the caller.
+
+    Returns:
+        The filtered copy of logits.
+    """
+    logits_filtered = logits.clone()
+    for token_id in filter_ids:
+        logits_filtered[0, -1, token_id] = float("-inf")
+    return logits_filtered
 def is_valid_length(text: str, min_words: int = 3, max_words: int = 50) -> bool:
+    """Check if generated text meets length requirements (Guardrail 3).
+
+    Words are counted by splitting on whitespace; both bounds are inclusive.
+
+    The paper used max_words=25 for its simulation experiments; this interactive
+    demo defaults to 50 to allow slightly longer responses while still preventing
+    the model from revealing the entire intent at once.
+    """
     word_count = len(text.split())
     return min_words <= word_count <= max_words
 def is_verbatim_repetition(
+    new_text: str, history: List[Tuple[str, str]], system_prompt: str
 ) -> bool:
+    """Check if text is exact repetition of prior user turn or system prompt (Guardrail 4).
+
+    The comparison ignores case and leading/trailing whitespace. Only the second
+    element of each history pair (the model's user message) is compared; empty
+    entries are skipped.
+    """
     new_text_normalized = new_text.strip().lower()
+    # Check against system prompt
     if new_text_normalized == system_prompt.strip().lower():
         return True
+    # Check against previous model user messages (stored in second position)
+    for _, model_user in history:
+        if model_user and new_text_normalized == model_user.strip().lower():
+            return True
     return False
 @spaces.GPU
+def generate_reply(
     messages: List[Dict[str, str]],
+    history: List[Tuple[str, str]],
     system_prompt: str,
     max_new_tokens: int = 256,
     temperature: float = 1.0,
     top_p: float = 0.8,
     max_retries: int = 5,
 ) -> str:
+    """Run generation with guardrails from Appendix C.1.
+
+    Implements all 4 guardrails from the paper:
+    1. Filter problematic first tokens
+    2. Optionally avoid dialogue termination (disabled by default for demo)
+    3. Enforce length thresholds with retry
+    4. Filter verbatim repetitions with retry
+
+    NOTE(review): item 2 says termination avoidance is disabled, yet
+    bad_words_ids=BAD_WORDS_IDS is always passed to generation below - confirm
+    which of the two is intended.
+
+    Returns:
+        The newly generated text only (prompt tokens sliced off), decoded
+        without special tokens and stripped.
+
+    Raises:
+        RuntimeError: If no attempt yields an accepted text within max_retries.
+    """
     for attempt in range(max_retries):
+        # Prepare input ids using the model's chat template
         inputs = tokenizer.apply_chat_template(
             messages,
             return_tensors="pt",
                 max_new_tokens=max_new_tokens,
                 eos_token_id=EOS_TOKEN_ID,
                 pad_token_id=tokenizer.eos_token_id,
+                bad_words_ids=BAD_WORDS_IDS,  # Prevents <|endconversation|>
             )
+        # Slice off the prompt tokens to get only the new text
         generated = outputs[0][inputs.shape[1] :]
         text = tokenizer.decode(generated, skip_special_tokens=True).strip()
         if is_verbatim_repetition(text, history, system_prompt):
             continue
+        # Success - return the valid text
         return text
+    # If all retries failed, raise an error
+    raise RuntimeError(
+        f"Failed to generate valid response after {max_retries} attempts"
+    )
 # ----------------------
 # ----------------------
+def respond(
+    assistant_message: str,
+    chat_history: List[Tuple[str, str]],
     system_prompt: str,
     max_new_tokens: int,
     temperature: float,
     top_p: float,
 ):
+    """Generate next user turn.
+
+    Flow:
+    - If history empty: Generate first user message (ignores assistant_message input)
+    - If history exists with assistant message: Add it and generate next user turn
+    - If history exists without assistant message: Show a gr.Info notice and
+      return the history unchanged
+
+    History format: (human_assistant_msg, model_user_msg) for proper display.
+    A freshly generated user turn is stored as ("", user_reply); the empty slot
+    is filled with the human's assistant response on the following call.
+
+    Returns:
+        The updated history twice, as a 2-tuple (both items are the same list).
+
+    Note:
+        The last entry of the incoming chat_history list is replaced in place
+        when the assistant response is filled in. Exceptions raised by
+        generate_reply are not caught here.
+    """
+    # First message generation - ignore any text in the assistant box
+    if len(chat_history) == 0:
+        # Generate initial user message from system prompt alone
+        messages = build_messages(system_prompt, [])
+        user_reply = generate_reply(
             messages,
             chat_history,
             system_prompt,
             temperature=temperature,
             top_p=top_p,
         )
+        # Start conversation with first user message
+        chat_history = [("", user_reply)]
+        return chat_history, chat_history
+    # Subsequent messages - require assistant response
+    if not assistant_message.strip():
+        # User clicked generate without providing assistant response
+        # Just return current state without changes
+        gr.Info(
+            "Please type your assistant response before generating the next user message."
+        )
+        return chat_history, chat_history
+    # Update history with human's assistant message
+    if len(chat_history) > 0:
+        # Fill in the human's assistant response for the last turn
+        _, last_model_user = chat_history[-1]
+        chat_history[-1] = (assistant_message.strip(), last_model_user)
+    # Build messages for next user turn generation
+    messages = build_messages(system_prompt, chat_history)
+    user_reply = generate_reply(
+        messages,
+        chat_history,
+        system_prompt,
+        max_new_tokens=max_new_tokens,
+        temperature=temperature,
+        top_p=top_p,
+    )
+    # Add new model user message to history
+    chat_history = chat_history + [("", user_reply)]
+    return chat_history, chat_history
+def clear_state():
+    """Return the reset values: an empty history and the default system prompt."""
+    return [], DEFAULT_SYSTEM_PROMPT
 # ----------------------
         f"""
     # UserLM-8b: User Language Model Demo
+    **Model:** `{MODEL_ID}` | **Device:** `{device}`
+    The AI plays the user, you play the assistant.
     """
     )
             label="User Intent",
             value=DEFAULT_SYSTEM_PROMPT,
             lines=3,
+            placeholder="Enter the user's goal or intent",
         )
+    # Display with role labels to clarify the reversal
     chatbot = gr.Chatbot(
         height=420,
         label="Conversation",
+        avatar_images=(None, None),  # Remove default avatars to avoid confusion
     )
     with gr.Row():
         msg = gr.Textbox(
+            label="Assistant Response",
+            placeholder="Leave empty for first generation, then type your responses",
             lines=2,
         )
         top_p = gr.Slider(0.0, 1.0, value=0.8, step=0.01, label="top_p")
     with gr.Row():
+        submit_btn = gr.Button("Generate", variant="primary")
         clear_btn = gr.Button("Clear")
+    state = gr.State([])  # chat history state: List[Tuple[human_assistant, model_user]]
     with gr.Accordion("Implementation Details", open=False):
         gr.Markdown(
             """
+            Based on Appendix C.1 of the UserLM paper:
+            - Sampling: temp=1.0, top_p=0.8
+            - First token filtering for problematic tokens
+            - Length constraints: 3-50 words
+            - Repetition filtering
+            """
         )
     def _submit(asst_text, history, system_prompt, mnt, temp, tp):
+        """Run one turn via respond() and return ("", history) for the input box and chat display."""
+        new_history, visible = respond(asst_text, history, system_prompt, mnt, temp, tp)
+        # Clear input box after submission
+        return "", visible
     submit_btn.click(
         fn=_submit,
         inputs=[msg, state, system_box, max_new_tokens, temperature, top_p],
+        outputs=[msg, chatbot],
     )
     msg.submit(
         fn=_submit,
         inputs=[msg, state, system_box, max_new_tokens, temperature, top_p],
+        outputs=[msg, chatbot],
     )
+    # Keep state in sync with the visible Chatbot: the submit handlers only
+    # write to the Chatbot, so every Chatbot change is copied back into state.
+    def _sync_state(chat):
+        return chat
+    chatbot.change(_sync_state, inputs=[chatbot], outputs=[state])
+    def _clear():
+        """Reset history state, system prompt, chat display and input box (same order as the click outputs)."""
+        history, sys = clear_state()  # NOTE: local name `sys` is the system prompt, not the stdlib module
+        return history, sys, history, ""
+    clear_btn.click(_clear, outputs=[state, system_box, chatbot, msg])
 if __name__ == "__main__":
     demo.queue().launch()