pszemraj committed
Commit 15dc377 · verified · 1 Parent(s): 5bc353f
Files changed (1): app.py (+53 -62)

app.py CHANGED
@@ -182,18 +182,31 @@ def generate_reply(
 
 
 def respond(
-    user_message: str,
+    assistant_message: str,
     chat_history: List[Tuple[str, str]],
     system_prompt: str,
     max_new_tokens: int,
     temperature: float,
     top_p: float,
 ):
-    # Build messages including prior turns
-    messages = build_messages(system_prompt, chat_history + [(user_message, "")])
+    """Generate next user turn.
+
+    Flow:
+    - If history empty + no assistant msg: Generate first user turn
+    - If history exists: Fill in assistant response to last turn, then generate next user turn
+    """
+
+    # Update history with assistant's message (if provided)
+    if assistant_message.strip() and len(chat_history) > 0:
+        # Fill in the assistant response slot for the last turn
+        last_user_msg, _ = chat_history[-1]
+        chat_history[-1] = (last_user_msg, assistant_message.strip())
+
+    # Build messages for user turn generation
+    messages = build_messages(system_prompt, chat_history)
 
     try:
-        reply = generate_reply(
+        user_reply = generate_reply(
             messages,
             chat_history,
             system_prompt,
@@ -202,9 +215,11 @@ def respond(
             top_p=top_p,
         )
     except Exception as e:
-        reply = f"(Generation error: {e})"
+        user_reply = f"(Generation error: {e})"
+
+    # Add new user message to history (with empty assistant slot)
+    chat_history = chat_history + [(user_reply, "")]
 
-    chat_history = chat_history + [(user_message, reply)]
     return chat_history, chat_history
 
 
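`build_messages` is defined earlier in `app.py` and is untouched by this commit. For readers following the hunk above, here is a minimal sketch of what such a helper typically does with the `(user, assistant)` history tuples; the role names and the skipping of empty assistant slots are assumptions, not the file's actual code:

```python
from typing import Dict, List, Tuple


def build_messages(
    system_prompt: str, history: List[Tuple[str, str]]
) -> List[Dict[str, str]]:
    """Hypothetical reconstruction: flatten (user, assistant) pairs into a
    chat-template message list, skipping any still-empty assistant slot."""
    messages = [{"role": "system", "content": system_prompt}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    return messages
```

Under that reading, the switch from `build_messages(system_prompt, chat_history + [(user_message, "")])` to `build_messages(system_prompt, chat_history)` is the heart of the commit: the pending user turn is no longer passed in as an empty slot, because it is now the thing being generated.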
@@ -220,86 +235,62 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         f"""
         # UserLM-8b: User Language Model Demo
 
-        **Model:** `{MODEL_ID}` on **{device}**
-
-        This demo implements the generation guardrails from [Appendix C.1](https://arxiv.org/abs/2510.06552) of the paper:
-        - Filters problematic first tokens (I, You, Here) that cause repetition
-        - Enforces length thresholds (3-50 words per turn)
-        - Prevents verbatim repetition of prior turns
-        - Uses recommended sampling params: temp=1.0, top_p=0.8
+        **How to use:**
+        1. Set the user's intent in the box below (what the user wants to accomplish)
+        2. Click **Generate User Message** to create the first user message
+        3. Type assistant responses and click Generate to continue the conversation
 
-        **Note:** Unlike typical assistant LMs, UserLM simulates *human users* in conversations.
-        The system prompt defines the user's high-level intent.
+        **Model:** `{MODEL_ID}` on **{device}**
         """
     )
 
     with gr.Row():
         system_box = gr.Textbox(
-            label="User Intent (System Prompt)",
+            label="User Intent",
             value=DEFAULT_SYSTEM_PROMPT,
             lines=3,
             placeholder="Enter a high-level user intent (e.g., 'You are a user who wants to...')",
         )
 
-    chatbot = gr.Chatbot(height=420, label="Simulated User-Assistant Conversation")
+    chatbot = gr.Chatbot(height=420, label="Conversation")
 
     with gr.Row():
         msg = gr.Textbox(
-            label="Assistant Response",
-            placeholder="Type the assistant's response to the user",
+            label="Assistant Response (optional for first turn)",
+            placeholder="Leave empty to generate first user message, or type assistant response to continue",
            lines=2,
         )
 
-    with gr.Accordion(
-        "Generation Settings (Based on Paper Recommendations)", open=False
-    ):
-        max_new_tokens = gr.Slider(
-            16,
-            512,
-            value=256,
-            step=16,
-            label="max_new_tokens",
-            info="Max tokens per user turn. Paper used stricter limits for simulation.",
-        )
-        temperature = gr.Slider(
-            0.0,
-            2.0,
-            value=1.0,
-            step=0.05,
-            label="temperature",
-            info="Paper recommends 1.0 for realistic user diversity",
-        )
-        top_p = gr.Slider(
-            0.0,
-            1.0,
-            value=0.8,
-            step=0.01,
-            label="top_p",
-            info="Paper recommends 0.8 (not 0.9)",
-        )
+    with gr.Accordion("Generation Settings", open=False):
+        max_new_tokens = gr.Slider(16, 512, value=256, step=16, label="max_new_tokens")
+        temperature = gr.Slider(0.0, 2.0, value=1.0, step=0.05, label="temperature")
+        top_p = gr.Slider(0.0, 1.0, value=0.8, step=0.01, label="top_p")
 
     with gr.Row():
-        submit_btn = gr.Button("Generate User Response", variant="primary")
+        submit_btn = gr.Button("Generate User Message", variant="primary")
         clear_btn = gr.Button("Clear")
 
     state = gr.State([])  # chat history state: List[Tuple[user, assistant]]
 
-    gr.Markdown(
+    with gr.Accordion("Implementation Details", open=False):
+        gr.Markdown(
+            """
+            ### Generation Strategy
+
+            Based on [Appendix C.1](https://arxiv.org/abs/2510.06552) of the UserLM paper, this demo implements:
+            - **Recommended sampling:** temp=1.0, top_p=0.8 (not the typical 0.8/0.9)
+            - **First token filtering:** Blocks problematic tokens (I, You, Here) that cause repetition
+            - **Length constraints:** 3-50 words per turn to prevent revealing entire intent at once
+            - **Repetition filtering:** Prevents verbatim copies of prior turns
+
+            These guardrails are essential for the 8B model to produce realistic user behavior.
+
+            **Note:** Unlike assistant LMs, UserLM simulates human *users* in conversations.
         """
-        ### Usage Tips:
-        - The **system prompt** defines the user's goal (keep it high-level, not overly specific)
-        - Type what the **assistant says** in response
-        - Click **Generate User Response** to simulate how a human user would reply
-        - UserLM naturally reveals intent across multiple turns, not all at once
-        """
-    )
-
-    def _submit(user_text, history, system_prompt, mnt, temp, tp):
-        if not user_text or not user_text.strip():
-            return gr.update(), history
-        new_history, visible = respond(
-            user_text.strip(), history, system_prompt, mnt, temp, tp
         )
+
+    def _submit(asst_text, history, system_prompt, mnt, temp, tp):
+        new_history, visible = respond(asst_text, history, system_prompt, mnt, temp, tp)
         return "", visible
 
     submit_btn.click(
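The guardrails listed in the new Implementation Details accordion (first-token filter, 3-50 word length band, repetition check, temperature 1.0 with top_p 0.8) live in `generate_reply`, which this commit does not modify. Here is a sketch of how those four constraints can be layered on top of `model.generate` as plain rejection sampling, assuming a Hugging Face `transformers` model and tokenizer; the function name, retry budget, and fallback behavior are illustrative, not the repo's implementation:

```python
from typing import List

import torch

BANNED_FIRST_WORDS = {"I", "You", "Here"}  # first words the paper flags


def sample_user_turn(
    model,
    tokenizer,
    prompt_ids: torch.Tensor,
    prior_turns: List[str],
    max_new_tokens: int = 256,
    max_retries: int = 5,
) -> str:
    """Rejection-sampling sketch of the Appendix C.1 guardrails."""
    reply = ""
    for _ in range(max_retries):
        output = model.generate(
            prompt_ids,
            do_sample=True,
            temperature=1.0,  # paper-recommended sampling
            top_p=0.8,
            max_new_tokens=max_new_tokens,
            pad_token_id=tokenizer.eos_token_id,
        )
        # Decode only the newly generated tokens
        reply = tokenizer.decode(
            output[0, prompt_ids.shape[1]:], skip_special_tokens=True
        ).strip()
        words = reply.split()
        if not words or words[0].strip(".,!?") in BANNED_FIRST_WORDS:
            continue  # first-token filter
        if not 3 <= len(words) <= 50:
            continue  # length threshold
        if reply in prior_turns:
            continue  # verbatim-repetition filter
        return reply
    return reply  # fall back to the last draw if every retry was rejected
```

An equivalent first-token filter can also be applied at the logits level (e.g., with a custom `LogitsProcessor`), which avoids regenerating whole turns; the loop above is simply the most direct way to honor all four checks at once.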
@@ -326,4 +317,4 @@
     clear_btn.click(_clear, outputs=[state, system_box, chatbot, msg])
 
 if __name__ == "__main__":
-    demo.queue().launch()
+    demo.queue().launch()
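The `submit_btn.click(` call is truncated in the hunks above, so the exact wiring is not visible here. As a generic illustration of the pattern the app relies on, with `gr.State` holding the canonical `List[Tuple[str, str]]` history while `gr.Chatbot` mirrors it, here is a self-contained sketch; the component names and the dummy `step` handler are illustrative only:

```python
import gradio as gr

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()  # renders a List[Tuple[user, assistant]]
    state = gr.State([])    # canonical history, persisted across events
    box = gr.Textbox(label="Assistant Response")
    go = gr.Button("Generate User Message")

    def step(text: str, history: list):
        # Stand-in for respond(): append a dummy (user, assistant) turn.
        history = history + [("(generated user turn)", text)]
        return "", history, history  # clear the textbox, update state + chatbot

    # Same shape as the app's submit_btn.click: the state is an input,
    # and both the state and the visible chatbot receive the new history.
    go.click(step, inputs=[box, state], outputs=[box, state, chatbot])

if __name__ == "__main__":
    demo.queue().launch()  # queue(), as in the app, queues concurrent requests
```

Returning the same history to both `state` and `chatbot` keeps the visible transcript and the model's context in sync, which is exactly why `respond` returns `chat_history, chat_history`.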