Update app.py
app.py CHANGED
@@ -18,18 +18,16 @@ DEFAULT_SYSTEM_PROMPT = (
     "The first two numbers in the sequence are 1 and 1."
 )
 
-device = "cuda" if torch.cuda.is_available() else "cpu"
-
 
 def load_model(model_id: str = MODEL_ID):
     """Load tokenizer and model, with a reasonable dtype and device fallback."""
     tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
 
-    dtype = torch.float16 if device == "cuda" else torch.float32
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
         trust_remote_code=True,
-        torch_dtype=dtype,
+        torch_dtype="auto",
+        device_map="auto",
     )
 
     # Special tokens for stopping / filtering
@@ -57,7 +55,6 @@ def load_model(model_id: str = MODEL_ID):
 
 
 tokenizer, model, EOS_TOKEN_ID, BAD_WORDS_IDS, FIRST_TOKEN_FILTER_IDS = load_model()
-model = model.to(device)
 model.eval()
 
 # ----------------------
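For context on these two hunks: `device_map="auto"` asks transformers (via the accelerate package) to place the weights at load time, and `torch_dtype="auto"` takes the dtype recorded in the checkpoint config, so the module-level `device` probe and the explicit `model.to(device)` are no longer needed. A minimal sketch of the before/after loading styles; the model ID below is a placeholder, since the value of `MODEL_ID` is defined outside this diff:

import torch
from transformers import AutoModelForCausalLM

MODEL_ID = "some-org/some-causal-lm"  # placeholder; the app's MODEL_ID is defined elsewhere

# Before: pick device and dtype by hand, then move the whole model.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32
model_old = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=dtype).to(device)

# After: let the library decide placement and dtype at load time.
model_new = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype="auto",   # dtype from the checkpoint config
    device_map="auto",    # needs `accelerate`; may shard across GPUs and CPU
)
print(model_new.device)   # where the (first) parameters ended up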
@@ -70,18 +67,17 @@ def build_messages(
 ) -> List[Dict[str, str]]:
     """Transform Gradio history into chat template messages.
 
-    Gradio stores this demo's history as (human_assistant, model_user),
-    but we need to flip it back to (user, assistant) for the model's chat template.
+    History is stored as (model_user, human_assistant) tuples.
     """
     messages: List[Dict[str, str]] = []
     if system_prompt.strip():
         messages.append({"role": "system", "content": system_prompt.strip()})
 
-    # Each tuple is (human_assistant, model_user)
-    for human_assistant, model_user in history:
-        if model_user:
+    # Each tuple is (model_user, human_assistant)
+    for model_user, human_assistant in history:
+        if model_user:
             messages.append({"role": "user", "content": model_user})
-        if human_assistant:
+        if human_assistant:
             messages.append({"role": "assistant", "content": human_assistant})
 
     return messages
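Read on its own, the rewritten loop is a straightforward flattening of paired turns into role-tagged messages. A self-contained rendering of the function as it stands after this hunk, with a small usage example:

from typing import Dict, List, Optional, Tuple

def build_messages(
    system_prompt: str,
    history: List[Tuple[Optional[str], Optional[str]]],
) -> List[Dict[str, str]]:
    """Flatten (model_user, human_assistant) tuples into chat-template messages."""
    messages: List[Dict[str, str]] = []
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt.strip()})
    for model_user, human_assistant in history:
        if model_user:
            messages.append({"role": "user", "content": model_user})
        if human_assistant:
            messages.append({"role": "assistant", "content": human_assistant})
    return messages

history = [
    ("How do I reverse a list in Python?", "Use list.reverse() or slicing."),
    ("Does slicing make a copy?", None),  # trailing None: awaiting the human's reply
]
print(build_messages("You are a curious user.", history))
# -> system, user, assistant, user (the None slot is skipped)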
@@ -118,8 +114,8 @@ def is_verbatim_repetition(
     if new_text_normalized == system_prompt.strip().lower():
         return True
 
-    # Check against previous model user messages (second element in tuple)
-    for _, model_user in history:
+    # Check against previous model user messages (first element in tuple)
+    for model_user, _ in history:
         if model_user and new_text_normalized == model_user.strip().lower():
             return True
 
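The repetition guard compares a normalized candidate against the system prompt and every prior model-generated user turn; with the new tuple order the user turn is the first element. A standalone sketch of the check as it reads after this hunk (only strip()+lower() normalization is visible in the shown lines; the real helper's full signature sits outside this diff):

from typing import List, Optional, Tuple

def is_verbatim_repetition(
    new_text: str,
    system_prompt: str,
    history: List[Tuple[Optional[str], Optional[str]]],
) -> bool:
    new_text_normalized = new_text.strip().lower()
    if new_text_normalized == system_prompt.strip().lower():
        return True
    # Check against previous model user messages (first element in tuple)
    for model_user, _ in history:
        if model_user and new_text_normalized == model_user.strip().lower():
            return True
    return False

print(is_verbatim_repetition("Hello there!", "sys", [("hello there!", "hi")]))  # True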
@@ -151,7 +147,7 @@ def generate_reply(
         messages,
         return_tensors="pt",
         add_generation_prompt=True,
-    ).to(device)
+    ).to(model.device)
 
     with torch.no_grad():
         outputs = model.generate(
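`model.device` is a property transformers models expose that reports where the (first) parameters live, so the tokenized prompt follows the model wherever `device_map` placed it, instead of relying on the deleted global. The same pattern with a plain torch module, which lacks that property, looks like this:

import torch

model = torch.nn.Linear(16, 16)
if torch.cuda.is_available():
    model = model.cuda()

inputs = torch.randint(0, 100, (1, 8))
# Plain nn.Modules have no .device property; ask the parameters instead.
# transformers models wrap exactly this lookup behind `model.device`.
inputs = inputs.to(next(model.parameters()).device)
print(inputs.device)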
@@ -202,10 +198,9 @@ def respond(
 
     Flow:
     - If history empty: Generate first user message (ignores assistant_message input)
-    - If history exists with assistant message: Generate next user turn
-    - If history exists without assistant message: Warning to user
+    - If history exists: Add assistant response and generate next user turn
 
-    History format: (human_assistant, model_user)
+    History format: (model_user, human_assistant)
     """
 
     # First message generation - ignore any text in the assistant box
@@ -222,24 +217,21 @@ def respond(
             top_p=top_p,
         )
 
-        # Start conversation with first user message
-        chat_history = [(None, user_reply)]
+        # Start conversation with first user message (empty assistant slot)
+        chat_history = [(user_reply, None)]
         return chat_history, chat_history
 
     # Subsequent messages - require assistant response
     if not assistant_message.strip():
         # User clicked generate without providing assistant response
-        # Just return current state without changes
         gr.Info(
             "Please type your assistant response before generating the next user message."
         )
         return chat_history, chat_history
 
-    # Update the last tuple with the assistant response
-    # (history is stored as (human_assistant, model_user) tuples,
-    # so the assistant reply goes in the first slot)
-    _, last_model_user = chat_history[-1]
-    chat_history[-1] = (assistant_message.strip(), last_model_user)
+    # Update the last tuple with the assistant response
+    last_model_user, _ = chat_history[-1]
+    chat_history[-1] = (last_model_user, assistant_message.strip())
 
     # Build messages for next user turn generation
     messages = build_messages(system_prompt, chat_history)
@@ -253,8 +245,8 @@ def respond(
         top_p=top_p,
     )
 
-    # Add new model user message
-    chat_history.append((None, user_reply))
+    # Add new model user message (with empty assistant slot)
+    chat_history.append((user_reply, None))
 
     return chat_history, chat_history
 
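The history bookkeeping in this and the previous hunk reduces to: fill the open assistant slot of the last tuple, then append a fresh tuple holding the next generated user turn. A pure-Python sketch of that state machine, with generation stubbed out (the real app calls generate_reply()):

from typing import Dict, List, Optional, Tuple

History = List[Tuple[Optional[str], Optional[str]]]  # (model_user, human_assistant)

def fake_generate_reply(messages: List[Dict[str, str]]) -> str:
    # Stand-in for the real generate_reply(); counts prior user turns.
    return f"user turn #{sum(m['role'] == 'user' for m in messages) + 1}"

def respond(assistant_message: str, chat_history: History) -> History:
    if not chat_history:
        # First turn: the model opens the conversation as the user.
        return [(fake_generate_reply([]), None)]
    if not assistant_message.strip():
        return chat_history  # nothing to add yet
    # Fill the open assistant slot of the last tuple ...
    last_model_user, _ = chat_history[-1]
    chat_history[-1] = (last_model_user, assistant_message.strip())
    # ... rebuild the message list (build_messages() in the real app) ...
    messages: List[Dict[str, str]] = []
    for model_user, human_assistant in chat_history:
        if model_user:
            messages.append({"role": "user", "content": model_user})
        if human_assistant:
            messages.append({"role": "assistant", "content": human_assistant})
    # ... and append a fresh tuple with the next generated user turn.
    chat_history.append((fake_generate_reply(messages), None))
    return chat_history

h: History = []
h = respond("", h)                    # -> [('user turn #1', None)]
h = respond("Here is an answer.", h)  # fills the slot, appends turn #2
print(h)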
@@ -271,7 +263,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     f"""
     # UserLM-8b: User Language Model Demo
 
-    **Model:** `{MODEL_ID}`
+    **Model:** `{MODEL_ID}`
 
     The AI plays the user, you play the assistant.
     """
@@ -285,11 +277,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         placeholder="Enter the user's goal or intent",
     )
 
-    # Display with role labels to clarify the reversal
     chatbot = gr.Chatbot(
         height=420,
         label="Conversation",
-        avatar_images=(None, None),  # Remove default avatars to avoid confusion
     )
 
     with gr.Row():
@@ -308,7 +298,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     submit_btn = gr.Button("Generate", variant="primary")
     clear_btn = gr.Button("Clear")
 
-    state = gr.State([])  # chat history
+    state = gr.State([])  # chat history: List[Tuple[model_user, human_assistant]]
 
     with gr.Accordion("Implementation Details", open=False):
         gr.Markdown(
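Why the (model_user, human_assistant) order matters for the UI: Gradio's classic tuple-format Chatbot renders each pair as (left user bubble, right bot bubble), so with the model's generated turn stored first, the reversed-role conversation displays correctly without any flipping. A minimal wiring sketch under that assumption (tuple format; recent Gradio versions prefer type="messages"):

import gradio as gr

def step(assistant_msg, history):
    # Placeholder generation; the real app calls the loaded model here.
    history = history + [(f"next user turn (after {assistant_msg!r})", None)]
    return history, history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(label="Conversation")  # renders (user, bot) tuples
    state = gr.State([])                        # List[Tuple[model_user, human_assistant]]
    assistant_box = gr.Textbox(label="Your assistant reply")
    gr.Button("Generate").click(step, [assistant_box, state], [chatbot, state])

demo.launch()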