GRM2-Chat

Running on Zero

hysts HF Staff committed on Mar 6

Commit

329bc40

1 Parent(s): eb884c1

Fix

Files changed (1) hide show

app.py CHANGED Viewed

@@ -71,8 +71,8 @@ def _generate_on_gpu(
     thread.join()
     if exception_holder:
-        msg = f"Generation failed: {exception_holder[0]}"
-        raise gr.Error(msg)
 def generate(
@@ -87,15 +87,22 @@ def generate(
     if not message or not message.strip():
         raise gr.Error("Please enter a message.")
-    conversation = [*chat_history, {"role": "user", "content": message}]
     input_ids = tokenizer.apply_chat_template(
         conversation, add_generation_prompt=True, return_tensors="pt", return_dict=True
     ).input_ids
     n_input_tokens = input_ids.shape[1]
     if n_input_tokens > MAX_INPUT_TOKENS:
-        msg = f"Input too long ({n_input_tokens} tokens). Maximum is {MAX_INPUT_TOKENS} tokens."
-        raise gr.Error(msg)
     max_new_tokens = min(max_new_tokens, MAX_INPUT_TOKENS - n_input_tokens)
     if max_new_tokens <= 0:

     thread.join()
     if exception_holder:
+        error_msg = f"Generation failed: {exception_holder[0]}"
+        raise gr.Error(error_msg)
 def generate(
     if not message or not message.strip():
         raise gr.Error("Please enter a message.")
+    conversation = []
+    for hist_msg in chat_history:
+        if isinstance(hist_msg["content"], list):
+            text = "".join(part["text"] for part in hist_msg["content"] if part["type"] == "text")
+        else:
+            text = str(hist_msg["content"])
+        conversation.append({"role": hist_msg["role"], "content": text})
+    conversation.append({"role": "user", "content": message})
     input_ids = tokenizer.apply_chat_template(
         conversation, add_generation_prompt=True, return_tensors="pt", return_dict=True
     ).input_ids
     n_input_tokens = input_ids.shape[1]
     if n_input_tokens > MAX_INPUT_TOKENS:
+        error_msg = f"Input too long ({n_input_tokens} tokens). Maximum is {MAX_INPUT_TOKENS} tokens."
+        raise gr.Error(error_msg)
     max_new_tokens = min(max_new_tokens, MAX_INPUT_TOKENS - n_input_tokens)
     if max_new_tokens <= 0: