Spaces:

Metal3d
/

force-reasoning-any-model

Running on Zero

App Files Community

Metal3d committed on Mar 22

Commit

55737cf

unverified ·

1 Parent(s): 13032ef

Forbid the user from producing the answer marker

Browse files

Files changed (1) hide show

app.py +11 -6

app.py CHANGED Viewed

@@ -16,6 +16,9 @@ if gr.NO_RELOAD:
         torch_dtype="auto",
     )
 # the sentences starting the reasoning step by step
 rethink_prepends = [
     "OK, I need to figure out ",
@@ -28,7 +31,7 @@ rethink_prepends = [
     "I think I have a good grasp ",
     "Now, using all the above information, I can answer the question using the original language used for the question:"
     "\n{question}\n"
-    "\n**ANSWER**\n",
 ]
@@ -52,7 +55,9 @@ def reformat_math(text):
 def user_input(message, history: list):
     """Append the user input to the history and clear the input textbox"""
-    return "", history + [gr.ChatMessage(role="user", content=message)]
 def rebuild_messages(history: list):
@@ -103,7 +108,7 @@ def bot(history: list, max_num_tokens: int, final_num_tokens: int):
         messages[-1]["content"] += prepend.format(question=question)
         num_tokens = int(
-            max_num_tokens if "**ANSWER**" not in prepend else final_num_tokens
         )
         t = threading.Thread(
             target=pipe,
@@ -117,7 +122,7 @@ def bot(history: list, max_num_tokens: int, final_num_tokens: int):
         # rebuild the history with the new content
         history[-1].content += prepend
-        if "**ANSWER**" in prepend:
             # stop thinking, this is the answer now (no metadata for intermediate steps)
             history.append(gr.ChatMessage(role="assistant", content=""))
         for token in streamer:
@@ -179,8 +184,8 @@ with gr.Blocks(fill_height=True, title="Making any model reasoning") as demo:
             A good value is 200 to 255.
             """)
             gr.Markdown("""
-            This interface can work on personal computer with 6Go VRAM (e.g. NVidia 30NV). Feel free to fork
-            the application and try others instruct models.
             """)
     # when the user submits a message, the bot will answer

         torch_dtype="auto",
     )
+# the answer marker used to detect the final answer
+ANSWER_MARKER = "**ANSWER**"
 # the sentences starting the reasoning step by step
 rethink_prepends = [
     "OK, I need to figure out ",
     "I think I have a good grasp ",
     "Now, using all the above information, I can answer the question using the original language used for the question:"
     "\n{question}\n"
+    f"\n{ANSWER_MARKER}\n",
 ]
 def user_input(message, history: list):
     """Append the user input to the history and clear the input textbox"""
+    return "", history + [
+        gr.ChatMessage(role="user", content=message.replace(ANSWER_MARKER, ""))
+    ]
 def rebuild_messages(history: list):
         messages[-1]["content"] += prepend.format(question=question)
         num_tokens = int(
+            max_num_tokens if ANSWER_MARKER not in prepend else final_num_tokens
         )
         t = threading.Thread(
             target=pipe,
         # rebuild the history with the new content
         history[-1].content += prepend
+        if ANSWER_MARKER in prepend:
             # stop thinking, this is the answer now (no metadata for intermediate steps)
             history.append(gr.ChatMessage(role="assistant", content=""))
         for token in streamer:
             A good value is 200 to 255.
             """)
             gr.Markdown("""
+            This interface can work on personal computer with 6Go VRAM (e.g. NVidia 3050/3060 on laptop).
+            Feel free to fork the application and try others instruct models.
             """)
     # when the user submits a message, the bot will answer