Spaces:
Running
on
Zero
Running
on
Zero
Prevent the user from producing the answer marker
Browse files
app.py
CHANGED
|
@@ -16,6 +16,9 @@ if gr.NO_RELOAD:
|
|
| 16 |
torch_dtype="auto",
|
| 17 |
)
|
| 18 |
|
|
|
|
|
|
|
|
|
|
| 19 |
# the sentences starting the reasoning step by step
|
| 20 |
rethink_prepends = [
|
| 21 |
"OK, I need to figure out ",
|
|
@@ -28,7 +31,7 @@ rethink_prepends = [
|
|
| 28 |
"I think I have a good grasp ",
|
| 29 |
"Now, using all the above information, I can answer the question using the original language used for the question:"
|
| 30 |
"\n{question}\n"
|
| 31 |
-
"\n
|
| 32 |
]
|
| 33 |
|
| 34 |
|
|
@@ -52,7 +55,9 @@ def reformat_math(text):
|
|
| 52 |
|
| 53 |
def user_input(message, history: list):
|
| 54 |
"""Append the user input in the history and clean the input textbox"""
|
| 55 |
-
return "", history + [
|
|
|
|
|
|
|
| 56 |
|
| 57 |
|
| 58 |
def rebuild_messages(history: list):
|
|
@@ -103,7 +108,7 @@ def bot(history: list, max_num_tokens: int, final_num_tokens: int):
|
|
| 103 |
messages[-1]["content"] += prepend.format(question=question)
|
| 104 |
|
| 105 |
num_tokens = int(
|
| 106 |
-
max_num_tokens if
|
| 107 |
)
|
| 108 |
t = threading.Thread(
|
| 109 |
target=pipe,
|
|
@@ -117,7 +122,7 @@ def bot(history: list, max_num_tokens: int, final_num_tokens: int):
|
|
| 117 |
|
| 118 |
# rebuild the history with the new content
|
| 119 |
history[-1].content += prepend
|
| 120 |
-
if
|
| 121 |
# stop thinking, this is the answer now (no metadata for intermediate steps)
|
| 122 |
history.append(gr.ChatMessage(role="assistant", content=""))
|
| 123 |
for token in streamer:
|
|
@@ -179,8 +184,8 @@ with gr.Blocks(fill_height=True, title="Making any model reasoning") as demo:
|
|
| 179 |
A good value is 200 to 255.
|
| 180 |
""")
|
| 181 |
gr.Markdown("""
|
| 182 |
-
This interface can work on personal computer with 6Go VRAM (e.g. NVidia
|
| 183 |
-
the application and try others instruct models.
|
| 184 |
""")
|
| 185 |
|
| 186 |
# when the user submits a message, the bot will answer
|
|
|
|
| 16 |
torch_dtype="auto",
|
| 17 |
)
|
| 18 |
|
| 19 |
+
# the answer marker to detect final answer
|
| 20 |
+
ANSWER_MARKER = "**ANSWER**"
|
| 21 |
+
|
| 22 |
# the sentences starting the reasoning step by step
|
| 23 |
rethink_prepends = [
|
| 24 |
"OK, I need to figure out ",
|
|
|
|
| 31 |
"I think I have a good grasp ",
|
| 32 |
"Now, using all the above information, I can answer the question using the original language used for the question:"
|
| 33 |
"\n{question}\n"
|
| 34 |
+
f"\n{ANSWER_MARKER}\n",
|
| 35 |
]
|
| 36 |
|
| 37 |
|
|
|
|
| 55 |
|
| 56 |
def user_input(message, history: list):
    """Append the user's message to the history and clear the input textbox.

    Every occurrence of ``ANSWER_MARKER`` is stripped from the message before
    it is stored, so the user cannot inject the marker into the conversation.

    Args:
        message: Raw text typed by the user.
        history: Current chat history. It is not mutated; a new list is
            returned instead.

    Returns:
        A tuple ``("", new_history)``: the empty string that replaces the
        textbox content, and the history extended with one user
        ``gr.ChatMessage``.
    """
    cleaned = message
    # A single ``str.replace`` pass is not enough: deleting one marker can
    # splice the surrounding text into a brand new marker (for example
    # "**ANS**ANSWER**WER**" collapses to "**ANSWER**"). Keep stripping until
    # none is left. Each pass shortens the string, so the loop terminates;
    # the truthiness guard avoids spinning forever on an empty marker.
    while ANSWER_MARKER and ANSWER_MARKER in cleaned:
        cleaned = cleaned.replace(ANSWER_MARKER, "")
    return "", history + [gr.ChatMessage(role="user", content=cleaned)]
|
| 61 |
|
| 62 |
|
| 63 |
def rebuild_messages(history: list):
|
|
|
|
| 108 |
messages[-1]["content"] += prepend.format(question=question)
|
| 109 |
|
| 110 |
num_tokens = int(
|
| 111 |
+
max_num_tokens if ANSWER_MARKER not in prepend else final_num_tokens
|
| 112 |
)
|
| 113 |
t = threading.Thread(
|
| 114 |
target=pipe,
|
|
|
|
| 122 |
|
| 123 |
# rebuild the history with the new content
|
| 124 |
history[-1].content += prepend
|
| 125 |
+
if ANSWER_MARKER in prepend:
|
| 126 |
# stop thinking, this is the answer now (no metadata for intermediate steps)
|
| 127 |
history.append(gr.ChatMessage(role="assistant", content=""))
|
| 128 |
for token in streamer:
|
|
|
|
| 184 |
A good value is 200 to 255.
|
| 185 |
""")
|
| 186 |
gr.Markdown("""
|
| 187 |
+
This interface can work on personal computer with 6Go VRAM (e.g. NVidia 3050/3060 on laptop).
|
| 188 |
+
Feel free to fork the application and try others instruct models.
|
| 189 |
""")
|
| 190 |
|
| 191 |
# when the user submits a message, the bot will answer
|