st192011 committed on
Commit
dbdd36b
·
verified ·
1 Parent(s): 6f0c92d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -7
app.py CHANGED
@@ -14,7 +14,7 @@ MODEL_PATH = "models/BitNet-b1.58-2B-4T/ggml-model-i2_s.gguf"
14
  DEFAULT_SYSTEM_PROMPT = (
15
  "You are a Socratic assistant. Do not answer questions directly. "
16
  "Instead, respond exclusively with 3 deep, reflective questions. "
17
- "Then generate stop token"
18
  )
19
 
20
  # ==============================================================================
@@ -51,9 +51,8 @@ def streaming_chat(user_query, system_prompt):
51
 
52
  # These are the markers our Python function uses to slice the text
53
  stop_markers = [
54
- "Stop token", "stop token",
55
- "Stop.", "stop.",
56
- "Response:", "Response",
57
  "Assistant:"
58
  ]
59
 
@@ -128,7 +127,7 @@ We deployed `microsoft/bitnet-b1.58-2B-4T-gguf`. While this preserved its founda
128
 
129
  #### The Stop-Token Anchor Hack
130
  To enforce structure, we modified the System Prompt to force the model to declare its own stopping point:
131
- > *"You are a Socratic assistant... Respond exclusively with 3 deep, reflective questions. Then generate stop token"*
132
 
133
  This instruction forces the text-prediction engine to anchor itself on a predictable phrase. While the model still experiences trailing hallucinations, it prints a recognizable marker *immediately after* providing the high-quality questions.
134
 
@@ -158,9 +157,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
158
  gr.Markdown("### 🛠️ The \"Stop Token\" Hack")
159
  gr.Markdown(
160
  "**Base models don't know how to stop talking!**\n\n"
161
- "To prevent infinite loops, our system prompt instructs the model to literally type the words `Stop token` when it is finished. "
162
  "Our Python backend uses a **Lookahead Buffer** to watch for those words. If it sees them, it instantly slices them out and kills the engine.\n\n"
163
- "*🧪 Try deleting the words `'Then generate stop token'` from the prompt below and see what happens!*"
164
  )
165
 
166
  with gr.Column(scale=2):
 
14
  DEFAULT_SYSTEM_PROMPT = (
15
  "You are a Socratic assistant. Do not answer questions directly. "
16
  "Instead, respond exclusively with 3 deep, reflective questions. "
17
+ "Then generate %^%^%^"
18
  )
19
 
20
  # ==============================================================================
 
51
 
52
  # These are the markers our Python function uses to slice the text
53
  stop_markers = [
54
+ "%^%^%^",
55
+ "User:",
 
56
  "Assistant:"
57
  ]
58
 
 
127
 
128
  #### The Stop-Token Anchor Hack
129
  To enforce structure, we modified the System Prompt to force the model to declare its own stopping point:
130
+ > *"You are a Socratic assistant... Respond exclusively with 3 deep, reflective questions. Then generate %^%^%^"*
131
 
132
  This instruction forces the text-prediction engine to anchor itself on a predictable phrase. While the model still experiences trailing hallucinations, it prints a recognizable marker *immediately after* providing the high-quality questions.
133
 
 
157
  gr.Markdown("### 🛠️ The \"Stop Token\" Hack")
158
  gr.Markdown(
159
  "**Base models don't know how to stop talking!**\n\n"
160
+ "To prevent infinite loops, our system prompt instructs the model to literally type the words `%^%^%^` when it is finished. "
161
  "Our Python backend uses a **Lookahead Buffer** to watch for those words. If it sees them, it instantly slices them out and kills the engine.\n\n"
162
+ "*🧪 Try deleting the words `'Then generate %^%^%^'` from the prompt below and see what happens!*"
163
  )
164
 
165
  with gr.Column(scale=2):