Spaces:
Running
Running
WIP using tokenize endpoint directly
Browse files- internals.py +67 -16
internals.py
CHANGED
|
@@ -2,6 +2,8 @@ import streamlit as st
|
|
| 2 |
import requests
|
| 3 |
import json
|
| 4 |
|
|
|
|
|
|
|
| 5 |
def show_token(token: str, escape_markdown=True) -> str:
|
| 6 |
token_display = token.replace('\n', '↵').replace('\t', '⇥')
|
| 7 |
if escape_markdown:
|
|
@@ -14,6 +16,7 @@ def show_internals():
|
|
| 14 |
if 'messages' not in st.session_state or st.button("Start a new conversation"):
|
| 15 |
st.session_state['messages'] = [{"role": "user", "content": ""}]
|
| 16 |
st.session_state['msg_in_progress'] = ""
|
|
|
|
| 17 |
messages = st.session_state.messages
|
| 18 |
|
| 19 |
def rewind_to(i):
|
|
@@ -33,19 +36,55 @@ def show_internals():
|
|
| 33 |
if msg_in_progress is None:
|
| 34 |
msg_in_progress = ""
|
| 35 |
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
-
def append_token(word):
|
| 39 |
-
messages[-1]['content'] = st.session_state['msg_in_progress'] = (
|
| 40 |
-
msg_in_progress + word
|
| 41 |
-
)
|
| 42 |
-
|
| 43 |
def send_message():
|
| 44 |
other_role = "assistant" if last_role == "user" else "user"
|
| 45 |
st.session_state['messages'].append({"role": other_role, "content": ""})
|
| 46 |
st.session_state['msg_in_progress'] = ""
|
| 47 |
st.button("Send", on_click=send_message)
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
# Make request to vLLM.
|
| 50 |
st.write(messages)
|
| 51 |
result = requests.post(
|
|
@@ -76,15 +115,27 @@ def show_internals():
|
|
| 76 |
"logprobs": tok_logprobs
|
| 77 |
})
|
| 78 |
|
| 79 |
-
# Add the last token (set "token" to None)
|
| 80 |
-
last_token_logprobs = result['choices'][0]['logprobs']['content'][0]['top_logprobs']
|
| 81 |
-
logprobs.append(
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
)
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
#st.write(last_token_logprobs)
|
| 90 |
st.write("Conversation so far as tokens (click to show logprobs):")
|
|
@@ -151,7 +202,7 @@ function escapeToken(token) {
|
|
| 151 |
|
| 152 |
{show_logprob_js}
|
| 153 |
|
| 154 |
-
|
| 155 |
</script>
|
| 156 |
"""
|
| 157 |
import streamlit.components.v1 as components
|
|
|
|
| 2 |
import requests
|
| 3 |
import json
|
| 4 |
|
| 5 |
+
placeholders_to_try = '#.?!@$%^&*()_+-=~`|;:"<>,./\\'
|
| 6 |
+
|
| 7 |
def show_token(token: str, escape_markdown=True) -> str:
|
| 8 |
token_display = token.replace('\n', '↵').replace('\t', '⇥')
|
| 9 |
if escape_markdown:
|
|
|
|
| 16 |
if 'messages' not in st.session_state or st.button("Start a new conversation"):
|
| 17 |
st.session_state['messages'] = [{"role": "user", "content": ""}]
|
| 18 |
st.session_state['msg_in_progress'] = ""
|
| 19 |
+
st.session_state['placeholder_token'] = placeholders_to_try[0]
|
| 20 |
messages = st.session_state.messages
|
| 21 |
|
| 22 |
def rewind_to(i):
|
|
|
|
| 36 |
if msg_in_progress is None:
|
| 37 |
msg_in_progress = ""
|
| 38 |
|
| 39 |
+
# Unfortunately chat templates include things like this:
|
| 40 |
+
# {%- set content = render_content(message.content, true)|trim %}
|
| 41 |
+
# so we can't include leading or trailing whitespace.
|
| 42 |
+
# Can't do much about leading whitespace, but we can at least allow trailing whitespace by including a special token for it.
|
| 43 |
+
# Unfortunately there's no single token that never gets joined with any other one, so we have to try a few different ones and see which one actually gets separated out by the tokenizer.
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
messages[-1]['content'] = msg_in_progress + st.session_state.placeholder_token
|
| 47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
def send_message():
|
| 49 |
other_role = "assistant" if last_role == "user" else "user"
|
| 50 |
st.session_state['messages'].append({"role": other_role, "content": ""})
|
| 51 |
st.session_state['msg_in_progress'] = ""
|
| 52 |
st.button("Send", on_click=send_message)
|
| 53 |
|
| 54 |
+
token_ids_req = requests.post(
|
| 55 |
+
"https://vllm.thoughtful-ai.com/tokenize",
|
| 56 |
+
headers={"Content-Type": "application/json"},
|
| 57 |
+
json={
|
| 58 |
+
"model": "Qwen/Qwen3.5-9B",
|
| 59 |
+
"messages": messages,
|
| 60 |
+
"continue_final_message": True,
|
| 61 |
+
"add_generation_prompt": False,
|
| 62 |
+
"return_token_strs": True,
|
| 63 |
+
}
|
| 64 |
+
)
|
| 65 |
+
token_ids_req = token_ids_req.json()
|
| 66 |
+
token_ids = token_ids_req['tokens']
|
| 67 |
+
token_strs = token_ids_req['token_strs']
|
| 68 |
+
|
| 69 |
+
# completion given prompt token ids
|
| 70 |
+
tmp = requests.post(
|
| 71 |
+
"https://vllm.thoughtful-ai.com/v1/completions",
|
| 72 |
+
headers={"Content-Type": "application/json"},
|
| 73 |
+
json={
|
| 74 |
+
"model": "Qwen/Qwen3.5-9B",
|
| 75 |
+
"prompt": token_ids,
|
| 76 |
+
"max_tokens": 2,
|
| 77 |
+
"logprobs": 5,
|
| 78 |
+
"echo": True,
|
| 79 |
+
"top_logprobs": 5,
|
| 80 |
+
}
|
| 81 |
+
)
|
| 82 |
+
st.write(tmp.json()['choices'][0]['logprobs'])
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
|
| 88 |
# Make request to vLLM.
|
| 89 |
st.write(messages)
|
| 90 |
result = requests.post(
|
|
|
|
| 115 |
"logprobs": tok_logprobs
|
| 116 |
})
|
| 117 |
|
| 118 |
+
# # Add the last token (set "token" to None)
|
| 119 |
+
# last_token_logprobs = result['choices'][0]['logprobs']['content'][0]['top_logprobs']
|
| 120 |
+
# logprobs.append(
|
| 121 |
+
# {
|
| 122 |
+
# "token": None,
|
| 123 |
+
# "logprobs": {tok["token"]: tok["logprob"] for tok in last_token_logprobs}
|
| 124 |
+
# }
|
| 125 |
+
# )
|
| 126 |
+
|
| 127 |
+
# The last token was actually the placeholder token, so it serves as the "next token" whose logprobs we want to show. We can just replace it with None for display purposes.
|
| 128 |
+
if logprobs and logprobs[-1]['token'] == st.session_state.placeholder_token:
|
| 129 |
+
logprobs[-1]['token'] = None
|
| 130 |
+
# remove the placeholder token logprobs, since they aren't meaningful
|
| 131 |
+
logprobs[-1]['logprobs'] = {tok: logprob for tok, logprob in logprobs[-1]['logprobs'].items() if tok != st.session_state.placeholder_token}
|
| 132 |
+
else:
|
| 133 |
+
st.warning("Expected the last token to be the placeholder token, but it wasn't. Logprobs may not display correctly.")
|
| 134 |
+
if st.button("Try a different placeholder token"):
|
| 135 |
+
current_index = placeholders_to_try.index(st.session_state.placeholder_token)
|
| 136 |
+
next_index = (current_index + 1) % len(placeholders_to_try)
|
| 137 |
+
st.session_state.placeholder_token = placeholders_to_try[next_index]
|
| 138 |
+
st.rerun()
|
| 139 |
|
| 140 |
#st.write(last_token_logprobs)
|
| 141 |
st.write("Conversation so far as tokens (click to show logprobs):")
|
|
|
|
| 202 |
|
| 203 |
{show_logprob_js}
|
| 204 |
|
| 205 |
+
showLogprobs(allLogprobs.length - 1);
|
| 206 |
</script>
|
| 207 |
"""
|
| 208 |
import streamlit.components.v1 as components
|