Spaces:
Running
Running
File size: 7,661 Bytes
# References for vLLM:
# https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/openai/completion/protocol.py
import streamlit as st
import requests
import json
# Candidate placeholder characters. One of them is appended to the in-progress
# message so that trailing whitespace survives the chat template; the first
# character is the default and the UI can cycle to the next one when the
# tokenizer does not split the current one off as its own token.
placeholders_to_try = '#.?!@$%^&*()_+-=~`|;:"<>,./\\'
def show_token(token: str, escape_markdown=True) -> str:
    """Return *token* in a display-friendly form with visible whitespace.

    Newlines are shown as '↵' and tabs as '⇥'. When *escape_markdown* is
    true, each Markdown-special character is also prefixed with a backslash
    so that the token renders literally.
    """
    substitutions = {ord('\n'): '↵', ord('\t'): '⇥'}
    if escape_markdown:
        # Single pass: every special character (backslash included) gets
        # exactly one escaping backslash in front of it.
        substitutions.update(
            {ord(special): "\\" + special for special in "\\`*_{}[]()#+-.!"}
        )
    return token.translate(substitutions)
# Server and model shared by the tokenize and completion requests below.
_VLLM_BASE_URL = "https://vllm.thoughtful-ai.com"
_VLLM_MODEL = "Qwen/Qwen3.5-9B"
# Seconds to wait for the server before giving up instead of hanging the app.
_VLLM_TIMEOUT_S = 60


def _post_json(path, payload):
    """POST *payload* as JSON to the vLLM server and return the decoded reply.

    Raises:
        requests.RequestException: on connection errors, timeouts, HTTP error
            statuses, or a reply that is not valid JSON.
    """
    response = requests.post(
        _VLLM_BASE_URL + path,
        headers={"Content-Type": "application/json"},
        json=payload,
        timeout=_VLLM_TIMEOUT_S,
    )
    response.raise_for_status()
    return response.json()


def show_internals():
    """Render the conversation editor and the per-token logprob view.

    The conversation lives in ``st.session_state['messages']``; the last
    entry is the message being edited. On every run the conversation is
    tokenized with the chat template, the token ids are sent to the
    completions endpoint with ``echo`` enabled, and the resulting tokens and
    their top logprobs are handed to ``logprobs_component``.
    """
    if 'messages' not in st.session_state or st.button("Start a new conversation"):
        st.session_state['messages'] = [{"role": "user", "content": ""}]
        st.session_state['msg_in_progress'] = ""
        st.session_state['placeholder_token'] = placeholders_to_try[0]
    messages = st.session_state.messages

    def rewind_to(i):
        # Drop everything after message i and reopen it for editing.
        st.session_state.messages = st.session_state.messages[:i+1]
        st.session_state['msg_in_progress'] = st.session_state.messages[-1]['content']

    for i, message in enumerate(messages[:-1]):
        with st.chat_message(message["role"]):
            st.markdown(message["content"])
            st.button("Edit", on_click=rewind_to, args=(i,), key=f"rewind_to_{i}")

    # Display message-in-progress in chat message container
    last_role = messages[-1]["role"]
    with st.chat_message(last_role):
        label = "Your message" if last_role == "user" else "Assistant response"
        msg_in_progress = st.text_area(label, placeholder="Clicking the buttons below will update this field. You can also edit it directly; press Ctrl+Enter to apply changes.", height=300, key="msg_in_progress")
        if msg_in_progress is None:
            msg_in_progress = ""
        # Store only the user's text. The placeholder is added to a copy just
        # for the request, so it never leaks into the sent history or back
        # into the text area after "Edit".
        messages[-1]['content'] = msg_in_progress

    # Unfortunately chat templates include things like this:
    #   {%- set content = render_content(message.content, true)|trim %}
    # so we can't include leading or trailing whitespace.
    # Can't do much about leading whitespace, but we can at least allow
    # trailing whitespace by including a special token for it.
    # Unfortunately there's no single token that never gets joined with any
    # other one, so we have to try a few different ones and see which one
    # actually gets separated out by the tokenizer.
    placeholder = st.session_state.placeholder_token
    request_messages = messages[:-1] + [
        {**messages[-1], "content": msg_in_progress + placeholder}
    ]
    st.write(request_messages)

    def send_message():
        # Hand the turn to the other role with an empty message to edit.
        other_role = "assistant" if last_role == "user" else "user"
        st.session_state['messages'].append({"role": other_role, "content": ""})
        st.session_state['msg_in_progress'] = ""

    st.button("Send", on_click=send_message)

    try:
        tokenized = _post_json("/tokenize", {
            "model": _VLLM_MODEL,
            "messages": request_messages,
            "continue_final_message": True,
            "add_generation_prompt": False,
            "return_token_strs": True,
        })
        token_ids = tokenized['tokens']
        # completion given prompt token ids
        completion = _post_json("/v1/completions", {
            "model": _VLLM_MODEL,
            "prompt": token_ids,
            "max_tokens": 2,
            "logprobs": 5,
            "echo": True,
        })
    except requests.RequestException as err:
        st.error(f"Request to the vLLM server failed: {err}")
        return

    logprobs_part = completion['choices'][0]['logprobs']
    # With echo enabled the reply also contains the generated tokens; keep
    # only the entries that correspond to the prompt.
    prompt_len = len(token_ids)
    prompt_tokens = logprobs_part['tokens'][:prompt_len]
    prompt_top_logprobs = logprobs_part['top_logprobs'][:prompt_len]
    logprobs = [
        {
            "token": tok,
            # first token has no logprobs, but show the token string.
            "logprobs": dict(top) if idx else None,
        }
        for idx, (tok, top) in enumerate(zip(prompt_tokens, prompt_top_logprobs))
    ]

    if logprobs and logprobs[-1]['token'] == placeholder:
        final_entry = logprobs[-1]
        final_entry['token'] = None
        # remove the placeholder token logprobs, since they aren't meaningful
        final_entry['logprobs'] = {
            tok: logprob
            for tok, logprob in (final_entry['logprobs'] or {}).items()
            if tok != placeholder
        }
    else:
        st.warning("Expected the last token to be the placeholder token, but it wasn't. Logprobs may not display correctly.")
        if st.button("Try a different placeholder token"):
            current_index = placeholders_to_try.index(placeholder)
            next_index = (current_index + 1) % len(placeholders_to_try)
            st.session_state.placeholder_token = placeholders_to_try[next_index]
            st.rerun()

    st.write("Conversation so far as tokens (click to show logprobs):")
    logprobs_component(logprobs)
def logprobs_component(logprobs):
    """Render tokens as clickable boxes that reveal their top logprobs.

    Args:
        logprobs: One dict per token, in display order, shaped like
            ``{"token": "the", "logprobs": {"the": -0.1, "a": -0.2, ...}}``.
            ``token`` may be None, which is drawn as a ``[____]`` blank.
            ``logprobs`` may be None, in which case clicking lists nothing.

    Returns:
        The result of ``streamlit.components.v1.html`` for the generated
        markup.
    """
    import html
    import json

    token_spans = []
    for i, entry in enumerate(logprobs):
        token = entry['token']
        shown = "[____]" if token is None else show_token(token, escape_markdown=False)
        token_spans.append(
            f'<span style="border: 1px solid black; display: inline-block;" onclick="showLogprobs({i})" title="Click to show logprobs for this token">{html.escape(shown)}</span>'
        )
    tokens_html = ''.join(token_spans)

    # The JSON is embedded inside a <script> element, where a token such as
    # "</script>" or "<!--" would otherwise be interpreted by the HTML parser.
    # "<" only occurs inside JSON strings, so the \u003c escape is lossless.
    logprobs_json = json.dumps(logprobs).replace("<", "\\u003c")

    show_logprob_js = '''
const makeElt = (tag, attrs, children) => {
    const elt = document.createElement(tag);
    for (const [attr, val] of Object.entries(attrs)) {
        elt.setAttribute(attr, val);
    }
    for (const child of children) {
        if(typeof child === 'string') {
            elt.appendChild(document.createTextNode(child));
        } else {
            elt.appendChild(child);
        }
    }
    return elt;
}
function escapeToken(token) {
    return token.replace(/\\n/g, '↵').replace(/\\t/g, '⇥');
}
function showLogprobs(i) {
    const container = document.getElementById('logprobs-display');
    container.innerHTML = '';
    const entry = allLogprobs[i];
    // Nothing to list for an empty token list or a token without logprobs.
    if (!entry || !entry.logprobs) {
        return;
    }
    container.appendChild(makeElt('ul', {}, Object.entries(entry.logprobs).map(([token, logprob]) => makeElt('li', {}, `${escapeToken(token)}: ${Math.exp(logprob)}`))));
}
'''
    html_out = f"""
<style>
p.logprobs-container {{
    background: white;
    line-height: 1.5;
}}
p.logprobs-container > span {{
    position: relative;
    padding: 2px 1px;
    border-radius: 3px;
}}
</style>
<p class="logprobs-container">{tokens_html}</p>
<div id="logprobs-display"></div>
<script>allLogprobs = {logprobs_json};
{show_logprob_js}
showLogprobs(allLogprobs.length - 1);
</script>
"""
    import streamlit.components.v1 as components
    return components.html(html_out, height=300, scrolling=True)
# Build the page whenever this script is executed.
show_internals()
|