kcarnold committed on
Commit
91f1cd3
·
unverified ·
1 Parent(s): 58ed19d

Add a vLLM-backed show-internals interface

Browse files
Files changed (1) hide show
  1. internals.py +160 -0
internals.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ import json
4
+
5
# Visible stand-ins for whitespace characters that would otherwise be
# invisible when a token is displayed.
_WHITESPACE_GLYPHS = {'\n': '↵', '\t': '⇥'}

# Characters that get a backslash prefix when Markdown escaping is requested.
_MARKDOWN_SPECIALS = "\\`*_{}[]()#+-.!"

# Translation tables are built once so each call is a single pass over the
# token instead of one `.replace()` pass per character.
_PLAIN_TABLE = str.maketrans(_WHITESPACE_GLYPHS)
_MARKDOWN_TABLE = str.maketrans({
    **_WHITESPACE_GLYPHS,
    **{c: "\\" + c for c in _MARKDOWN_SPECIALS},
})


def show_token(token: str, escape_markdown=True) -> str:
    """Return a display-friendly version of *token*.

    Newlines become '↵' and tabs become '⇥'. When *escape_markdown* is
    true, each of the characters ``\\ ` * _ { } [ ] ( ) # + - . !`` is
    additionally prefixed with a backslash.

    Args:
        token: The raw token text.
        escape_markdown: Whether to backslash-escape the Markdown special
            characters listed above.

    Returns:
        The transformed token string.
    """
    table = _MARKDOWN_TABLE if escape_markdown else _PLAIN_TABLE
    return token.translate(table)
11
+
12
+
13
def _token_logprobs_from_response(result):
    """Convert a chat-completion response into per-token logprob entries.

    Args:
        result: The decoded JSON response. It is read at
            ``result['prompt_logprobs']`` and
            ``result['choices'][0]['logprobs']['content']``.

    Returns:
        A list of ``{"token": str | None, "logprobs": {token: logprob}}``
        dicts: one per prompt position after the first, followed by a final
        entry with ``"token": None`` holding the candidates for the next
        token.

    Raises:
        KeyError, IndexError, TypeError, AttributeError: If the response
            does not have the expected structure.
    """
    entries = []
    # The first prompt position has no logprobs, so it is always skipped.
    for position in result['prompt_logprobs'][1:]:
        if not position:
            # Skip any further None/empty position instead of crashing on it.
            continue
        candidates = {v['decoded_token']: v['logprob'] for v in position.values()}
        entries.append({
            # The first candidate is used as the token at this position
            # (assumes the server lists the actual prompt token first).
            "token": next(iter(candidates)),
            "logprobs": candidates,
        })

    # Final entry: candidates for the next, not-yet-chosen token.
    generated = result['choices'][0]['logprobs']['content']
    next_candidates = generated[0]['top_logprobs'] if generated else []
    entries.append({
        "token": None,
        "logprobs": {c["token"]: c["logprob"] for c in next_candidates},
    })
    return entries


def show_internals():
    """Render the conversation editor and the token/logprob view.

    Keeps the conversation in ``st.session_state['messages']`` and the text
    of the message being edited in ``st.session_state['msg_in_progress']``.
    On every run it posts the conversation to the chat-completions endpoint
    with logprobs enabled and passes the per-token logprobs to
    ``logprobs_component``. Request or response failures are reported with
    ``st.error`` and end the run early.
    """
    if 'messages' not in st.session_state or st.button("Start a new conversation"):
        st.session_state['messages'] = [{"role": "user", "content": ""}]
        st.session_state['msg_in_progress'] = ""
    messages = st.session_state.messages

    def rewind_to(i):
        # Drop everything after message i and make it the editable message.
        st.session_state.messages = st.session_state.messages[:i+1]
        st.session_state['msg_in_progress'] = st.session_state.messages[-1]['content']

    # Show every completed message with a button to go back and edit it.
    for i, message in enumerate(st.session_state.messages[:-1]):
        with st.chat_message(message["role"]):
            st.markdown(message["content"])
            st.button("Edit", on_click=rewind_to, args=(i,), key=f"rewind_to_{i}")

    # Display message-in-progress in chat message container
    last_role = messages[-1]["role"]
    with st.chat_message(last_role):
        label = "Your message" if last_role == "user" else "Assistant response"
        msg_in_progress = st.text_area(label, placeholder="Clicking the buttons below will update this field. You can also edit it directly; press Ctrl+Enter to apply changes.", height=300, key="msg_in_progress")
        if msg_in_progress is None:
            msg_in_progress = ""

    messages[-1]['content'] = msg_in_progress

    def append_token(word):
        # NOTE(review): not wired to any widget in this function yet.
        messages[-1]['content'] = st.session_state['msg_in_progress'] = (
            msg_in_progress + word
        )

    def send_message():
        # Start an empty message for the other participant.
        other_role = "assistant" if last_role == "user" else "user"
        st.session_state['messages'].append({"role": other_role, "content": ""})
        st.session_state['msg_in_progress'] = ""
    st.button("Send", on_click=send_message)

    # Make request to vLLM.
    st.write(messages)
    try:
        response = requests.post(
            "https://vllm.thoughtful-ai.com/v1/chat/completions",
            headers={"Content-Type": "application/json"},
            json={
                "model": "Qwen/Qwen3.5-9B",
                "messages": messages,
                "max_tokens": 2,
                "logprobs": True,
                "continue_final_message": True,
                "add_generation_prompt": False,
                "top_logprobs": 5,
                "prompt_logprobs": 5,
                "top_k": 20,
                "chat_template_kwargs": {"enable_thinking": False},
                "echo": True
            },
            # Without a timeout a stalled server would hang the app run forever.
            timeout=60,
        )
        # Surface HTTP errors here rather than as a KeyError further down.
        response.raise_for_status()
        result = response.json()
    except (requests.RequestException, ValueError) as err:
        st.error(f"Could not get logprobs from the model server: {err}")
        return

    try:
        logprobs = _token_logprobs_from_response(result)
    except (KeyError, IndexError, TypeError, AttributeError) as err:
        st.error(f"Unexpected response format from the model server: {err!r}")
        return

    st.write("Conversation so far as tokens (click to show logprobs):")
    logprobs_component(logprobs)
92
+
93
+
94
def logprobs_component(logprobs):
    """Render tokens as clickable boxes that reveal their logprobs.

    Args:
        logprobs: A list with one entry per token, each shaped like
            ``{"token": "the", "logprobs": {"the": -0.1, "a": -0.2, ...}}``.
            An entry whose ``"token"`` is ``None`` is shown as the
            placeholder ``[____]``.

    Returns:
        Whatever ``streamlit.components.v1.html`` returns for the embedded
        HTML document.
    """
    import html
    import json
    import streamlit.components.v1 as components

    # One bordered <span> per token; clicking it calls showLogprobs(index).
    spans = []
    for i, entry in enumerate(logprobs):
        token = entry['token']
        if token is not None:
            token_to_show = html.escape(show_token(token, escape_markdown=False))
        else:
            token_to_show = html.escape("[____]")
        spans.append(
            f'<span style="border: 1px solid black; display: inline-block;" onclick="showLogprobs({i})" title="Click to show logprobs for this token">{token_to_show}</span>'
        )
    tokens_html = ''.join(spans)

    # The JSON is embedded inside a <script> element, so token text such as
    # "</script" or "<!--" must not reach the HTML parser literally. The
    # \uXXXX forms are still valid JSON/JS string escapes.
    logprobs_json = (
        json.dumps(logprobs)
        .replace("<", "\\u003c")
        .replace(">", "\\u003e")
        .replace("&", "\\u0026")
    )

    # Not a raw string: "\\n" and "\\t" below reach the browser as the JS
    # regex escapes \n and \t.
    show_logprob_js = '''
const makeElt = (tag, attrs, children) => {
    const elt = document.createElement(tag);
    for (const [attr, val] of Object.entries(attrs)) {
        elt.setAttribute(attr, val);
    }
    for (const child of children) {
        if(typeof child === 'string') {
            elt.appendChild(document.createTextNode(child));
        } else {
            elt.appendChild(child);
        }
    }
    return elt;
}

function escapeToken(token) {
    return token.replace(/\\n/g, '↵').replace(/\\t/g, '⇥');
}
function showLogprobs(i) {
    const logprobs = allLogprobs[i].logprobs;
    const container = document.getElementById('logprobs-display');
    container.innerHTML = '';
    container.appendChild(makeElt('ul', {}, Object.entries(logprobs).map(([token, logprob]) => makeElt('li', {}, `${escapeToken(token)}: ${Math.exp(logprob)}`))));
}
'''
    html_out = f"""

<style>
p.logprobs-container {{
    background: white;
    line-height: 1.5;
}}
p.logprobs-container > span {{
    position: relative;
    padding: 2px 1px;
    border-radius: 3px;
}}
</style>
<p class="logprobs-container">{tokens_html}</p>
<div id="logprobs-display"></div>
<script>allLogprobs = {logprobs_json};

{show_logprob_js}

//showLogprobs(allLogprobs.length - 1);
</script>
"""
    return components.html(html_out, height=300, scrolling=True)
159
+
160
# Module-level entry point: build the page whenever this file is executed.
+ show_internals()