File size: 7,661 Bytes
495786b
 
 
91f1cd3
 
 
 
cf7fe2b
 
91f1cd3
 
 
 
 
 
 
 
 
 
 
 
cf7fe2b
91f1cd3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf7fe2b
 
 
 
 
 
 
 
91f1cd3
495786b
 
91f1cd3
 
 
 
 
 
cf7fe2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
495786b
cf7fe2b
 
 
 
 
 
 
 
 
 
495786b
 
 
91f1cd3
495786b
 
 
 
 
 
 
 
 
 
91f1cd3
495786b
 
91f1cd3
495786b
91f1cd3
cf7fe2b
 
 
 
 
 
 
 
 
 
 
91f1cd3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf7fe2b
91f1cd3
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# References for vLLM:
# https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/openai/completion/protocol.py

import streamlit as st
import requests
import json

# Candidate single-character placeholders appended to the message in progress
# so that trailing whitespace survives the chat template. They are tried in
# order, cycling via ``str.index``, so every character must appear only once:
# a duplicate would make the cycle jump back and never reach the later entries.
placeholders_to_try = '#.?!@$%^&*()_+-=~`|;:"<>,/\\'

def show_token(token: str, escape_markdown=True) -> str:
    """Return a display-friendly rendering of *token*.

    Newlines are shown as '↵' and tabs as '⇥'. When *escape_markdown* is
    true, each Markdown-special character is also prefixed with a backslash.
    """
    substitutions = {'\n': '↵', '\t': '⇥'}
    if escape_markdown:
        substitutions.update(
            {special: "\\" + special for special in "\\`*_{}[]()#+-.!"}
        )
    # One pass over the token: every character is mapped at most once, so the
    # backslashes inserted as escapes are never themselves re-escaped.
    return token.translate(str.maketrans(substitutions))


def show_internals():
    """Render the chat editor and the token/logprob view of the conversation.

    State lives in ``st.session_state``:
      * ``messages``: list of ``{"role", "content"}`` dicts; the last entry is
        the message currently being edited.
      * ``msg_in_progress``: text of the editor widget (also its widget key).
      * ``placeholder_token``: character appended to the last message for the
        tokenize request only, so trailing whitespace is not trimmed away.

    The conversation is tokenized via the server's ``/tokenize`` endpoint and
    the token ids are then sent to ``/v1/completions`` with ``echo`` and
    ``logprobs`` to obtain per-token alternatives, which are handed to
    ``logprobs_component``. Network or decoding failures are reported with
    ``st.error`` and the token view is skipped for that run.
    """
    request_timeout = 30  # seconds; keeps a stalled server from hanging the page

    if 'messages' not in st.session_state or st.button("Start a new conversation"):
        st.session_state['messages'] = [{"role": "user", "content": ""}]
        st.session_state['msg_in_progress'] = ""
        st.session_state['placeholder_token'] = placeholders_to_try[0]
    messages = st.session_state.messages

    def rewind_to(i):
        # Drop everything after message i and load that message into the editor.
        st.session_state.messages = st.session_state.messages[:i+1]
        st.session_state['msg_in_progress'] = st.session_state.messages[-1]['content']

    for i, message in enumerate(messages[:-1]):
        with st.chat_message(message["role"]):
            st.markdown(message["content"])
            st.button("Edit", on_click=rewind_to, args=(i,), key=f"rewind_to_{i}")

    # Display message-in-progress in chat message container
    last_role = messages[-1]["role"]
    with st.chat_message(last_role):
        label = "Your message" if last_role == "user" else "Assistant response"
        msg_in_progress = st.text_area(label, placeholder="Clicking the buttons below will update this field. You can also edit it directly; press Ctrl+Enter to apply changes.", height=300, key="msg_in_progress")
        if msg_in_progress is None:
            msg_in_progress = ""

        # Unfortunately chat templates include things like this:
        #     {%- set content = render_content(message.content, true)|trim %}
        # so we can't include leading or trailing whitespace.
        # Can't do much about leading whitespace, but we can at least allow
        # trailing whitespace by including a special token for it.
        # Unfortunately there's no single token that never gets joined with any
        # other one, so we have to try a few different ones and see which one
        # actually gets separated out by the tokenizer.
        placeholder = st.session_state.placeholder_token

        # Persist only the clean text. The placeholder is added to a copy used
        # for the request; storing it in session state would leak it into the
        # rendered history, into later prompts, and back into the editor (where
        # it would be appended a second time) after "Edit".
        messages[-1]['content'] = msg_in_progress
        request_messages = messages[:-1] + [
            {**messages[-1], "content": msg_in_progress + placeholder}
        ]

        st.write(request_messages)

        def send_message():
            # Start an empty message for the other participant.
            other_role = "assistant" if last_role == "user" else "user"
            st.session_state['messages'].append({"role": other_role, "content": ""})
            st.session_state['msg_in_progress'] = ""
        st.button("Send", on_click=send_message)

        try:
            tokenize_response = requests.post(
                "https://vllm.thoughtful-ai.com/tokenize",
                headers={"Content-Type": "application/json"},
                json={
                    "model": "Qwen/Qwen3.5-9B",
                    "messages": request_messages,
                    "continue_final_message": True,
                    "add_generation_prompt": False,
                    "return_token_strs": True,
                },
                timeout=request_timeout,
            )
            tokenize_response.raise_for_status()
            token_ids = tokenize_response.json()['tokens']

            # completion given prompt token ids
            completion_response = requests.post(
                "https://vllm.thoughtful-ai.com/v1/completions",
                headers={"Content-Type": "application/json"},
                json={
                    "model": "Qwen/Qwen3.5-9B",
                    "prompt": token_ids,
                    "max_tokens": 2,
                    "logprobs": 5,
                    "echo": True,
                },
                timeout=request_timeout,
            )
            completion_response.raise_for_status()
            logprobs_part = completion_response.json()['choices'][0]['logprobs']
        except (requests.RequestException, ValueError) as err:
            # ValueError covers a response body that is not valid JSON.
            st.error(f"Could not fetch tokens/logprobs from the server: {err}")
            return

        # Keep only the echoed prompt positions; anything beyond them belongs
        # to the freshly generated tokens, which are not displayed.
        prompt_len = len(token_ids)
        echoed_tokens = logprobs_part['tokens'][:prompt_len]
        echoed_top = logprobs_part['top_logprobs'][:prompt_len]
        logprobs = [
            # first token has no logprobs, but show the token string.
            {"token": tok, "logprobs": None if pos == 0 else dict(top)}
            for pos, (tok, top) in enumerate(zip(echoed_tokens, echoed_top))
        ]

        if logprobs and logprobs[-1]['token'] == placeholder:
            logprobs[-1]['token'] = None
            # remove the placeholder token logprobs, since they aren't meaningful
            # (``or {}`` covers the placeholder being the very first token,
            # whose logprobs entry is None).
            last_distribution = logprobs[-1]['logprobs'] or {}
            logprobs[-1]['logprobs'] = {
                tok: logprob
                for tok, logprob in last_distribution.items()
                if tok != placeholder
            }
        else:
            st.warning("Expected the last token to be the placeholder token, but it wasn't. Logprobs may not display correctly.")
            if st.button("Try a different placeholder token"):
                current_index = placeholders_to_try.index(placeholder)
                next_index = (current_index + 1) % len(placeholders_to_try)
                st.session_state.placeholder_token = placeholders_to_try[next_index]
                st.rerun()

        st.write("Conversation so far as tokens (click to show logprobs):")
        logprobs_component(logprobs)
        
        
def logprobs_component(logprobs):
    """Render tokens as clickable boxes plus a panel of per-token probabilities.

    Args:
        logprobs: list of entries shaped like
            ``{"token": "the", "logprobs": {"the": -0.1, "a": -0.2}}``.
            ``token`` may be ``None`` (drawn as ``[____]``) and ``logprobs``
            may be ``None`` (clicking such a token shows an empty list).

    Returns:
        The result of ``streamlit.components.v1.html`` for the generated markup.
    """
    import html
    import json
    import streamlit.components.v1 as components

    token_spans = []
    for i, entry in enumerate(logprobs):
        token = entry['token']
        if token is not None:
            token_to_show = html.escape(show_token(token, escape_markdown=False))
        else:
            token_to_show = html.escape("[____]")
        token_spans.append(
            f'<span style="border: 1px solid black; display: inline-block;" onclick="showLogprobs({i})" title="Click to show logprobs for this token">{token_to_show}</span>'
        )
    tokens_html = ''.join(token_spans)

    # Token strings originate from user-editable text. Inside a <script>
    # element the HTML parser reacts to sequences such as "</script>" or
    # "<!--" even within a JS string literal, so "<" is emitted as a JSON
    # unicode escape. "<" can only occur inside JSON strings, so the decoded
    # data is unchanged.
    logprobs_json = json.dumps(logprobs).replace("<", "\\u003c")

    show_logprob_js = '''
const makeElt = (tag, attrs, children) => {
    const elt = document.createElement(tag);
    for (const [attr, val] of Object.entries(attrs)) {
        elt.setAttribute(attr, val);
    }
    for (const child of children) {
        if(typeof child === 'string') {
            elt.appendChild(document.createTextNode(child));
        } else {
            elt.appendChild(child);
        }
    }
    return elt;
}

function escapeToken(token) {
    return token.replace(/\\n/g, '↵').replace(/\\t/g, '⇥');
}
    function showLogprobs(i) {
        // The first token has no logprobs (null) and the list may be empty;
        // fall back to an empty object instead of throwing in Object.entries.
        const entry = allLogprobs[i];
        const logprobs = (entry && entry.logprobs) || {};
        const container = document.getElementById('logprobs-display');
        container.innerHTML = '';
        container.appendChild(makeElt('ul', {}, Object.entries(logprobs).map(([token, logprob]) => makeElt('li', {}, `${escapeToken(token)}: ${Math.exp(logprob)}`))));
    }
'''
    html_out = f"""

    <style>
        p.logprobs-container {{
            background: white;
            line-height: 1.5;
        }}
        p.logprobs-container > span {{
            position: relative;
            padding: 2px 1px;
            border-radius: 3px;
        }}
    </style>
    <p class="logprobs-container">{tokens_html}</p>
    <div id="logprobs-display"></div>
    <script>allLogprobs = {logprobs_json};
    
    {show_logprob_js}

showLogprobs(allLogprobs.length - 1);
</script>
"""
    return components.html(html_out, height=300, scrolling=True)

# Script entry point: render the page by calling the UI function at module level.
show_internals()