kcarnold committed on
Commit
cf7fe2b
·
1 Parent(s): 91f1cd3

WIP using tokenize endpoint directly

Browse files
Files changed (1) hide show
  1. internals.py +67 -16
internals.py CHANGED
@@ -2,6 +2,8 @@ import streamlit as st
2
  import requests
3
  import json
4
 
 
 
5
  def show_token(token: str, escape_markdown=True) -> str:
6
  token_display = token.replace('\n', '↵').replace('\t', '⇥')
7
  if escape_markdown:
@@ -14,6 +16,7 @@ def show_internals():
14
  if 'messages' not in st.session_state or st.button("Start a new conversation"):
15
  st.session_state['messages'] = [{"role": "user", "content": ""}]
16
  st.session_state['msg_in_progress'] = ""
 
17
  messages = st.session_state.messages
18
 
19
  def rewind_to(i):
@@ -33,19 +36,55 @@ def show_internals():
33
  if msg_in_progress is None:
34
  msg_in_progress = ""
35
 
36
- messages[-1]['content'] = msg_in_progress
 
 
 
 
 
 
 
37
 
38
- def append_token(word):
39
- messages[-1]['content'] = st.session_state['msg_in_progress'] = (
40
- msg_in_progress + word
41
- )
42
-
43
  def send_message():
44
  other_role = "assistant" if last_role == "user" else "user"
45
  st.session_state['messages'].append({"role": other_role, "content": ""})
46
  st.session_state['msg_in_progress'] = ""
47
  st.button("Send", on_click=send_message)
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  # Make request to vLLM.
50
  st.write(messages)
51
  result = requests.post(
@@ -76,15 +115,27 @@ def show_internals():
76
  "logprobs": tok_logprobs
77
  })
78
 
79
- # Add the last token (set "token" to None)
80
- last_token_logprobs = result['choices'][0]['logprobs']['content'][0]['top_logprobs']
81
- logprobs.append(
82
- {
83
- "token": None,
84
- "logprobs": {tok["token"]: tok["logprob"] for tok in last_token_logprobs}
85
- }
86
- )
87
-
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
  #st.write(last_token_logprobs)
90
  st.write("Conversation so far as tokens (click to show logprobs):")
@@ -151,7 +202,7 @@ function escapeToken(token) {
151
 
152
  {show_logprob_js}
153
 
154
- //showLogprobs(allLogprobs.length - 1);
155
  </script>
156
  """
157
  import streamlit.components.v1 as components
 
2
  import requests
3
  import json
4
 
5
+ placeholders_to_try = '#.?!@$%^&*()_+-=~`|;:"<>,./\\'
6
+
7
  def show_token(token: str, escape_markdown=True) -> str:
8
  token_display = token.replace('\n', '↵').replace('\t', '⇥')
9
  if escape_markdown:
 
16
  if 'messages' not in st.session_state or st.button("Start a new conversation"):
17
  st.session_state['messages'] = [{"role": "user", "content": ""}]
18
  st.session_state['msg_in_progress'] = ""
19
+ st.session_state['placeholder_token'] = placeholders_to_try[0]
20
  messages = st.session_state.messages
21
 
22
  def rewind_to(i):
 
36
  if msg_in_progress is None:
37
  msg_in_progress = ""
38
 
39
+ # Unfortunately chat templates include things like this:
40
+ # {%- set content = render_content(message.content, true)|trim %}
41
+ # so we can't include leading or trailing whitespace.
42
+ # Can't do much about leading whitespace, but we can at least allow trailing whitespace by including a special token for it.
43
+ # Unfortunately there's no single token that never gets joined with any other one, so we have to try a few different ones and see which one actually gets separated out by the tokenizer.
44
+
45
+
46
+ messages[-1]['content'] = msg_in_progress + st.session_state.placeholder_token
47
 
 
 
 
 
 
48
  def send_message():
49
  other_role = "assistant" if last_role == "user" else "user"
50
  st.session_state['messages'].append({"role": other_role, "content": ""})
51
  st.session_state['msg_in_progress'] = ""
52
  st.button("Send", on_click=send_message)
53
 
54
+ token_ids_req = requests.post(
55
+ "https://vllm.thoughtful-ai.com/tokenize",
56
+ headers={"Content-Type": "application/json"},
57
+ json={
58
+ "model": "Qwen/Qwen3.5-9B",
59
+ "messages": messages,
60
+ "continue_final_message": True,
61
+ "add_generation_prompt": False,
62
+ "return_token_strs": True,
63
+ }
64
+ )
65
+ token_ids_req = token_ids_req.json()
66
+ token_ids = token_ids_req['tokens']
67
+ token_strs = token_ids_req['token_strs']
68
+
69
+ # completion given prompt token ids
70
+ tmp = requests.post(
71
+ "https://vllm.thoughtful-ai.com/v1/completions",
72
+ headers={"Content-Type": "application/json"},
73
+ json={
74
+ "model": "Qwen/Qwen3.5-9B",
75
+ "prompt": token_ids,
76
+ "max_tokens": 2,
77
+ "logprobs": 5,
78
+ "echo": True,
79
+ "top_logprobs": 5,
80
+ }
81
+ )
82
+ st.write(tmp.json()['choices'][0]['logprobs'])
83
+
84
+
85
+
86
+
87
+
88
  # Make request to vLLM.
89
  st.write(messages)
90
  result = requests.post(
 
115
  "logprobs": tok_logprobs
116
  })
117
 
118
+ # # Add the last token (set "token" to None)
119
+ # last_token_logprobs = result['choices'][0]['logprobs']['content'][0]['top_logprobs']
120
+ # logprobs.append(
121
+ # {
122
+ # "token": None,
123
+ # "logprobs": {tok["token"]: tok["logprob"] for tok in last_token_logprobs}
124
+ # }
125
+ # )
126
+
127
+ # The last token was actually the placeholder token, so it serves as the "next token" whose logprobs we want to show. We can just replace it with None for display purposes.
128
+ if logprobs and logprobs[-1]['token'] == st.session_state.placeholder_token:
129
+ logprobs[-1]['token'] = None
130
+ # remove the placeholder token logprobs, since they aren't meaningful
131
+ logprobs[-1]['logprobs'] = {tok: logprob for tok, logprob in logprobs[-1]['logprobs'].items() if tok != st.session_state.placeholder_token}
132
+ else:
133
+ st.warning("Expected the last token to be the placeholder token, but it wasn't. Logprobs may not display correctly.")
134
+ if st.button("Try a different placeholder token"):
135
+ current_index = placeholders_to_try.index(st.session_state.placeholder_token)
136
+ next_index = (current_index + 1) % len(placeholders_to_try)
137
+ st.session_state.placeholder_token = placeholders_to_try[next_index]
138
+ st.rerun()
139
 
140
  #st.write(last_token_logprobs)
141
  st.write("Conversation so far as tokens (click to show logprobs):")
 
202
 
203
  {show_logprob_js}
204
 
205
+ showLogprobs(allLogprobs.length - 1);
206
  </script>
207
  """
208
  import streamlit.components.v1 as components