Tristan committed on
Commit
b779be4
·
1 Parent(s): ba16f8a

Add interactive token visualization with hover tooltips showing top-5 alternatives

Browse files
Files changed (4) hide show
  1. README_SPACE.md +3 -1
  2. app.py +59 -9
  3. static/css/style.css +125 -0
  4. static/js/app.js +52 -1
README_SPACE.md CHANGED
@@ -10,13 +10,15 @@ pinned: false
10
  # Text Generation & Summarization App
11
 
12
  This application provides two AI-powered features:
13
- - **Text Generation**: Generate text completions using Qwen2.5-0.5B-Instruct
14
  - **Summarization**: Summarize long text using BART-large-CNN
15
 
16
  ## Features
17
  - FastAPI backend
18
  - Interactive web interface
 
19
  - Real-time text generation and summarization
 
20
  - Adjustable parameters (max tokens, sampling)
21
 
22
  ## Models Used
 
10
  # Text Generation & Summarization App
11
 
12
  This application provides two AI-powered features:
13
+ - **Text Generation**: Generate text completions using Qwen2.5-0.5B-Instruct with interactive token visualization
14
  - **Summarization**: Summarize long text using BART-large-CNN
15
 
16
  ## Features
17
  - FastAPI backend
18
  - Interactive web interface
19
+ - **Interactive Token Visualization**: Hover over any generated token to see the top 5 alternative tokens the model considered
20
  - Real-time text generation and summarization
21
+ - Next word prediction with probability scores
22
  - Adjustable parameters (max tokens, sampling)
23
 
24
  ## Models Used
app.py CHANGED
@@ -41,15 +41,65 @@ def generate(req: GenRequest):
41
  )
42
  return {"generated_text": out[0]["summary_text"]}
43
  else:
44
- # Use text generation pipeline
45
- out = generator_pipe(
46
- req.text,
47
- max_new_tokens=req.max_new_tokens,
48
- do_sample=req.do_sample,
49
- truncation=True,
50
- return_full_text=False,
51
- )
52
- return {"generated_text": out[0]["generated_text"]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  @app.post("/predict_next")
55
  def predict_next(req: GenRequest):
 
41
  )
42
  return {"generated_text": out[0]["summary_text"]}
43
  else:
44
+ # Use text generation pipeline with token-level alternatives
45
+ return generate_with_alternatives(req)
46
+
47
+ def generate_with_alternatives(req: GenRequest):
48
+ """Generate text token-by-token with top-5 alternatives for each token"""
49
+ input_text = req.text
50
+ max_new_tokens = req.max_new_tokens
51
+
52
+ tokens_data = []
53
+ current_text = input_text
54
+
55
+ for _ in range(max_new_tokens):
56
+ inputs = tokenizer(current_text, return_tensors="pt")
57
+
58
+ with torch.no_grad():
59
+ outputs = model(**inputs)
60
+ next_token_logits = outputs.logits[0, -1, :]
61
+
62
+ # Get probabilities
63
+ probs = torch.softmax(next_token_logits, dim=-1)
64
+
65
+ # Get top 5 alternatives
66
+ top_k = 5
67
+ top_probs, top_indices = torch.topk(probs, top_k)
68
+
69
+ # Choose the greedy token (highest probability)
70
+ chosen_token_id = top_indices[0].item()
71
+ chosen_token = tokenizer.decode([chosen_token_id])
72
+
73
+ # Collect alternatives
74
+ alternatives = []
75
+ for i in range(top_k):
76
+ token_id = top_indices[i].item()
77
+ token_text = tokenizer.decode([token_id])
78
+ probability = top_probs[i].item() * 100
79
+ alternatives.append({
80
+ "token": token_text,
81
+ "probability": round(probability, 2)
82
+ })
83
+
84
+ tokens_data.append({
85
+ "token": chosen_token,
86
+ "alternatives": alternatives
87
+ })
88
+
89
+ # Update current text with chosen token
90
+ current_text += chosen_token
91
+
92
+ # Check for end of sequence
93
+ if chosen_token_id == tokenizer.eos_token_id:
94
+ break
95
+
96
+ # Reconstruct full text
97
+ generated_text = "".join([t["token"] for t in tokens_data])
98
+
99
+ return {
100
+ "generated_text": generated_text,
101
+ "tokens": tokens_data
102
+ }
103
 
104
  @app.post("/predict_next")
105
  def predict_next(req: GenRequest):
static/css/style.css CHANGED
@@ -182,6 +182,120 @@ button:active {
182
  font-style: italic;
183
  }
184
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  /* Predictions Section */
186
  .predictions-section {
187
  margin-top: 30px;
@@ -293,4 +407,15 @@ button:active {
293
  .prediction-token {
294
  font-size: 0.95em;
295
  }
 
 
 
 
 
 
 
 
 
 
 
296
  }
 
182
  font-style: italic;
183
  }
184
 
185
+ /* Interactive Token Display */
186
+ .interactive-output {
187
+ line-height: 2;
188
+ font-size: 16px;
189
+ }
190
+
191
+ .hoverable-token {
192
+ position: relative;
193
+ display: inline-block;
194
+ padding: 2px 4px;
195
+ margin: 0 1px;
196
+ border-radius: 4px;
197
+ cursor: pointer;
198
+ transition: all 0.2s ease;
199
+ background: transparent;
200
+ }
201
+
202
+ .hoverable-token:hover {
203
+ background: #e3f2fd;
204
+ box-shadow: 0 2px 8px rgba(102, 126, 234, 0.2);
205
+ }
206
+
207
+ /* Tooltip for alternatives */
208
+ .token-tooltip {
209
+ visibility: hidden;
210
+ opacity: 0;
211
+ position: absolute;
212
+ bottom: 100%;
213
+ left: 50%;
214
+ transform: translateX(-50%) translateY(-10px);
215
+ background: #2c3e50;
216
+ color: white;
217
+ padding: 12px;
218
+ border-radius: 8px;
219
+ box-shadow: 0 4px 20px rgba(0, 0, 0, 0.3);
220
+ z-index: 1000;
221
+ min-width: 250px;
222
+ max-width: 350px;
223
+ margin-bottom: 8px;
224
+ transition: all 0.3s ease;
225
+ pointer-events: none;
226
+ }
227
+
228
+ .token-tooltip::after {
229
+ content: '';
230
+ position: absolute;
231
+ top: 100%;
232
+ left: 50%;
233
+ transform: translateX(-50%);
234
+ border: 8px solid transparent;
235
+ border-top-color: #2c3e50;
236
+ }
237
+
238
+ .hoverable-token:hover .token-tooltip {
239
+ visibility: visible;
240
+ opacity: 1;
241
+ transform: translateX(-50%) translateY(0);
242
+ }
243
+
244
+ .tooltip-header {
245
+ font-weight: 700;
246
+ font-size: 0.9em;
247
+ margin-bottom: 10px;
248
+ color: #ecf0f1;
249
+ border-bottom: 1px solid rgba(255, 255, 255, 0.2);
250
+ padding-bottom: 6px;
251
+ }
252
+
253
+ .tooltip-item {
254
+ display: flex;
255
+ align-items: center;
256
+ justify-content: space-between;
257
+ padding: 8px 6px;
258
+ margin: 4px 0;
259
+ border-radius: 4px;
260
+ background: rgba(255, 255, 255, 0.05);
261
+ transition: background 0.2s ease;
262
+ }
263
+
264
+ .tooltip-item:hover {
265
+ background: rgba(255, 255, 255, 0.1);
266
+ }
267
+
268
+ .tooltip-item.chosen {
269
+ background: linear-gradient(135deg, rgba(102, 126, 234, 0.3) 0%, rgba(118, 75, 162, 0.3) 100%);
270
+ border: 1px solid rgba(102, 126, 234, 0.5);
271
+ }
272
+
273
+ .tooltip-token {
274
+ font-family: 'Courier New', monospace;
275
+ font-weight: 600;
276
+ font-size: 0.95em;
277
+ color: #ecf0f1;
278
+ margin-right: 8px;
279
+ flex: 1;
280
+ }
281
+
282
+ .tooltip-prob {
283
+ font-weight: 700;
284
+ color: #3498db;
285
+ font-size: 0.9em;
286
+ margin-left: 8px;
287
+ }
288
+
289
+ .chosen-badge {
290
+ background: #27ae60;
291
+ color: white;
292
+ padding: 2px 8px;
293
+ border-radius: 10px;
294
+ font-size: 0.75em;
295
+ font-weight: 600;
296
+ margin-left: 8px;
297
+ }
298
+
299
  /* Predictions Section */
300
  .predictions-section {
301
  margin-top: 30px;
 
407
  .prediction-token {
408
  font-size: 0.95em;
409
  }
410
+
411
+ /* Mobile tooltip adjustments */
412
+ .token-tooltip {
413
+ min-width: 200px;
414
+ max-width: 280px;
415
+ font-size: 0.9em;
416
+ }
417
+
418
+ .hoverable-token {
419
+ padding: 3px 5px;
420
+ }
421
  }
static/js/app.js CHANGED
@@ -25,7 +25,15 @@ const generateText = async () => {
25
 
26
  if (response.ok) {
27
  const data = await response.json();
28
- outputElement.innerText = data.generated_text;
 
 
 
 
 
 
 
 
29
  outputElement.classList.remove("loading");
30
  } else {
31
  outputElement.innerText = "Error: Unable to process request.";
@@ -37,6 +45,49 @@ const generateText = async () => {
37
  }
38
  };
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  const predictNext = async () => {
41
  const textInput = document.getElementById("textInput").value;
42
 
 
25
 
26
  if (response.ok) {
27
  const data = await response.json();
28
+
29
+ if (mode === "summarize" || !data.tokens) {
30
+ // Simple text display for summarization or legacy response
31
+ outputElement.innerText = data.generated_text;
32
+ } else {
33
+ // Interactive token display with hover alternatives
34
+ displayInteractiveTokens(data.tokens, outputElement);
35
+ }
36
+
37
  outputElement.classList.remove("loading");
38
  } else {
39
  outputElement.innerText = "Error: Unable to process request.";
 
45
  }
46
  };
47
 
48
+ const displayInteractiveTokens = (tokens, container) => {
49
+ container.innerHTML = '';
50
+ container.classList.add('interactive-output');
51
+
52
+ tokens.forEach((tokenData, index) => {
53
+ const tokenSpan = document.createElement('span');
54
+ tokenSpan.className = 'hoverable-token';
55
+ tokenSpan.textContent = tokenData.token;
56
+ tokenSpan.dataset.index = index;
57
+
58
+ // Create tooltip with alternatives
59
+ const tooltip = document.createElement('div');
60
+ tooltip.className = 'token-tooltip';
61
+
62
+ let tooltipHTML = '<div class="tooltip-header">Top 5 Alternatives:</div>';
63
+ tokenData.alternatives.forEach((alt, i) => {
64
+ const isChosen = i === 0;
65
+ tooltipHTML += `
66
+ <div class="tooltip-item ${isChosen ? 'chosen' : ''}">
67
+ <span class="tooltip-token">${escapeHtml(alt.token)}</span>
68
+ <span class="tooltip-prob">${alt.probability}%</span>
69
+ ${isChosen ? '<span class="chosen-badge">✓ chosen</span>' : ''}
70
+ </div>
71
+ `;
72
+ });
73
+
74
+ tooltip.innerHTML = tooltipHTML;
75
+ tokenSpan.appendChild(tooltip);
76
+ container.appendChild(tokenSpan);
77
+ });
78
+ };
79
+
80
+ const escapeHtml = (text) => {
81
+ const map = {
82
+ '&': '&amp;',
83
+ '<': '&lt;',
84
+ '>': '&gt;',
85
+ '"': '&quot;',
86
+ "'": '&#039;'
87
+ };
88
+ return text.replace(/[&<>"']/g, m => map[m]);
89
+ };
90
+
91
  const predictNext = async () => {
92
  const textInput = document.getElementById("textInput").value;
93