Spaces:
Sleeping
Sleeping
Tristan committed on
Commit ·
b779be4
1
Parent(s): ba16f8a
Add interactive token visualization with hover tooltips showing top-5 alternatives
Browse files- README_SPACE.md +3 -1
- app.py +59 -9
- static/css/style.css +125 -0
- static/js/app.js +52 -1
README_SPACE.md
CHANGED
|
@@ -10,13 +10,15 @@ pinned: false
|
|
| 10 |
# Text Generation & Summarization App
|
| 11 |
|
| 12 |
This application provides two AI-powered features:
|
| 13 |
-
- **Text Generation**: Generate text completions using Qwen2.5-0.5B-Instruct
|
| 14 |
- **Summarization**: Summarize long text using BART-large-CNN
|
| 15 |
|
| 16 |
## Features
|
| 17 |
- FastAPI backend
|
| 18 |
- Interactive web interface
|
|
|
|
| 19 |
- Real-time text generation and summarization
|
|
|
|
| 20 |
- Adjustable parameters (max tokens, sampling)
|
| 21 |
|
| 22 |
## Models Used
|
|
|
|
| 10 |
# Text Generation & Summarization App
|
| 11 |
|
| 12 |
This application provides two AI-powered features:
|
| 13 |
+
- **Text Generation**: Generate text completions using Qwen2.5-0.5B-Instruct with interactive token visualization
|
| 14 |
- **Summarization**: Summarize long text using BART-large-CNN
|
| 15 |
|
| 16 |
## Features
|
| 17 |
- FastAPI backend
|
| 18 |
- Interactive web interface
|
| 19 |
+
- **Interactive Token Visualization**: Hover over any generated token to see the top 5 alternative tokens the model considered
|
| 20 |
- Real-time text generation and summarization
|
| 21 |
+
- Next word prediction with probability scores
|
| 22 |
- Adjustable parameters (max tokens, sampling)
|
| 23 |
|
| 24 |
## Models Used
|
app.py
CHANGED
|
@@ -41,15 +41,65 @@ def generate(req: GenRequest):
|
|
| 41 |
)
|
| 42 |
return {"generated_text": out[0]["summary_text"]}
|
| 43 |
else:
|
| 44 |
-
# Use text generation pipeline
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
@app.post("/predict_next")
|
| 55 |
def predict_next(req: GenRequest):
|
|
|
|
| 41 |
)
|
| 42 |
return {"generated_text": out[0]["summary_text"]}
|
| 43 |
else:
|
| 44 |
+
# Use text generation pipeline with token-level alternatives
|
| 45 |
+
return generate_with_alternatives(req)
|
| 46 |
+
|
| 47 |
+
def generate_with_alternatives(req: GenRequest):
    """Greedily generate up to ``req.max_new_tokens`` tokens, recording the
    top-5 candidate tokens the model considered at each step.

    Parameters:
        req: request carrying ``text`` (the prompt) and ``max_new_tokens``.

    Returns:
        dict with:
            generated_text: concatenation of the chosen tokens (EOS excluded).
            tokens: list of {"token": str,
                             "alternatives": [{"token": str,
                                               "probability": float (percent,
                                               rounded to 2 decimals)}]}
            where alternatives[0] is always the chosen (highest-probability)
            token.
    """
    top_k = 5
    tokens_data = []
    current_text = req.text

    for _ in range(req.max_new_tokens):
        # NOTE(review): re-tokenizing the whole decoded text each step is
        # O(n^2) and BPE decode/encode round-trips can drift from the true
        # token ids; accumulating input_ids directly would be more robust.
        inputs = tokenizer(current_text, return_tensors="pt")

        with torch.no_grad():
            # Logits for the next-token position only.
            next_token_logits = outputs = model(**inputs).logits[0, -1, :]

        probs = torch.softmax(next_token_logits, dim=-1)
        top_probs, top_indices = torch.topk(probs, top_k)

        # Greedy choice: the highest-probability candidate.
        chosen_token_id = top_indices[0].item()

        # Fix: stop BEFORE recording EOS so the sentinel token never leaks
        # into generated_text / tokens_data (the original appended it first
        # and only then broke out of the loop).
        if chosen_token_id == tokenizer.eos_token_id:
            break

        chosen_token = tokenizer.decode([chosen_token_id])

        alternatives = [
            {
                "token": tokenizer.decode([idx.item()]),
                "probability": round(p.item() * 100, 2),
            }
            for p, idx in zip(top_probs, top_indices)
        ]

        tokens_data.append({
            "token": chosen_token,
            "alternatives": alternatives,
        })

        # Feed the chosen token back in for the next step.
        current_text += chosen_token

    generated_text = "".join(t["token"] for t in tokens_data)

    return {
        "generated_text": generated_text,
        "tokens": tokens_data,
    }
|
| 103 |
|
| 104 |
@app.post("/predict_next")
|
| 105 |
def predict_next(req: GenRequest):
|
static/css/style.css
CHANGED
|
@@ -182,6 +182,120 @@ button:active {
|
|
| 182 |
font-style: italic;
|
| 183 |
}
|
| 184 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
/* Predictions Section */
|
| 186 |
.predictions-section {
|
| 187 |
margin-top: 30px;
|
|
@@ -293,4 +407,15 @@ button:active {
|
|
| 293 |
.prediction-token {
|
| 294 |
font-size: 0.95em;
|
| 295 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 296 |
}
|
|
|
|
| 182 |
font-style: italic;
|
| 183 |
}
|
| 184 |
|
| 185 |
+
/* Interactive Token Display */
/* Container for the generated text when rendered as hoverable tokens. */
.interactive-output {
    line-height: 2;
    font-size: 16px;
}

/* One generated token; position: relative anchors the absolute tooltip. */
.hoverable-token {
    position: relative;
    display: inline-block;
    padding: 2px 4px;
    margin: 0 1px;
    border-radius: 4px;
    cursor: pointer;
    transition: all 0.2s ease;
    background: transparent;
}

.hoverable-token:hover {
    background: #e3f2fd;
    box-shadow: 0 2px 8px rgba(102, 126, 234, 0.2);
}

/* Tooltip for alternatives */
/* Hidden by default; revealed by the :hover rule below. Positioned above
   the token, horizontally centered on it. */
.token-tooltip {
    visibility: hidden;
    opacity: 0;
    position: absolute;
    bottom: 100%;
    left: 50%;
    transform: translateX(-50%) translateY(-10px);
    background: #2c3e50;
    color: white;
    padding: 12px;
    border-radius: 8px;
    box-shadow: 0 4px 20px rgba(0, 0, 0, 0.3);
    z-index: 1000;
    min-width: 250px;
    max-width: 350px;
    margin-bottom: 8px;
    transition: all 0.3s ease;
    pointer-events: none;
}

/* Small downward-pointing arrow under the tooltip, drawn with borders. */
.token-tooltip::after {
    content: '';
    position: absolute;
    top: 100%;
    left: 50%;
    transform: translateX(-50%);
    border: 8px solid transparent;
    border-top-color: #2c3e50;
}

/* Fade/slide the tooltip in when its token is hovered. */
.hoverable-token:hover .token-tooltip {
    visibility: visible;
    opacity: 1;
    transform: translateX(-50%) translateY(0);
}

.tooltip-header {
    font-weight: 700;
    font-size: 0.9em;
    margin-bottom: 10px;
    color: #ecf0f1;
    border-bottom: 1px solid rgba(255, 255, 255, 0.2);
    padding-bottom: 6px;
}

/* One alternative row: token text on the left, probability on the right. */
.tooltip-item {
    display: flex;
    align-items: center;
    justify-content: space-between;
    padding: 8px 6px;
    margin: 4px 0;
    border-radius: 4px;
    background: rgba(255, 255, 255, 0.05);
    transition: background 0.2s ease;
}

.tooltip-item:hover {
    background: rgba(255, 255, 255, 0.1);
}

/* Highlight for the alternative that was actually chosen (rank 0). */
.tooltip-item.chosen {
    background: linear-gradient(135deg, rgba(102, 126, 234, 0.3) 0%, rgba(118, 75, 162, 0.3) 100%);
    border: 1px solid rgba(102, 126, 234, 0.5);
}

.tooltip-token {
    font-family: 'Courier New', monospace;
    font-weight: 600;
    font-size: 0.95em;
    color: #ecf0f1;
    margin-right: 8px;
    flex: 1;
}

.tooltip-prob {
    font-weight: 700;
    color: #3498db;
    font-size: 0.9em;
    margin-left: 8px;
}

/* "✓ chosen" pill shown next to the selected alternative. */
.chosen-badge {
    background: #27ae60;
    color: white;
    padding: 2px 8px;
    border-radius: 10px;
    font-size: 0.75em;
    font-weight: 600;
    margin-left: 8px;
}
|
| 298 |
+
|
| 299 |
/* Predictions Section */
|
| 300 |
.predictions-section {
|
| 301 |
margin-top: 30px;
|
|
|
|
| 407 |
.prediction-token {
|
| 408 |
font-size: 0.95em;
|
| 409 |
}
|
| 410 |
+
|
| 411 |
+
/* Mobile tooltip adjustments */
|
| 412 |
+
.token-tooltip {
|
| 413 |
+
min-width: 200px;
|
| 414 |
+
max-width: 280px;
|
| 415 |
+
font-size: 0.9em;
|
| 416 |
+
}
|
| 417 |
+
|
| 418 |
+
.hoverable-token {
|
| 419 |
+
padding: 3px 5px;
|
| 420 |
+
}
|
| 421 |
}
|
static/js/app.js
CHANGED
|
@@ -25,7 +25,15 @@ const generateText = async () => {
|
|
| 25 |
|
| 26 |
if (response.ok) {
|
| 27 |
const data = await response.json();
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
outputElement.classList.remove("loading");
|
| 30 |
} else {
|
| 31 |
outputElement.innerText = "Error: Unable to process request.";
|
|
@@ -37,6 +45,49 @@ const generateText = async () => {
|
|
| 37 |
}
|
| 38 |
};
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
const predictNext = async () => {
|
| 41 |
const textInput = document.getElementById("textInput").value;
|
| 42 |
|
|
|
|
| 25 |
|
| 26 |
if (response.ok) {
|
| 27 |
const data = await response.json();
|
| 28 |
+
|
| 29 |
+
if (mode === "summarize" || !data.tokens) {
|
| 30 |
+
// Simple text display for summarization or legacy response
|
| 31 |
+
outputElement.innerText = data.generated_text;
|
| 32 |
+
} else {
|
| 33 |
+
// Interactive token display with hover alternatives
|
| 34 |
+
displayInteractiveTokens(data.tokens, outputElement);
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
outputElement.classList.remove("loading");
|
| 38 |
} else {
|
| 39 |
outputElement.innerText = "Error: Unable to process request.";
|
|
|
|
| 45 |
}
|
| 46 |
};
|
| 47 |
|
| 48 |
+
// Render generated tokens as hoverable spans, each carrying a tooltip that
// lists the model's top-5 candidates for that position (index 0 = the token
// actually emitted).
const displayInteractiveTokens = (tokens, container) => {
    // Clear any previous output and switch to the interactive layout.
    container.innerHTML = '';
    container.classList.add('interactive-output');

    tokens.forEach((tokenData, index) => {
        const span = document.createElement('span');
        span.className = 'hoverable-token';
        span.textContent = tokenData.token;
        span.dataset.index = index;

        // Build one tooltip row per alternative; only token text is
        // user-controlled, so it is escaped before interpolation.
        const rows = tokenData.alternatives.map((alt, rank) => {
            const isChosen = rank === 0;
            return `
                <div class="tooltip-item ${isChosen ? 'chosen' : ''}">
                    <span class="tooltip-token">${escapeHtml(alt.token)}</span>
                    <span class="tooltip-prob">${alt.probability}%</span>
                    ${isChosen ? '<span class="chosen-badge">✓ chosen</span>' : ''}
                </div>
            `;
        });

        const tooltip = document.createElement('div');
        tooltip.className = 'token-tooltip';
        tooltip.innerHTML =
            '<div class="tooltip-header">Top 5 Alternatives:</div>' + rows.join('');

        span.appendChild(tooltip);
        container.appendChild(span);
    });
};
|
| 79 |
+
|
| 80 |
+
// Escape the characters with special meaning in HTML so token text can be
// safely interpolated into tooltip innerHTML (prevents markup injection).
// Fix: the map previously contained identity mappings (e.g. '&': '&'),
// so the function escaped nothing; restore the proper HTML entities.
const escapeHtml = (text) => {
    const map = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#039;'
    };
    return text.replace(/[&<>"']/g, m => map[m]);
};
|
| 90 |
+
|
| 91 |
const predictNext = async () => {
|
| 92 |
const textInput = document.getElementById("textInput").value;
|
| 93 |
|