Spaces:
Running on Zero
Running on Zero
Commit ·
ebc3bf5
1
Parent(s): f349c7e
Project files
Browse files- .gitignore +16 -0
- README.md +123 -2
- app.py +26 -0
- config.py +68 -0
- core/compressor.py +48 -0
- core/diff.py +160 -0
- core/scorer.py +11 -0
- core/tokenizer_utils.py +13 -0
- db/schema.sql +16 -0
- db/store.py +87 -0
- docs/architecture.md +55 -0
- docs/enhancements.md +27 -0
- docs/folder-structure.md +44 -0
- docs/get-started.md +72 -0
- docs/setup.md +95 -0
- models/model_loader.py +98 -0
- requirements.txt +8 -0
- tinypress_colab.ipynb +336 -0
- ui/compress_tab.py +327 -0
- ui/history_tab.py +96 -0
.gitignore
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# I store my findings here, not public 😂
|
| 2 |
+
my-notes
|
| 3 |
+
|
| 4 |
+
# You don't need my credentials 🫣
|
| 5 |
+
.env
|
| 6 |
+
.venv
|
| 7 |
+
|
| 8 |
+
# I did use vibe code 😉
|
| 9 |
+
.claude
|
| 10 |
+
CLAUDE.md
|
| 11 |
+
AGENTS.md
|
| 12 |
+
claude*
|
| 13 |
+
|
| 14 |
+
# Caches
|
| 15 |
+
__pycache__
|
| 16 |
+
*.db
|
README.md
CHANGED
|
@@ -4,10 +4,131 @@ emoji: 📊
|
|
| 4 |
colorFrom: indigo
|
| 5 |
colorTo: gray
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
python_version: '3.12'
|
| 9 |
app_file: app.py
|
| 10 |
pinned: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
---
|
| 12 |
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
colorFrom: indigo
|
| 5 |
colorTo: gray
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: "5.0"
|
| 8 |
python_version: '3.12'
|
| 9 |
app_file: app.py
|
| 10 |
pinned: false
|
| 11 |
+
license: mit
|
| 12 |
+
short_description: Compress any text to a token budget — scored and diffed, fully local.
|
| 13 |
+
models:
|
| 14 |
+
- Qwen/Qwen2.5-1.5B-Instruct
|
| 15 |
+
tags:
|
| 16 |
+
- gradio
|
| 17 |
+
- build-small-hackathon
|
| 18 |
+
- thousand-token-wood
|
| 19 |
+
- text-compression
|
| 20 |
+
- prompt-optimization
|
| 21 |
+
- local-inference
|
| 22 |
---
|
| 23 |
|
| 24 |
+
# TinyPress — Prompt Compression Engine
|
| 25 |
+
|
| 26 |
+
> **HuggingFace Build Small Hackathon · Track: Thousand Token Wood**
|
| 27 |
+
|
| 28 |
+
The constraint *is* the feature. Give TinyPress a long piece of text, set a token budget, and get back a compressed version that still carries the meaning — scored, saved, and diffed so you can see exactly what was kept and what was shed.
|
| 29 |
+
|
| 30 |
+
No cloud. No API bill. Two small models running quietly on your machine.
|
| 31 |
+
|
| 32 |
+
---
|
| 33 |
+
|
| 34 |
+
## Why this fits Thousand Token Wood
|
| 35 |
+
|
| 36 |
+
Working inside a tight token budget is not a limitation to work around — it is the problem worth solving. LLM context windows are finite, prompt costs are real, and bloated inputs degrade output quality. TinyPress treats the token count as a hard constraint and makes compression the primary interaction: you set the budget, the model meets it, and a quality score tells you how much meaning survived.
|
| 37 |
+
|
| 38 |
+
---
|
| 39 |
+
|
| 40 |
+
## Features
|
| 41 |
+
|
| 42 |
+
| | |
|
| 43 |
+
|---|---|
|
| 44 |
+
| 🗜️ **Token-budget compression** | Set a target (100–1000 tokens) and compress to at most that budget |
|
| 45 |
+
| 📊 **Quality score** | Cosine similarity between original and compressed text — 0 to 1, higher is better |
|
| 46 |
+
| 🟢🔴 **Live readiness banner** | Green when input is over budget and compression will run; red when already within budget |
|
| 47 |
+
| 🔍 **Token highlight panel** | Every token rendered as a colour-coded chip so you can see where your budget is going |
|
| 48 |
+
| 🔀 **Model hot-swap** | Switch the compression LLM mid-session without a restart (5 curated models, or any HF model ID) |
|
| 49 |
+
| 🎯 **Embedder hot-swap** | Switch the scoring embedder with per-model trade-off info (speed vs quality vs RAM) |
|
| 50 |
+
| 👍👎 **Feedback capture** | Rate every result, add an optional text note — saved instantly to SQLite |
|
| 51 |
+
| 📜 **Run history** | Every compression persisted locally with full metrics and configurable column visibility |
|
| 52 |
+
| 🔎 **Side-by-side diff** | Word-level colour diff — dropped (red), rewritten (amber), inserted (green), unchanged (plain) |
|
| 53 |
+
|
| 54 |
+
---
|
| 55 |
+
|
| 56 |
+
## Models
|
| 57 |
+
|
| 58 |
+
| Role | Default | Alternatives |
|
| 59 |
+
|---|---|---|
|
| 60 |
+
| Compression LLM | `Qwen/Qwen2.5-1.5B-Instruct` | Qwen2.5-0.5B, SmolLM2-1.7B, Phi-3.5-mini, Llama-3.2-1B |
|
| 61 |
+
| Quality scorer | `sentence-transformers/all-MiniLM-L6-v2` | mpnet-base, bge-small, bge-base, mxbai-large, gte-Qwen2-1.5B |
|
| 62 |
+
|
| 63 |
+
All models are open-weight and under 32B. Everything runs locally — no API calls, no data leaves your machine.
|
| 64 |
+
|
| 65 |
+
---
|
| 66 |
+
|
| 67 |
+
## Get started
|
| 68 |
+
|
| 69 |
+
```bash
|
| 70 |
+
python -m venv .venv
|
| 71 |
+
# Windows
|
| 72 |
+
.venv\Scripts\activate
|
| 73 |
+
# macOS / Linux
|
| 74 |
+
source .venv/bin/activate
|
| 75 |
+
|
| 76 |
+
pip install -r requirements.txt
|
| 77 |
+
python app.py
|
| 78 |
+
```
|
| 79 |
+
|
| 80 |
+
Open `http://localhost:7860`. That's it.
|
| 81 |
+
|
| 82 |
+
**Run it in Colab:** open `tinypress_colab.ipynb` — it installs dependencies, loads the models, and launches a public Gradio share URL. GPU runtime recommended for faster inference.
|
| 83 |
+
|
| 84 |
+
Optional environment overrides:
|
| 85 |
+
|
| 86 |
+
| Variable | Default | Description |
|
| 87 |
+
|---|---|---|
|
| 88 |
+
| `LLM_MODEL` | `Qwen/Qwen2.5-1.5B-Instruct` | Compression model |
|
| 89 |
+
| `EMBEDDER_MODEL` | `sentence-transformers/all-MiniLM-L6-v2` | Scoring embedder |
|
| 90 |
+
| `DB_PATH` | `tinypress.db` | SQLite database path |
|
| 91 |
+
| `PORT` | `7860` | Gradio server port |
|
| 92 |
+
|
| 93 |
+
---
|
| 94 |
+
|
| 95 |
+
## Hardware
|
| 96 |
+
|
| 97 |
+
| | Minimum | Recommended |
|
| 98 |
+
|---|---|---|
|
| 99 |
+
| RAM | 8 GB | 16 GB |
|
| 100 |
+
| VRAM | CPU-only works | 4 GB GPU speeds up inference |
|
| 101 |
+
| Disk | ~4 GB | ~4 GB |
|
| 102 |
+
|
| 103 |
+
---
|
| 104 |
+
|
| 105 |
+
## Architecture
|
| 106 |
+
|
| 107 |
+
```
|
| 108 |
+
Input text + token budget
|
| 109 |
+
│
|
| 110 |
+
core/compressor.py — builds prompt, calls LLM, hard-trims if it overshoots
|
| 111 |
+
│
|
| 112 |
+
models/model_loader.py — Qwen2.5-1.5B (or swapped model), loaded once, reused
|
| 113 |
+
│
|
| 114 |
+
core/scorer.py — cosine similarity via sentence-transformer embedder
|
| 115 |
+
│
|
| 116 |
+
db/store.py — saves run to SQLite
|
| 117 |
+
│
|
| 118 |
+
ui/compress_tab.py — shows result, metrics, feedback UI
|
| 119 |
+
```
|
| 120 |
+
|
| 121 |
+
Thin UI layer — Gradio handlers pass inputs to `core/`, return outputs. All logic lives in `core/` and `db/`.
|
| 122 |
+
|
| 123 |
+
Full docs: [Architecture](docs/architecture.md) · [Setup](docs/setup.md) · [Get Started](docs/get-started.md) · [Folder Structure](docs/folder-structure.md)
|
| 124 |
+
|
| 125 |
+
---
|
| 126 |
+
|
| 127 |
+
## About
|
| 128 |
+
|
| 129 |
+
Built by **[Sriharsha C R](https://www.linkedin.com/in/sriharsha-cr)** — AI Engineer and Cloud Native developer.
|
| 130 |
+
|
| 131 |
+
[LinkedIn](https://www.linkedin.com/in/sriharsha-cr)
|
| 132 |
+
[X](https://x.com/sriharsha_cr)
|
| 133 |
+
[Hugging Face](https://huggingface.co/sriharsha-cr)
|
| 134 |
+
[GitHub](https://github.com/SriharshaCR)
|
app.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import config
|
| 3 |
+
from db.store import init_db
|
| 4 |
+
from models.model_loader import get_llm, get_embedder
|
| 5 |
+
from ui.compress_tab import build_compress_tab
|
| 6 |
+
from ui.history_tab import build_history_tab
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def build_app() -> gr.Blocks:
    """Create the top-level Gradio Blocks app titled `config.APP_TITLE`.

    Both tab builders are invoked while the Blocks context is open; their
    return values are not used here.
    """
    blocks = gr.Blocks(title=config.APP_TITLE)
    with blocks:
        for build_tab in (build_compress_tab, build_history_tab):
            build_tab()
    return blocks
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
if __name__ == "__main__":
    # Startup order: database, then model warm-up, then the UI.
    print("Initialising database...")
    init_db()

    print("Loading models (first run may download weights)...")
    for warm_up in (get_llm, get_embedder):
        warm_up()

    print("Starting TinyPress...")
    app = build_app()
    app.launch(server_port=config.SERVER_PORT)
|
config.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
+
# --- Models -----------------------------------------------------------------
# Defaults for the compression LLM and the scoring embedder; each can be
# overridden with the environment variable of the same name.
LLM_MODEL = os.environ.get("LLM_MODEL", "Qwen/Qwen2.5-1.5B-Instruct")
EMBEDDER_MODEL = os.environ.get(
    "EMBEDDER_MODEL", "sentence-transformers/all-MiniLM-L6-v2"
)

# Curated <32B open-weight causal LMs for local inference (shown in the UI dropdown).
AVAILABLE_MODELS = [
    "Qwen/Qwen2.5-1.5B-Instruct",
    "Qwen/Qwen2.5-0.5B-Instruct",
    "HuggingFaceTB/SmolLM2-1.7B-Instruct",
    "microsoft/Phi-3.5-mini-instruct",
    "meta-llama/Llama-3.2-1B-Instruct",
]

# Markdown blurb per curated embedder, keyed by model ID.
EMBEDDER_INFO = {
    "sentence-transformers/all-MiniLM-L6-v2": (
        "⚡ **Fast · 22M params · Default** \n"
        "Great baseline. Scores are reliable for typical compression ratios. "
        "Runs comfortably on CPU — minimal overhead."
    ),
    "sentence-transformers/all-mpnet-base-v2": (
        "⚖️ **Balanced · 110M params** \n"
        "Noticeably sharper quality scores than MiniLM, especially on longer texts. "
        "Small speed trade-off; fine on CPU."
    ),
    "BAAI/bge-small-en-v1.5": (
        "⚡ **Fast · 33M params** \n"
        "Strong quality-to-size ratio — often matches MiniLM on accuracy while being "
        "slightly more sensitive to meaning shifts. Good CPU option."
    ),
    "BAAI/bge-base-en-v1.5": (
        "⚖️ **Balanced · 109M params** \n"
        "Consistently strong on semantic similarity benchmarks. "
        "Scores will be more discriminating — small differences in compression quality show up more clearly."
    ),
    "mixedbread-ai/mxbai-embed-large-v1": (
        "🏆 **High quality · 335M params** \n"
        "Top-tier similarity scores. Quality readings will be the most accurate here, "
        "but slower to load and run. GPU recommended."
    ),
    "Alibaba-NLP/gte-Qwen2-1.5B-instruct": (
        "🔬 **Best quality · 1.5B params** \n"
        "Strongest semantic understanding in this list. Scores will reflect subtle meaning loss "
        "that smaller models miss. Requires significant RAM/VRAM — GPU strongly recommended."
    ),
}

# Curated sentence-transformer embedding models for quality scoring.
# Taken from the blurb table's keys (insertion order), so the two stay in sync.
AVAILABLE_EMBEDDER_MODELS = list(EMBEDDER_INFO)

# --- Compression --------------------------------------------------------------
DEFAULT_TARGET_TOKENS = 500
MAX_NEW_TOKENS = 1024

# --- Database -----------------------------------------------------------------
DB_PATH = os.environ.get("DB_PATH", "tinypress.db")

# --- Gradio -------------------------------------------------------------------
APP_TITLE = "TinyPress"
SERVER_PORT = int(os.environ.get("PORT", 7860))
|
core/compressor.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import config
|
| 3 |
+
from core.tokenizer_utils import count_tokens
|
| 4 |
+
from models.model_loader import get_llm
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
_PROMPT_TEMPLATE = """You are a lossless compression assistant. Compress the following text to at most {target} tokens.
Preserve all key facts, decisions, and intent. Do not add commentary. Output only the compressed text.

TEXT:
{text}

COMPRESSED:"""


def _generate(prompt: str, max_new_tokens: int | None = None) -> str:
    """Run the LLM on `prompt` with sampling disabled and return only the new text.

    Args:
        prompt: Full prompt string passed to the tokenizer as-is.
        max_new_tokens: Optional cap on generated tokens. When omitted,
            `config.MAX_NEW_TOKENS` is used; a supplied value is clamped to
            the range 1..`config.MAX_NEW_TOKENS`.

    Returns:
        The decoded continuation (prompt tokens removed, special tokens
        skipped), stripped of surrounding whitespace.
    """
    model, tokenizer = get_llm()

    limit = config.MAX_NEW_TOKENS
    if max_new_tokens is not None:
        limit = max(1, min(max_new_tokens, config.MAX_NEW_TOKENS))

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=limit,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + continuation; slice the prompt off.
    prompt_len = inputs["input_ids"].shape[1]
    new_tokens = output_ids[0][prompt_len:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


def compress(text: str, target_tokens: int) -> tuple[str, int, int]:
    """Compress `text` to at most `target_tokens` tokens.

    Returns (compressed_text, input_token_count, output_token_count).
    Text already within budget is returned unchanged without calling the model.
    """
    input_tokens = count_tokens(text)

    # If already within budget, return as-is
    if input_tokens <= target_tokens:
        return text, input_tokens, input_tokens

    prompt = _PROMPT_TEMPLATE.format(target=target_tokens, text=text)
    # Anything generated past the budget is trimmed below, so there is no
    # point paying for more than `target_tokens` new tokens.
    compressed = _generate(prompt, max_new_tokens=target_tokens)

    # Trim to hard token limit if model overshoots
    _, tokenizer = get_llm()
    ids = tokenizer.encode(compressed, add_special_tokens=False)
    if len(ids) > target_tokens:
        compressed = tokenizer.decode(ids[:target_tokens], skip_special_tokens=True)

    output_tokens = count_tokens(compressed)
    return compressed, input_tokens, output_tokens
|
core/diff.py
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import difflib
|
| 2 |
+
import html as _h
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def _word_diff(original: str, compressed: str) -> tuple[str, str]:
|
| 6 |
+
"""
|
| 7 |
+
Word-level SequenceMatcher diff.
|
| 8 |
+
Returns (annotated_original_html, annotated_compressed_html).
|
| 9 |
+
|
| 10 |
+
Colour key:
|
| 11 |
+
original — red strikethrough = dropped
|
| 12 |
+
original — plain = survived unchanged
|
| 13 |
+
compressed — amber = rewritten (replaced)
|
| 14 |
+
compressed — green = inserted (rare; model added a connector word)
|
| 15 |
+
compressed — plain = survived unchanged
|
| 16 |
+
"""
|
| 17 |
+
orig_words = original.split()
|
| 18 |
+
comp_words = compressed.split()
|
| 19 |
+
matcher = difflib.SequenceMatcher(None, orig_words, comp_words, autojunk=False)
|
| 20 |
+
|
| 21 |
+
orig_parts: list[str] = []
|
| 22 |
+
comp_parts: list[str] = []
|
| 23 |
+
|
| 24 |
+
for tag, i1, i2, j1, j2 in matcher.get_opcodes():
|
| 25 |
+
ow = _h.escape(" ".join(orig_words[i1:i2]))
|
| 26 |
+
cw = _h.escape(" ".join(comp_words[j1:j2]))
|
| 27 |
+
|
| 28 |
+
if tag == "equal":
|
| 29 |
+
orig_parts.append(ow)
|
| 30 |
+
comp_parts.append(cw)
|
| 31 |
+
|
| 32 |
+
elif tag == "delete":
|
| 33 |
+
orig_parts.append(
|
| 34 |
+
f'<mark style="background:#fee2e2;color:#b91c1c;'
|
| 35 |
+
f'text-decoration:line-through;padding:1px 3px;border-radius:3px">{ow}</mark>'
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
+
elif tag == "insert":
|
| 39 |
+
comp_parts.append(
|
| 40 |
+
f'<mark style="background:#dcfce7;color:#15803d;'
|
| 41 |
+
f'padding:1px 3px;border-radius:3px">{cw}</mark>'
|
| 42 |
+
)
|
| 43 |
+
|
| 44 |
+
elif tag == "replace":
|
| 45 |
+
orig_parts.append(
|
| 46 |
+
f'<mark style="background:#fee2e2;color:#b91c1c;'
|
| 47 |
+
f'text-decoration:line-through;padding:1px 3px;border-radius:3px">{ow}</mark>'
|
| 48 |
+
)
|
| 49 |
+
comp_parts.append(
|
| 50 |
+
f'<mark style="background:#fef9c3;color:#92400e;'
|
| 51 |
+
f'padding:1px 3px;border-radius:3px">{cw}</mark>'
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
return " ".join(orig_parts), " ".join(comp_parts)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def render_diff_html(record: dict) -> str:
    """Build a self-contained side-by-side diff HTML block for a compression run.

    Args:
        record: Mapping describing one run. Keys read here: input_text,
            output_text, model, tokenizer, timestamp, input_tokens,
            output_tokens, target_tokens, compression_ratio, quality_score,
            duration_ms, id, feedback, feedback_comment.

    Returns:
        An HTML string, or "" when either text is missing or empty.

    Missing keys and keys whose value is None are treated the same way:
    display fields fall back to an em dash and the two ratios fall back to 0.
    Every interpolated value is HTML-escaped.
    """
    original = record.get("input_text") or ""
    compressed = record.get("output_text") or ""
    if not original or not compressed:
        return ""

    orig_html, comp_html = _word_diff(original, compressed)

    def shown(key: str) -> str:
        # Escaped display text; em dash when the field is absent or None.
        value = record.get(key)
        return _h.escape("—" if value is None else str(value))

    def four_dp(key: str) -> str:
        # Ratio-style metrics rendered with four decimals; absent/None -> 0.
        return f"{float(record.get(key) or 0):.4f}"

    model = shown("model")
    tokenizer = shown("tokenizer")
    ts = shown("timestamp")
    in_tok = shown("input_tokens")
    out_tok = shown("output_tokens")
    target_tok = shown("target_tokens")
    duration = shown("duration_ms")
    run_id = shown("id")
    ratio = four_dp("compression_ratio")
    quality = four_dp("quality_score")

    feedback_val = record.get("feedback")
    feedback_note = _h.escape(record.get("feedback_comment") or "")

    # Build optional feedback block
    if feedback_val is not None:
        badge_bg = "#f0fdf4" if feedback_val == 1 else "#fef2f2"
        badge_color = "#15803d" if feedback_val == 1 else "#b91c1c"
        badge_text = "👍 Helpful" if feedback_val == 1 else "👎 Not helpful"
        feedback_block = (
            f'<div style="display:flex;flex-wrap:wrap;align-items:center;gap:8px;'
            f'margin-top:10px;padding:8px 12px;border-radius:6px;background:{badge_bg}">'
            f'<span style="font-weight:600;font-size:0.8rem;color:{badge_color}">{badge_text}</span>'
        )
        if feedback_note:
            feedback_block += (
                f'<span style="font-size:0.8rem;color:#374151;font-style:italic">'
                f'"{feedback_note}"</span>'
            )
        feedback_block += "</div>"
    else:
        feedback_block = ""

    return f"""
<div style="font-family:system-ui,sans-serif;margin-top:4px">

<!-- Primary meta chips -->
<div style="display:flex;flex-wrap:wrap;gap:6px;margin-bottom:6px;font-size:0.78rem">
<span style="background:#f3f4f6;padding:3px 9px;border-radius:12px;color:#374151">Run #{run_id}</span>
<span style="background:#f3f4f6;padding:3px 9px;border-radius:12px;color:#374151">{ts}</span>
<span style="background:#eff6ff;padding:3px 9px;border-radius:12px;color:#1d4ed8">{model}</span>
<span style="background:#f0fdf4;padding:3px 9px;border-radius:12px;color:#15803d">Quality {quality}</span>
<span style="background:#fff7ed;padding:3px 9px;border-radius:12px;color:#c2410c">Ratio {ratio}</span>
<span style="background:#faf5ff;padding:3px 9px;border-radius:12px;color:#7e22ce">⏱ {duration} ms</span>
</div>

<!-- Secondary meta chips -->
<div style="display:flex;flex-wrap:wrap;gap:6px;margin-bottom:12px;font-size:0.78rem">
<span style="background:#f3f4f6;padding:3px 9px;border-radius:12px;color:#374151">{in_tok} in → {out_tok} out (target {target_tok})</span>
<span style="background:#f3f4f6;padding:3px 9px;border-radius:12px;color:#374151">tokenizer: {tokenizer}</span>
</div>

<!-- Side-by-side panels -->
<div style="display:grid;grid-template-columns:1fr 1fr;gap:12px">

<!-- Original -->
<div style="border:1px solid #fecaca;border-radius:8px;overflow:hidden">
<div style="background:#fef2f2;padding:8px 14px;border-bottom:1px solid #fecaca;
display:flex;justify-content:space-between;align-items:center">
<span style="font-weight:700;font-size:0.8rem;color:#b91c1c;letter-spacing:.04em">ORIGINAL</span>
<span style="font-size:0.75rem;color:#6b7280">{in_tok} tokens</span>
</div>
<div style="padding:14px;line-height:1.8;font-size:0.875rem;color:#1a1a1a;
max-height:340px;overflow-y:auto;word-break:break-word">
{orig_html}
</div>
</div>

<!-- Compressed -->
<div style="border:1px solid #bbf7d0;border-radius:8px;overflow:hidden">
<div style="background:#f0fdf4;padding:8px 14px;border-bottom:1px solid #bbf7d0;
display:flex;justify-content:space-between;align-items:center">
<span style="font-weight:700;font-size:0.8rem;color:#15803d;letter-spacing:.04em">COMPRESSED</span>
<span style="font-size:0.75rem;color:#6b7280">{out_tok} tokens</span>
</div>
<div style="padding:14px;line-height:1.8;font-size:0.875rem;color:#1a1a1a;
max-height:340px;overflow-y:auto;word-break:break-word">
{comp_html}
</div>
</div>

</div>

{feedback_block}

<!-- Legend -->
<div style="display:flex;flex-wrap:wrap;gap:14px;margin-top:10px;font-size:0.75rem;color:#6b7280;align-items:center">
<mark style="background:#fee2e2;color:#b91c1c;text-decoration:line-through;padding:2px 7px;border-radius:3px">dropped</mark>
<mark style="background:#fef9c3;color:#92400e;padding:2px 7px;border-radius:3px">rewritten</mark>
<mark style="background:#dcfce7;color:#15803d;padding:2px 7px;border-radius:3px">inserted</mark>
<span>plain = unchanged</span>
</div>

</div>
"""
|
core/scorer.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from models.model_loader import get_embedder
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def semantic_score(original: str, compressed: str) -> float:
    """Cosine similarity between the embeddings of the two texts, clamped to [0, 1].

    Both texts are encoded in one batch with the embedder returned by
    `get_embedder()`. The result is rounded to four decimals.

    Returns 0.0 when the similarity is undefined (a zero-length or non-finite
    embedding). Without this guard the division yields NaN, which the clamp
    `max(0.0, min(1.0, nan))` turns into 1.0 — a "perfect" score.
    """
    embedder = get_embedder()
    vecs = embedder.encode([original, compressed], convert_to_numpy=True)

    denom = np.linalg.norm(vecs[0]) * np.linalg.norm(vecs[1])
    if denom == 0 or not np.isfinite(denom):
        return 0.0

    cos = float(np.dot(vecs[0], vecs[1]) / denom)
    if np.isnan(cos):
        return 0.0
    return round(max(0.0, min(1.0, cos)), 4)
|
core/tokenizer_utils.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from models.model_loader import get_llm
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def count_tokens(text: str) -> int:
    """Return how many token IDs `text` encodes to, special tokens excluded.

    Uses the tokenizer half of the pair returned by `get_llm()`.
    """
    _model, tokenizer = get_llm()
    token_ids = tokenizer.encode(text, add_special_tokens=False)
    return len(token_ids)
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def get_token_strings(text: str) -> list[str]:
    """Return the decoded surface string for every token in text.

    The text is encoded without special tokens, then each ID is decoded on
    its own, so the list has one entry per token.
    """
    _model, tokenizer = get_llm()
    pieces: list[str] = []
    for token_id in tokenizer.encode(text, add_special_tokens=False):
        pieces.append(tokenizer.decode([token_id]))
    return pieces
|
db/schema.sql
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-- One row per compression run.
CREATE TABLE IF NOT EXISTS compression_runs (
    id                INTEGER PRIMARY KEY AUTOINCREMENT,
    timestamp         TEXT    NOT NULL,
    model             TEXT    NOT NULL,
    tokenizer         TEXT    NOT NULL,
    input_tokens      INTEGER NOT NULL,
    output_tokens     INTEGER NOT NULL,
    target_tokens     INTEGER NOT NULL,
    compression_ratio REAL    NOT NULL,
    quality_score     REAL    NOT NULL,
    duration_ms       REAL    NOT NULL,
    input_text        TEXT    NOT NULL,
    output_text       TEXT    NOT NULL,
    -- Nullable: stays NULL until the user rates the run.
    feedback          INTEGER,
    -- Nullable: optional free-text note attached to the rating.
    feedback_comment  TEXT
);
|
db/store.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sqlite3
|
| 2 |
+
import config
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def _connect():
    """Open a new SQLite connection to `config.DB_PATH`.

    The row factory is `sqlite3.Row`, so fetched rows can be read by column
    name and converted with `dict(row)`.
    """
    connection = sqlite3.connect(config.DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def init_db():
    """Create the `compression_runs` table if needed and add missing columns.

    Runs `schema.sql` (located next to this module), then migrates existing
    databases that pre-date newer columns. The table is inspected first and
    only absent columns are added, so genuine SQLite errors (locked or
    read-only database, malformed file) propagate instead of being mistaken
    for "column already exists". The connection is closed even on failure.
    """
    schema = Path(__file__).parent / "schema.sql"
    # (column name, column definition) for columns added after the original schema.
    migrations = [
        ("tokenizer", "TEXT NOT NULL DEFAULT ''"),
        ("duration_ms", "REAL NOT NULL DEFAULT 0"),
        ("feedback", "INTEGER"),
        ("feedback_comment", "TEXT"),
    ]

    conn = _connect()
    try:
        conn.executescript(schema.read_text(encoding="utf-8"))

        # PRAGMA table_info yields one row per column; index 1 is the name.
        existing = {
            row[1] for row in conn.execute("PRAGMA table_info(compression_runs)")
        }
        for col, typedef in migrations:
            if col not in existing:
                conn.execute(f"ALTER TABLE compression_runs ADD COLUMN {col} {typedef}")

        conn.commit()
    finally:
        conn.close()
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def save_run(record: dict) -> int:
    """Insert one compression run and return its new row id.

    Args:
        record: Mapping supplying the named SQL parameters timestamp, model,
            tokenizer, input_tokens, output_tokens, target_tokens,
            compression_ratio, quality_score, duration_ms, input_text and
            output_text.

    The connection is closed even if the insert fails (for example when a
    required key is missing from `record`).
    """
    conn = _connect()
    try:
        cursor = conn.execute(
            """
            INSERT INTO compression_runs
            (timestamp, model, tokenizer, input_tokens, output_tokens, target_tokens,
            compression_ratio, quality_score, duration_ms, input_text, output_text)
            VALUES
            (:timestamp, :model, :tokenizer, :input_tokens, :output_tokens, :target_tokens,
            :compression_ratio, :quality_score, :duration_ms, :input_text, :output_text)
            """,
            record,
        )
        run_id = cursor.lastrowid
        conn.commit()
        return run_id
    finally:
        conn.close()
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def update_feedback(run_id: int, value: int):
    """Set the `feedback` column of run `run_id` to `value`.

    The connection is closed even if the update raises.
    """
    conn = _connect()
    try:
        conn.execute(
            "UPDATE compression_runs SET feedback = ? WHERE id = ?",
            (value, run_id),
        )
        conn.commit()
    finally:
        conn.close()
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def update_feedback_comment(run_id: int, comment: str):
    """Set the `feedback_comment` column of run `run_id` to `comment`.

    The connection is closed even if the update raises.
    """
    conn = _connect()
    try:
        conn.execute(
            "UPDATE compression_runs SET feedback_comment = ? WHERE id = ?",
            (comment, run_id),
        )
        conn.commit()
    finally:
        conn.close()
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def delete_run(run_id: int):
    """Delete the run with id `run_id`; a non-existent id deletes nothing.

    The connection is closed even if the delete raises.
    """
    conn = _connect()
    try:
        conn.execute("DELETE FROM compression_runs WHERE id = ?", (run_id,))
        conn.commit()
    finally:
        conn.close()
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def get_run(run_id: int) -> dict | None:
    """Fetch one run as a plain dict keyed by column name, or None if absent.

    The connection is closed even if the query raises.
    """
    conn = _connect()
    try:
        row = conn.execute(
            "SELECT * FROM compression_runs WHERE id = ?", (run_id,)
        ).fetchone()
    finally:
        conn.close()
    return dict(row) if row else None
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def get_runs(limit: int = 100) -> list[dict]:
    """Return up to `limit` runs as dicts, highest id first.

    The connection is closed even if the query raises.
    """
    conn = _connect()
    try:
        rows = conn.execute(
            "SELECT * FROM compression_runs ORDER BY id DESC LIMIT ?", (limit,)
        ).fetchall()
    finally:
        conn.close()
    return [dict(r) for r in rows]
|
docs/architecture.md
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Architecture
|
| 2 |
+
|
| 3 |
+
TinyPress is built modular — each concern lives in its own place, nothing bleeds into something it shouldn't.
|
| 4 |
+
|
| 5 |
+
## How a compression request flows
|
| 6 |
+
|
| 7 |
+
```
|
| 8 |
+
User Input (Gradio UI)
|
| 9 |
+
│
|
| 10 |
+
▼
|
| 11 |
+
core/compressor.py ← builds the prompt, calls the model, trims if it overshoots
|
| 12 |
+
│
|
| 13 |
+
▼
|
| 14 |
+
models/model_loader.py ← Qwen2.5-1.5B-Instruct, loaded once and reused
|
| 15 |
+
│
|
| 16 |
+
▼
|
| 17 |
+
core/scorer.py ← checks how much meaning survived using all-MiniLM-L6-v2
|
| 18 |
+
│
|
| 19 |
+
▼
|
| 20 |
+
db/store.py ← saves the run to SQLite
|
| 21 |
+
│
|
| 22 |
+
▼
|
| 23 |
+
ui/compress_tab.py ← shows the result and metrics back to the user
|
| 24 |
+
```
|
| 25 |
+
|
| 26 |
+
## What each module does
|
| 27 |
+
|
| 28 |
+
| Module | Responsibility |
|
| 29 |
+
|---|---|
|
| 30 |
+
| `app.py` | Starts everything — DB init, model load, Gradio launch |
|
| 31 |
+
| `config.py` | One place for all settings — model names, token limits, DB path, port |
|
| 32 |
+
| `ui/compress_tab.py` | The compression interface — input, slider, output, metrics |
|
| 33 |
+
| `ui/history_tab.py` | History view — past runs, averages, trends |
|
| 34 |
+
| `core/compressor.py` | Builds the compression prompt, runs generation, hard-trims if needed |
|
| 35 |
+
| `core/scorer.py` | Cosine similarity between original and compressed text |
|
| 36 |
+
| `core/tokenizer_utils.py` | Token counting and per-token string extraction using the LLM's own tokenizer |
|
| 37 |
+
| `core/diff.py` | Word-level SequenceMatcher diff — produces annotated HTML for the history side-by-side view |
|
| 38 |
+
| `models/model_loader.py` | Singleton model store — loads LLM + embedder on demand, supports hot-swapping both via `switch_llm` / `switch_embedder` |
|
| 39 |
+
| `db/store.py` | SQLite operations — init, save a run, fetch history, delete a run; auto-migrates older DBs |
|
| 40 |
+
| `db/schema.sql` | The `compression_runs` table definition |
|
| 41 |
+
|
| 42 |
+
## A few decisions worth knowing
|
| 43 |
+
|
| 44 |
+
**Models load once at startup.** This matters on a laptop — you don't want to reload a 1.5B model on every request. Both the LLM and the embedder are held in memory after the first load.
|
| 45 |
+
|
| 46 |
+
**Model hot-swapping without a restart.** The Model Settings accordion in the UI lets you pick a different compression model or scoring embedder mid-session. Both `switch_llm` and `switch_embedder` in `model_loader.py` unload the current model (deletes the references, calls `gc.collect`, and flushes the CUDA cache if a GPU is present) before loading the new one — so you don't end up with two large models in memory at once.
|
| 47 |
+
|
| 48 |
+
**Hard token trim as a safety net.** If the model overshoots the target budget, the output gets trimmed at the tokenizer level. It's a fallback, not the primary path — the prompt already asks the model to stay within budget.
|
| 49 |
+
|
| 50 |
+
**Thin UI layer.** The Gradio handlers in `ui/` don't contain logic. They take inputs, call into `core/`, and return outputs. All the real work happens in `core/` and `db/`.
|
| 51 |
+
|
| 52 |
+
**DB auto-migration.** `store.py` runs `ALTER TABLE … ADD COLUMN` for `tokenizer`, `duration_ms`, `feedback`, and `feedback_comment` on startup — so existing databases from earlier builds upgrade silently rather than crashing. `feedback` is nullable (`INTEGER`): `NULL` = no rating, `1` = 👍, `-1` = 👎. `feedback_comment` holds the optional text note.
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
🏠 [README.md](../README.md)
|
docs/enhancements.md
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Enhancements
|
| 2 |
+
|
| 3 |
+
The hackathon MVP covers the core loop well. Here's where it could go next.
|
| 4 |
+
|
| 5 |
+
## Quick wins
|
| 6 |
+
|
| 7 |
+
- **Batch compression** — let users paste multiple texts and compress them all at once
|
| 8 |
+
- **Export history** — download past runs as a CSV straight from the History tab
|
| 9 |
+
- **Named presets** — save favourite token budget + model combinations and reuse them
|
| 10 |
+
- **`.env` support** — load config from a `.env` file instead of setting env vars manually
|
| 11 |
+
|
| 12 |
+
## Worth building next
|
| 13 |
+
|
| 14 |
+
- **Iterative compression** — if the quality score drops below a threshold, automatically retry with a slightly relaxed budget
|
| 15 |
+
- **Custom focus instructions** — let the user say "keep all numbers" or "preserve action items only" before compressing
|
| 16 |
+
- **Chunked compression** — handle inputs that exceed the model's context window by splitting, compressing each chunk, then merging
|
| 17 |
+
- **REST API** — a simple `/compress` endpoint via Flask so other tools can call TinyPress programmatically
|
| 18 |
+
|
| 19 |
+
## Longer term
|
| 20 |
+
|
| 21 |
+
- **VS Code extension** — compress selected text without leaving the editor
|
| 22 |
+
- **CLI tool** — `tinypress compress --budget 500 input.txt` for terminal users
|
| 23 |
+
- **Hosted version** — a SaaS wrapper with usage tracking and team history
|
| 24 |
+
- **Domain-specific fine-tuning** — train a compressor specialised for legal, medical, or code content
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
🏠 [README.md](../README.md)
|
docs/folder-structure.md
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Folder Structure
|
| 2 |
+
|
| 3 |
+
```
|
| 4 |
+
app/
|
| 5 |
+
├── app.py # Entry point
|
| 6 |
+
├── config.py # All tunable settings
|
| 7 |
+
├── requirements.txt # Pinned Python dependencies
|
| 8 |
+
├── tinypress.db # SQLite DB (auto-created on first run)
|
| 9 |
+
│
|
| 10 |
+
├── ui/
|
| 11 |
+
│ ├── compress_tab.py # Compression UI tab
|
| 12 |
+
│ └── history_tab.py # Metrics history tab
|
| 13 |
+
│
|
| 14 |
+
├── core/
|
| 15 |
+
│ ├── compressor.py # Compression pipeline logic
|
| 16 |
+
│ ├── scorer.py # Semantic similarity scoring
|
| 17 |
+
│ ├── tokenizer_utils.py # Token counting helpers
|
| 18 |
+
│ └── diff.py # Word-level diff + HTML renderer for history view
|
| 19 |
+
│
|
| 20 |
+
├── models/
|
| 21 |
+
│ └── model_loader.py # Lazy model + embedder loading
|
| 22 |
+
│
|
| 23 |
+
├── db/
|
| 24 |
+
│ ├── schema.sql # SQLite table definitions
|
| 25 |
+
│ └── store.py # DB read/write operations
|
| 26 |
+
│
|
| 27 |
+
├── docs/ # Project documentation
|
| 28 |
+
│ ├── architecture.md
|
| 29 |
+
│ ├── folder-structure.md
|
| 30 |
+
│ ├── setup.md
|
| 31 |
+
│ ├── get-started.md
|
| 32 |
+
│ └── enhancements.md
|
| 33 |
+
│
|
| 34 |
+
├── my-notes/ # Planning notes (not part of the app)
|
| 35 |
+
│ └── overall-idea.md
|
| 36 |
+
│
|
| 37 |
+
└── claude-grounding/ # Context files for Claude (not part of the app)
|
| 38 |
+
├── hackathon.md
|
| 39 |
+
├── tech-stack.md
|
| 40 |
+
└── about-me.md
|
| 41 |
+
```
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
🏠 [README.md](../README.md)
|
docs/get-started.md
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Get Started
|
| 2 |
+
|
| 3 |
+
Once `python app.py` is running, head to `http://localhost:7860` in your browser. You'll see two tabs.
|
| 4 |
+
|
| 5 |
+
## Compress tab
|
| 6 |
+
|
| 7 |
+
This is where the action is.
|
| 8 |
+
|
| 9 |
+
1. Paste your text — could be a long prompt, meeting notes, an article, anything really
|
| 10 |
+
2. Use the slider to set your token budget (anywhere from 100 to 1000)
|
| 11 |
+
3. Hit **Compress**
|
| 12 |
+
|
| 13 |
+
As you type or adjust the slider, a status banner updates live:
|
| 14 |
+
- **Green** — the input is over budget, compression will run
|
| 15 |
+
- **Red** — the input is already within budget, nothing to do
|
| 16 |
+
|
| 17 |
+
On the right you'll see:
|
| 18 |
+
- The compressed version of your text
|
| 19 |
+
- How many tokens went in vs came out
|
| 20 |
+
- The compression ratio (how much it shrank)
|
| 21 |
+
- A quality score between 0 and 1 — closer to 1 means the meaning held up well
|
| 22 |
+
|
| 23 |
+
Once the result appears, **👍 Helpful** and **👎 Not helpful** buttons show up below the metrics. Click either one to rate the result — the feedback is saved instantly. A note field then slides in where you can optionally type what worked well or didn't (e.g. "lost key dates", "too short", "great summary") and hit **Save note**. Both the rating and the note are stored with the run and visible in the History tab.
|
| 24 |
+
|
| 25 |
+
Every run saves automatically in the background. You don't need to do anything.
|
| 26 |
+
|
| 27 |
+
### Token Highlights
|
| 28 |
+
|
| 29 |
+
Below the input box there's a **Show Token Highlights** button. Click it and each token in your input gets rendered as a colour-coded chip — useful for seeing exactly where your budget is going. The panel updates live as you type. Click again to hide it.
|
| 30 |
+
|
| 31 |
+
### Switching the compression model
|
| 32 |
+
|
| 33 |
+
Click **Model Settings** at the top of the tab to expand the accordion. Pick a model from the dropdown (or type a custom HuggingFace model ID) and hit **Load Model**. The current model is unloaded from memory first, then the new one loads — no restart needed. The status box confirms when it's ready.
|
| 34 |
+
|
| 35 |
+
Available presets: Qwen2.5-1.5B-Instruct (default), Qwen2.5-0.5B-Instruct, SmolLM2-1.7B-Instruct, Phi-3.5-mini-instruct, Llama-3.2-1B-Instruct.
|
| 36 |
+
|
| 37 |
+
### Switching the scoring embedder
|
| 38 |
+
|
| 39 |
+
Below the compression model section in the same accordion, there's a separate **Embedder Model** dropdown. The embedder is what computes the quality score — changing it affects how accurately that score reflects meaning retention.
|
| 40 |
+
|
| 41 |
+
When you select a model from the dropdown, an info panel updates immediately to explain the trade-off:
|
| 42 |
+
- ⚡ **Fast** models (MiniLM, bge-small) — low overhead, good baseline scores, CPU-friendly
|
| 43 |
+
- ⚖️ **Balanced** models (mpnet, bge-base) — more discriminating scores, small speed cost
|
| 44 |
+
- 🏆 **High quality** models (mxbai-large) — most accurate scores, GPU recommended
|
| 45 |
+
- 🔬 **Best quality** models (gte-Qwen2-1.5B) — catches subtle meaning loss, requires significant RAM/VRAM
|
| 46 |
+
|
| 47 |
+
Hit **Load Embedder** to apply the selection. The previous embedder is unloaded from memory before the new one loads.
|
| 48 |
+
|
| 49 |
+
## History tab
|
| 50 |
+
|
| 51 |
+
Switch to this tab to see everything that's been compressed so far.
|
| 52 |
+
|
| 53 |
+
The table loads automatically when you open the tab. Hit **Refresh** to pull in the latest runs. At the top you'll find the average quality score and compression ratio across all sessions — a quick way to see how the tool is performing over time.
|
| 54 |
+
|
| 55 |
+
### Column visibility
|
| 56 |
+
|
| 57 |
+
By default the table shows: `id`, `timestamp`, `model`, `compression_ratio`, `quality_score`, `feedback`. Open the **Column visibility** accordion above the table to toggle any additional columns on or off — changes apply instantly without a refresh.
|
| 58 |
+
|
| 59 |
+
### Side-by-side diff
|
| 60 |
+
|
| 61 |
+
Click any row in the table and a word-level diff panel opens below it. Words are colour-coded:
|
| 62 |
+
- Red strikethrough — dropped from the original
|
| 63 |
+
- Amber — rewritten by the model
|
| 64 |
+
- Green — inserted (rare connector words)
|
| 65 |
+
- Plain — survived unchanged
|
| 66 |
+
|
| 67 |
+
### Deleting a run
|
| 68 |
+
|
| 69 |
+
Click a row to select it, then hit **Delete Selected Row**. The table refreshes and the aggregate stats update automatically.
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
🏠 [README.md](../README.md)
|
docs/setup.md
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Setup
|
| 2 |
+
|
| 3 |
+
## What you need
|
| 4 |
+
|
| 5 |
+
**Hardware**
|
| 6 |
+
|
| 7 |
+
| | Minimum (CPU) | Recommended (GPU) |
|
| 8 |
+
|---|---|---|
|
| 9 |
+
| RAM | 8 GB | 8 GB+ |
|
| 10 |
+
| VRAM | — | 4 GB (e.g. NVIDIA T4) |
|
| 11 |
+
| Disk | ~4 GB free | ~4 GB free |
|
| 12 |
+
| Inference speed | Slow (float32) | Fast (float16, auto device map) |
|
| 13 |
+
|
| 14 |
+
The default model (Qwen2.5-1.5B-Instruct) fits in 4 GB VRAM. Larger models from the dropdown (e.g. Phi-3.5-mini) need more headroom.
|
| 15 |
+
|
| 16 |
+
**Software**
|
| 17 |
+
|
| 18 |
+
- Python 3.10 or above
|
| 19 |
+
|
| 20 |
+
**Network**
|
| 21 |
+
|
| 22 |
+
- Internet required on first run only — model weights (~3.5 GB total) download from HuggingFace and are cached locally
|
| 23 |
+
- Fully offline after that
|
| 24 |
+
|
| 25 |
+
## Steps
|
| 26 |
+
|
| 27 |
+
**1. Navigate to the project folder**
|
| 28 |
+
|
| 29 |
+
```bash
|
| 30 |
+
cd app
|
| 31 |
+
```
|
| 32 |
+
|
| 33 |
+
**2. Create and activate the virtual environment**
|
| 34 |
+
|
| 35 |
+
```bash
|
| 36 |
+
python -m venv .venv
|
| 37 |
+
.venv\Scripts\activate # Windows
|
| 38 |
+
# source .venv/bin/activate # macOS/Linux
|
| 39 |
+
```
|
| 40 |
+
|
| 41 |
+
**3. Install dependencies**
|
| 42 |
+
|
| 43 |
+
```bash
|
| 44 |
+
pip install -r requirements.txt
|
| 45 |
+
```
|
| 46 |
+
|
| 47 |
+
**4. Optionally tweak the defaults**
|
| 48 |
+
|
| 49 |
+
You can override any of these via environment variables if needed:
|
| 50 |
+
|
| 51 |
+
| Variable | Default | What it does |
|
| 52 |
+
|---|---|---|
|
| 53 |
+
| `LLM_MODEL` | `Qwen/Qwen2.5-1.5B-Instruct` | The model used for compression |
|
| 54 |
+
| `EMBEDDER_MODEL` | `sentence-transformers/all-MiniLM-L6-v2` | Used to score compression quality |
|
| 55 |
+
| `DB_PATH` | `tinypress.db` | Where the SQLite database lives |
|
| 56 |
+
| `PORT` | `7860` | Port the Gradio app listens on |
|
| 57 |
+
|
| 58 |
+
**5. Run it**
|
| 59 |
+
|
| 60 |
+
```bash
|
| 61 |
+
python app.py
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
The first time you run it, model weights will download from HuggingFace automatically. After that, everything runs from local cache.
|
| 65 |
+
|
| 66 |
+
## Managing dependencies
|
| 67 |
+
|
| 68 |
+
**Installing a new package**
|
| 69 |
+
|
| 70 |
+
```bash
|
| 71 |
+
pip install <package-name>
|
| 72 |
+
pip freeze > requirements.txt
|
| 73 |
+
```
|
| 74 |
+
|
| 75 |
+
**Removing a package**
|
| 76 |
+
|
| 77 |
+
```bash
|
| 78 |
+
pip uninstall <package-name>
|
| 79 |
+
pip freeze > requirements.txt
|
| 80 |
+
```
|
| 81 |
+
|
| 82 |
+
Always run `pip freeze > requirements.txt` after any install or uninstall — that keeps the file in sync with what's actually in your environment.
|
| 83 |
+
|
| 84 |
+
## Deactivating the virtual environment
|
| 85 |
+
|
| 86 |
+
When you're done, just run:
|
| 87 |
+
|
| 88 |
+
```bash
|
| 89 |
+
deactivate
|
| 90 |
+
```
|
| 91 |
+
|
| 92 |
+
That drops you back to your system Python. Next time, activate again with `.venv\Scripts\activate` (Windows) or `source .venv/bin/activate` (macOS/Linux) before working on the project.
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
🏠 [README.md](../README.md)
|
models/model_loader.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 2 |
+
from sentence_transformers import SentenceTransformer
|
| 3 |
+
import torch
|
| 4 |
+
import gc
|
| 5 |
+
import config
|
| 6 |
+
|
| 7 |
+
# Module-level singleton state. Everything starts out empty; the loader
# functions below fill these in and the unload functions reset them to None.
_llm = _tokenizer = _embedder = None
# IDs of whatever is currently loaded (None while nothing is loaded).
_current_model_id = _current_embedder_id = None
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def get_current_model_id() -> str | None:
    """Return the ID of the currently loaded LLM, or None when no LLM is loaded."""
    loaded_id = _current_model_id
    return loaded_id
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def get_current_tokenizer_id() -> str | None:
    """Return the ID of the currently loaded tokenizer, or None when none is loaded.

    The tokenizer is always loaded from the same model ID as the LLM, so this
    reports the LLM's ID.
    """
    tokenizer_id = _current_model_id
    return tokenizer_id
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def get_current_embedder_id() -> str | None:
    """Return the ID of the currently loaded embedder, or None when none is loaded."""
    loaded_id = _current_embedder_id
    return loaded_id
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def get_llm():
    """Return the ``(model, tokenizer)`` pair, loading ``config.LLM_MODEL`` first if no LLM is loaded."""
    # Only reads the module state here; _load_llm is what rebinds the globals.
    needs_load = _llm is None
    if needs_load:
        _load_llm(config.LLM_MODEL)
    return _llm, _tokenizer
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def switch_llm(model_id: str) -> str:
    """Replace the loaded LLM with ``model_id`` and return a status message.

    If ``model_id`` is already the current model nothing is reloaded. Otherwise
    the current model is unloaded before the new one is loaded.
    """
    if model_id != _current_model_id:
        # Unload first, then load the replacement.
        _unload_llm()
        _load_llm(model_id)
        return f"Loaded: {model_id}"
    return f"Already using {model_id}"
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def _load_llm(model_id: str):
    """Load a causal LM and its paired tokenizer. Both come from the same model_id.

    The weights are loaded in float16 when CUDA is available and in float32
    otherwise, so GPU runs use half the memory of a float32 load.

    The module-level ``_llm``, ``_tokenizer`` and ``_current_model_id`` are
    only rebound after both loads have succeeded, so a failed load never
    leaves a tokenizer paired with a missing or different model.

    Args:
        model_id: Identifier passed to ``from_pretrained`` for both the
            tokenizer and the model.
    """
    global _llm, _tokenizer, _current_model_id
    # Half precision on GPU; CPU keeps float32.
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=dtype,
        device_map="auto",
    )
    model.eval()
    # Publish the new pair together, after everything above succeeded.
    _tokenizer = tokenizer
    _llm = model
    _current_model_id = model_id
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def _unload_llm():
    """Release the LLM and tokenizer so their memory can be reclaimed before another model loads."""
    global _llm, _tokenizer, _current_model_id
    # Rebinding to None drops this module's references to the old objects.
    _llm = _tokenizer = _current_model_id = None
    gc.collect()
    cuda_present = torch.cuda.is_available()
    if cuda_present:
        torch.cuda.empty_cache()
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def get_embedder():
    """Return the embedder, loading ``config.EMBEDDER_MODEL`` first if none is loaded."""
    # Only reads the module state here; _load_embedder is what rebinds the globals.
    needs_load = _embedder is None
    if needs_load:
        _load_embedder(config.EMBEDDER_MODEL)
    return _embedder
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def switch_embedder(model_id: str) -> str:
    """Replace the loaded embedder with ``model_id`` and return a status message.

    If ``model_id`` is already the current embedder nothing is reloaded.
    Otherwise the current embedder is unloaded before the new one is loaded.
    """
    if model_id != _current_embedder_id:
        # Unload first, then load the replacement.
        _unload_embedder()
        _load_embedder(model_id)
        return f"Loaded: {model_id}"
    return f"Already using {model_id}"
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def _load_embedder(model_id: str):
    """Instantiate the SentenceTransformer for ``model_id`` and record it as the current embedder."""
    global _embedder, _current_embedder_id
    # The right-hand side is evaluated first, so a failed construction leaves
    # both globals untouched.
    _embedder, _current_embedder_id = SentenceTransformer(model_id), model_id
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def _unload_embedder():
    """Release the embedder so its memory can be reclaimed before another one loads."""
    global _embedder, _current_embedder_id
    # Rebinding to None drops this module's reference to the old embedder.
    _embedder = _current_embedder_id = None
    gc.collect()
    cuda_present = torch.cuda.is_available()
    if cuda_present:
        torch.cuda.empty_cache()
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==5.0
|
| 2 |
+
transformers>=4.40.0
|
| 3 |
+
sentence-transformers>=3.0.0
|
| 4 |
+
torch>=2.2.0
|
| 5 |
+
numpy>=1.26.0
|
| 6 |
+
pandas>=2.0.0
|
| 7 |
+
accelerate>=0.30.0
|
| 8 |
+
huggingface_hub==0.25.2
|
tinypress_colab.ipynb
ADDED
|
@@ -0,0 +1,336 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"nbformat": 4,
|
| 3 |
+
"nbformat_minor": 5,
|
| 4 |
+
"metadata": {
|
| 5 |
+
"kernelspec": {
|
| 6 |
+
"display_name": "Python 3",
|
| 7 |
+
"language": "python",
|
| 8 |
+
"name": "python3"
|
| 9 |
+
},
|
| 10 |
+
"language_info": {
|
| 11 |
+
"name": "python",
|
| 12 |
+
"version": "3.10.0"
|
| 13 |
+
},
|
| 14 |
+
"colab": {
|
| 15 |
+
"provenance": [],
|
| 16 |
+
"gpuType": "T4"
|
| 17 |
+
},
|
| 18 |
+
"accelerator": "GPU"
|
| 19 |
+
},
|
| 20 |
+
"cells": [
|
| 21 |
+
{
|
| 22 |
+
"cell_type": "markdown",
|
| 23 |
+
"id": "cell-title",
|
| 24 |
+
"metadata": {},
|
| 25 |
+
"source": "# TinyPress — Prompt Compression Engine\n\n**HuggingFace Build Small Hackathon · Track: Thousand Token Wood**\n\n| Layer | Detail |\n|-------|--------|\n| Compression | `Qwen/Qwen2.5-1.5B-Instruct` (default, switchable) |\n| Scoring | `sentence-transformers/all-MiniLM-L6-v2` (default, switchable) |\n| UI | Gradio 5 — public share URL |\n| Storage | SQLite at `/content/tinypress.db` |\n\n**Features**\n- Compress text to a user-defined token budget\n- Live 🔴 / 🟢 compression readiness banner\n- Per-token colour highlight panel (toggle on/off)\n- Dynamic compression model switching (5 curated <32B models)\n- Dynamic scoring embedder switching (6 models, with per-model impact info)\n- 👍 / 👎 feedback on every compression result, with optional text comment\n- Compression run history persisted to SQLite\n- Column picker in History tab — compact default view, expandable to all fields\n- Per-row delete in history\n- Side-by-side word-level diff viewer with feedback badge and token detail\n\n> **Recommended runtime:** GPU → Runtime → Change runtime type → T4 GPU\n\n---\n\n### About the author\n\nBuilt by **Sriharsha C R** — AI Engineer, Cloud Native developer, and knowledge sharer.\nIf this was useful, feel free to connect — always happy to chat about AI, LLMs, or anything in between.\n\n[](https://www.linkedin.com/in/sriharsha-cr)\n[](https://x.com/sriharsha_cr)\n[](https://huggingface.co/sriharsha-cr)\n[](https://github.com/SriharshaCR)"
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"cell_type": "markdown",
|
| 29 |
+
"id": "cell-s1-hdr",
|
| 30 |
+
"metadata": {},
|
| 31 |
+
"source": [
|
| 32 |
+
"## Step 1 — Install dependencies"
|
| 33 |
+
]
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"cell_type": "code",
|
| 37 |
+
"execution_count": null,
|
| 38 |
+
"id": "cell-install",
|
| 39 |
+
"metadata": {},
|
| 40 |
+
"outputs": [],
|
| 41 |
+
"source": [
|
| 42 |
+
"!pip install -q \\\n",
|
| 43 |
+
" \"gradio==5.0\" \\\n",
|
| 44 |
+
" \"transformers>=4.40.0\" \\\n",
|
| 45 |
+
" \"sentence-transformers>=3.0.0\" \\\n",
|
| 46 |
+
" \"torch>=2.2.0\" \\\n",
|
| 47 |
+
" \"numpy>=1.26.0\" \\\n",
|
| 48 |
+
" \"pandas>=2.0.0\" \\\n",
|
| 49 |
+
" \"accelerate>=0.30.0\" \\\n",
|
| 50 |
+
" \"huggingface_hub==0.25.2\""
|
| 51 |
+
]
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"cell_type": "markdown",
|
| 55 |
+
"id": "cell-s2-hdr",
|
| 56 |
+
"metadata": {},
|
| 57 |
+
"source": [
|
| 58 |
+
"## Step 2 — Runtime check"
|
| 59 |
+
]
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"cell_type": "code",
|
| 63 |
+
"execution_count": null,
|
| 64 |
+
"id": "cell-runtime",
|
| 65 |
+
"metadata": {},
|
| 66 |
+
"outputs": [],
|
| 67 |
+
"source": [
|
| 68 |
+
"import torch\n",
|
| 69 |
+
"\n",
|
| 70 |
+
"device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
|
| 71 |
+
"dtype = torch.float16 if device == 'cuda' else torch.float32\n",
|
| 72 |
+
"\n",
|
| 73 |
+
"print(f'Device : {device}')\n",
|
| 74 |
+
"if device == 'cuda':\n",
|
| 75 |
+
" print(f'GPU : {torch.cuda.get_device_name(0)}')\n",
|
| 76 |
+
" print(f'VRAM : {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB')\n",
|
| 77 |
+
"print(f'dtype : {dtype}')"
|
| 78 |
+
]
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"cell_type": "markdown",
|
| 82 |
+
"id": "cell-s3-hdr",
|
| 83 |
+
"metadata": {},
|
| 84 |
+
"source": [
|
| 85 |
+
"## Step 3 — Configuration"
|
| 86 |
+
]
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"cell_type": "code",
|
| 90 |
+
"execution_count": null,
|
| 91 |
+
"id": "cell-config",
|
| 92 |
+
"metadata": {},
|
| 93 |
+
"outputs": [],
|
| 94 |
+
"source": "import os\n\nLLM_MODEL = os.getenv('LLM_MODEL', 'Qwen/Qwen2.5-1.5B-Instruct')\nEMBEDDER_MODEL = os.getenv('EMBEDDER_MODEL', 'sentence-transformers/all-MiniLM-L6-v2')\nDB_PATH = os.getenv('DB_PATH', '/content/tinypress.db')\nSERVER_PORT = int(os.getenv('PORT', 7860))\n\nDEFAULT_TARGET_TOKENS = 500\nMAX_NEW_TOKENS = 1024\nAPP_TITLE = 'TinyPress'\n\n# Curated <32B open-weight causal LMs for local / Colab inference.\nAVAILABLE_MODELS = [\n 'Qwen/Qwen2.5-1.5B-Instruct',\n 'Qwen/Qwen2.5-0.5B-Instruct',\n 'HuggingFaceTB/SmolLM2-1.7B-Instruct',\n 'microsoft/Phi-3.5-mini-instruct',\n 'meta-llama/Llama-3.2-1B-Instruct',\n]\n\n# Curated sentence-transformer embedding models for quality scoring.\nAVAILABLE_EMBEDDER_MODELS = [\n 'sentence-transformers/all-MiniLM-L6-v2',\n 'sentence-transformers/all-mpnet-base-v2',\n 'BAAI/bge-small-en-v1.5',\n 'BAAI/bge-base-en-v1.5',\n 'mixedbread-ai/mxbai-embed-large-v1',\n 'Alibaba-NLP/gte-Qwen2-1.5B-instruct',\n]\n\nEMBEDDER_INFO = {\n 'sentence-transformers/all-MiniLM-L6-v2': (\n '⚡ **Fast · 22M params · Default** \\n'\n 'Great baseline. Scores are reliable for typical compression ratios. '\n 'Runs comfortably on CPU — minimal overhead.'\n ),\n 'sentence-transformers/all-mpnet-base-v2': (\n '⚖️ **Balanced · 110M params** \\n'\n 'Noticeably sharper quality scores than MiniLM, especially on longer texts. '\n 'Small speed trade-off; fine on CPU.'\n ),\n 'BAAI/bge-small-en-v1.5': (\n '⚡ **Fast · 33M params** \\n'\n 'Strong quality-to-size ratio — often matches MiniLM on accuracy while being '\n 'slightly more sensitive to meaning shifts. Good CPU option.'\n ),\n 'BAAI/bge-base-en-v1.5': (\n '⚖️ **Balanced · 109M params** \\n'\n 'Consistently strong on semantic similarity benchmarks. '\n 'Scores will be more discriminating — small differences in compression quality show up more clearly.'\n ),\n 'mixedbread-ai/mxbai-embed-large-v1': (\n '🏆 **High quality · 335M params** \\n'\n 'Top-tier similarity scores. 
Quality readings will be the most accurate here, '\n 'but slower to load and run. GPU recommended.'\n ),\n 'Alibaba-NLP/gte-Qwen2-1.5B-instruct': (\n '🔬 **Best quality · 1.5B params** \\n'\n 'Strongest semantic understanding in this list. Scores will reflect subtle meaning loss '\n 'that smaller models miss. Requires significant RAM/VRAM — GPU strongly recommended.'\n ),\n}\n\nprint(f'LLM : {LLM_MODEL}')\nprint(f'Embedder : {EMBEDDER_MODEL}')\nprint(f'DB : {DB_PATH}')"
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"cell_type": "markdown",
|
| 98 |
+
"id": "cell-s4-hdr",
|
| 99 |
+
"metadata": {},
|
| 100 |
+
"source": [
|
| 101 |
+
"## Step 4 — Model loader"
|
| 102 |
+
]
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"cell_type": "code",
|
| 106 |
+
"execution_count": null,
|
| 107 |
+
"id": "cell-model-loader",
|
| 108 |
+
"metadata": {},
|
| 109 |
+
"outputs": [],
|
| 110 |
+
"source": "from transformers import AutoTokenizer, AutoModelForCausalLM\nfrom sentence_transformers import SentenceTransformer\nimport gc\n\n_llm = None\n_tokenizer = None\n_embedder = None\n_current_model_id = None\n_current_embedder_id = None\n\n\ndef get_current_model_id():\n return _current_model_id\n\n\ndef get_current_tokenizer_id():\n # Tokenizer is always loaded from the same HF repo as the model.\n return _current_model_id\n\n\ndef get_current_embedder_id():\n return _current_embedder_id\n\n\ndef get_llm():\n global _llm, _tokenizer\n if _llm is None:\n _load_llm(LLM_MODEL)\n return _llm, _tokenizer\n\n\ndef switch_llm(model_id: str) -> str:\n global _current_model_id\n if _current_model_id == model_id:\n return f'Already using {model_id}'\n _unload_llm()\n _load_llm(model_id)\n return f'Loaded: {model_id}'\n\n\ndef _load_llm(model_id: str):\n \"\"\"Load model + its paired tokenizer. Both come from the same model_id.\"\"\"\n global _llm, _tokenizer, _current_model_id\n print(f'Loading LLM: {model_id} ...')\n _tokenizer = AutoTokenizer.from_pretrained(model_id)\n _llm = AutoModelForCausalLM.from_pretrained(\n model_id,\n torch_dtype=dtype,\n device_map='auto',\n )\n _llm.eval()\n _current_model_id = model_id\n print(f'LLM ready: {model_id}')\n\n\ndef _unload_llm():\n \"\"\"Free GPU/CPU memory before loading a different model.\"\"\"\n global _llm, _tokenizer, _current_model_id\n del _llm, _tokenizer\n _llm = None\n _tokenizer = None\n _current_model_id = None\n gc.collect()\n if torch.cuda.is_available():\n torch.cuda.empty_cache()\n\n\ndef get_embedder():\n global _embedder, _current_embedder_id\n if _embedder is None:\n _load_embedder(EMBEDDER_MODEL)\n return _embedder\n\n\ndef switch_embedder(model_id: str) -> str:\n global _current_embedder_id\n if _current_embedder_id == model_id:\n return f'Already using {model_id}'\n _unload_embedder()\n _load_embedder(model_id)\n return f'Loaded: {model_id}'\n\n\ndef _load_embedder(model_id: str):\n global _embedder, 
_current_embedder_id\n print(f'Loading embedder: {model_id} ...')\n _embedder = SentenceTransformer(model_id)\n _current_embedder_id = model_id\n print(f'Embedder ready: {model_id}')\n\n\ndef _unload_embedder():\n global _embedder, _current_embedder_id\n del _embedder\n _embedder = None\n _current_embedder_id = None\n gc.collect()\n if torch.cuda.is_available():\n torch.cuda.empty_cache()\n\n\nprint('Model loader defined.')"
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"cell_type": "markdown",
|
| 114 |
+
"id": "cell-s5-hdr",
|
| 115 |
+
"metadata": {},
|
| 116 |
+
"source": [
|
| 117 |
+
"## Step 5 — Core pipeline"
|
| 118 |
+
]
|
| 119 |
+
},
|
| 120 |
+
{
|
| 121 |
+
"cell_type": "code",
|
| 122 |
+
"execution_count": null,
|
| 123 |
+
"id": "cell-core",
|
| 124 |
+
"metadata": {},
|
| 125 |
+
"outputs": [],
|
| 126 |
+
"source": [
|
| 127 |
+
"import numpy as np\n",
|
| 128 |
+
"\n",
|
| 129 |
+
"# ── tokenizer utils ───────────────────────────────────────────────────────────\n",
|
| 130 |
+
"\n",
|
| 131 |
+
"def count_tokens(text: str) -> int:\n",
|
| 132 |
+
" _, tokenizer = get_llm()\n",
|
| 133 |
+
" return len(tokenizer.encode(text, add_special_tokens=False))\n",
|
| 134 |
+
"\n",
|
| 135 |
+
"\n",
|
| 136 |
+
"def get_token_strings(text: str) -> list:\n",
|
| 137 |
+
" \"\"\"Return the decoded surface string for every token in text.\"\"\"\n",
|
| 138 |
+
" _, tokenizer = get_llm()\n",
|
| 139 |
+
" ids = tokenizer.encode(text, add_special_tokens=False)\n",
|
| 140 |
+
" return [tokenizer.decode([i]) for i in ids]\n",
|
| 141 |
+
"\n",
|
| 142 |
+
"\n",
|
| 143 |
+
"# ── compressor ────────────────────────────────────────────────────────────────\n",
|
| 144 |
+
"\n",
|
| 145 |
+
"_PROMPT_TEMPLATE = (\n",
|
| 146 |
+
" 'You are a lossless compression assistant. '\n",
|
| 147 |
+
" 'Compress the following text to at most {target} tokens.\\n'\n",
|
| 148 |
+
" 'Preserve all key facts, decisions, and intent. '\n",
|
| 149 |
+
" 'Do not add commentary. Output only the compressed text.\\n\\n'\n",
|
| 150 |
+
" 'TEXT:\\n{text}\\n\\nCOMPRESSED:'\n",
|
| 151 |
+
")\n",
|
| 152 |
+
"\n",
|
| 153 |
+
"\n",
|
| 154 |
+
"def _generate(prompt: str) -> str:\n",
|
| 155 |
+
" model, tokenizer = get_llm()\n",
|
| 156 |
+
" inputs = tokenizer(prompt, return_tensors='pt').to(model.device)\n",
|
| 157 |
+
" with torch.no_grad():\n",
|
| 158 |
+
" output_ids = model.generate(\n",
|
| 159 |
+
" **inputs,\n",
|
| 160 |
+
" max_new_tokens=MAX_NEW_TOKENS,\n",
|
| 161 |
+
" do_sample=False,\n",
|
| 162 |
+
" pad_token_id=tokenizer.eos_token_id,\n",
|
| 163 |
+
" )\n",
|
| 164 |
+
" new_tokens = output_ids[0][inputs['input_ids'].shape[1]:]\n",
|
| 165 |
+
" return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()\n",
|
| 166 |
+
"\n",
|
| 167 |
+
"\n",
|
| 168 |
+
"def compress(text: str, target_tokens: int) -> tuple:\n",
|
| 169 |
+
" \"\"\"Returns (compressed_text, input_token_count, output_token_count).\"\"\"\n",
|
| 170 |
+
" input_tokens = count_tokens(text)\n",
|
| 171 |
+
" if input_tokens <= target_tokens:\n",
|
| 172 |
+
" return text, input_tokens, input_tokens\n",
|
| 173 |
+
"\n",
|
| 174 |
+
" prompt = _PROMPT_TEMPLATE.format(target=target_tokens, text=text)\n",
|
| 175 |
+
" compressed = _generate(prompt)\n",
|
| 176 |
+
"\n",
|
| 177 |
+
" # Hard-trim if model overshoots.\n",
|
| 178 |
+
" _, tokenizer = get_llm()\n",
|
| 179 |
+
" ids = tokenizer.encode(compressed, add_special_tokens=False)\n",
|
| 180 |
+
" if len(ids) > target_tokens:\n",
|
| 181 |
+
" compressed = tokenizer.decode(ids[:target_tokens], skip_special_tokens=True)\n",
|
| 182 |
+
"\n",
|
| 183 |
+
" output_tokens = count_tokens(compressed)\n",
|
| 184 |
+
" return compressed, input_tokens, output_tokens\n",
|
| 185 |
+
"\n",
|
| 186 |
+
"\n",
|
| 187 |
+
"# ── scorer ────────────────────────────────────────────────────────────────────\n",
|
| 188 |
+
"\n",
|
| 189 |
+
"def semantic_score(original: str, compressed: str) -> float:\n",
|
| 190 |
+
" embedder = get_embedder()\n",
|
| 191 |
+
" vecs = embedder.encode([original, compressed], convert_to_numpy=True)\n",
|
| 192 |
+
" cos = float(\n",
|
| 193 |
+
" np.dot(vecs[0], vecs[1]) / (np.linalg.norm(vecs[0]) * np.linalg.norm(vecs[1]))\n",
|
| 194 |
+
" )\n",
|
| 195 |
+
" return round(max(0.0, min(1.0, cos)), 4)\n",
|
| 196 |
+
"\n",
|
| 197 |
+
"\n",
|
| 198 |
+
"print('Core pipeline defined.')"
|
| 199 |
+
]
|
| 200 |
+
},
|
| 201 |
+
{
|
| 202 |
+
"cell_type": "markdown",
|
| 203 |
+
"id": "cell-s6-hdr",
|
| 204 |
+
"metadata": {},
|
| 205 |
+
"source": [
|
| 206 |
+
"## Step 6 — Diff renderer"
|
| 207 |
+
]
|
| 208 |
+
},
|
| 209 |
+
{
|
| 210 |
+
"cell_type": "code",
|
| 211 |
+
"execution_count": null,
|
| 212 |
+
"id": "cell-diff",
|
| 213 |
+
"metadata": {},
|
| 214 |
+
"outputs": [],
|
| 215 |
+
"source": "import difflib\nimport html as _h\n\n\ndef _word_diff(original: str, compressed: str) -> tuple:\n \"\"\"\n Word-level SequenceMatcher diff.\n Returns (annotated_original_html, annotated_compressed_html).\n Colour key:\n original — red strikethrough = dropped\n compressed — amber = rewritten\n compressed — green = inserted\n plain = unchanged\n \"\"\"\n orig_words = original.split()\n comp_words = compressed.split()\n matcher = difflib.SequenceMatcher(None, orig_words, comp_words, autojunk=False)\n\n orig_parts, comp_parts = [], []\n\n for tag, i1, i2, j1, j2 in matcher.get_opcodes():\n ow = _h.escape(' '.join(orig_words[i1:i2]))\n cw = _h.escape(' '.join(comp_words[j1:j2]))\n\n if tag == 'equal':\n orig_parts.append(ow)\n comp_parts.append(cw)\n\n elif tag == 'delete':\n orig_parts.append(\n f'<mark style=\"background:#fee2e2;color:#b91c1c;'\n f'text-decoration:line-through;padding:1px 3px;border-radius:3px\">{ow}</mark>'\n )\n\n elif tag == 'insert':\n comp_parts.append(\n f'<mark style=\"background:#dcfce7;color:#15803d;'\n f'padding:1px 3px;border-radius:3px\">{cw}</mark>'\n )\n\n elif tag == 'replace':\n orig_parts.append(\n f'<mark style=\"background:#fee2e2;color:#b91c1c;'\n f'text-decoration:line-through;padding:1px 3px;border-radius:3px\">{ow}</mark>'\n )\n comp_parts.append(\n f'<mark style=\"background:#fef9c3;color:#92400e;'\n f'padding:1px 3px;border-radius:3px\">{cw}</mark>'\n )\n\n return ' '.join(orig_parts), ' '.join(comp_parts)\n\n\ndef render_diff_html(record: dict) -> str:\n \"\"\"Build a self-contained side-by-side diff HTML block for a compression run.\"\"\"\n original = record.get('input_text', '')\n compressed = record.get('output_text', '')\n if not original or not compressed:\n return ''\n\n orig_html, comp_html = _word_diff(original, compressed)\n\n model = _h.escape(record.get('model', '—'))\n tokenizer = _h.escape(record.get('tokenizer', '—'))\n ts = _h.escape(record.get('timestamp', '—'))\n in_tok = record.get('input_tokens', 
'—')\n out_tok = record.get('output_tokens', '—')\n target_tok = record.get('target_tokens', '—')\n ratio = record.get('compression_ratio', 0)\n quality = record.get('quality_score', 0)\n duration = record.get('duration_ms', '—')\n run_id = record.get('id', '—')\n\n feedback_val = record.get('feedback')\n feedback_note = _h.escape(record.get('feedback_comment') or '')\n\n # Build optional feedback block\n if feedback_val is not None:\n badge_bg = '#f0fdf4' if feedback_val == 1 else '#fef2f2'\n badge_color = '#15803d' if feedback_val == 1 else '#b91c1c'\n badge_text = '👍 Helpful' if feedback_val == 1 else '👎 Not helpful'\n feedback_block = (\n f'<div style=\"display:flex;flex-wrap:wrap;align-items:center;gap:8px;'\n f'margin-top:10px;padding:8px 12px;border-radius:6px;background:{badge_bg}\">'\n f'<span style=\"font-weight:600;font-size:0.8rem;color:{badge_color}\">{badge_text}</span>'\n )\n if feedback_note:\n feedback_block += (\n f'<span style=\"font-size:0.8rem;color:#374151;font-style:italic\">'\n f'\"{feedback_note}\"</span>'\n )\n feedback_block += '</div>'\n else:\n feedback_block = ''\n\n return f\"\"\"\n<div style=\"font-family:system-ui,sans-serif;margin-top:4px\">\n\n <!-- Primary meta chips -->\n <div style=\"display:flex;flex-wrap:wrap;gap:6px;margin-bottom:6px;font-size:0.78rem\">\n <span style=\"background:#f3f4f6;padding:3px 9px;border-radius:12px;color:#374151\">Run #{run_id}</span>\n <span style=\"background:#f3f4f6;padding:3px 9px;border-radius:12px;color:#374151\">{ts}</span>\n <span style=\"background:#eff6ff;padding:3px 9px;border-radius:12px;color:#1d4ed8\">{model}</span>\n <span style=\"background:#f0fdf4;padding:3px 9px;border-radius:12px;color:#15803d\">Quality {quality:.4f}</span>\n <span style=\"background:#fff7ed;padding:3px 9px;border-radius:12px;color:#c2410c\">Ratio {ratio:.4f}</span>\n <span style=\"background:#faf5ff;padding:3px 9px;border-radius:12px;color:#7e22ce\">⏱ {duration} ms</span>\n </div>\n\n <!-- Secondary meta chips 
-->\n <div style=\"display:flex;flex-wrap:wrap;gap:6px;margin-bottom:12px;font-size:0.78rem\">\n <span style=\"background:#f3f4f6;padding:3px 9px;border-radius:12px;color:#374151\">{in_tok} in → {out_tok} out (target {target_tok})</span>\n <span style=\"background:#f3f4f6;padding:3px 9px;border-radius:12px;color:#374151\">tokenizer: {tokenizer}</span>\n </div>\n\n <!-- Side-by-side panels -->\n <div style=\"display:grid;grid-template-columns:1fr 1fr;gap:12px\">\n <div style=\"border:1px solid #fecaca;border-radius:8px;overflow:hidden\">\n <div style=\"background:#fef2f2;padding:8px 14px;border-bottom:1px solid #fecaca;\n display:flex;justify-content:space-between;align-items:center\">\n <span style=\"font-weight:700;font-size:0.8rem;color:#b91c1c\">ORIGINAL</span>\n <span style=\"font-size:0.75rem;color:#6b7280\">{in_tok} tokens</span>\n </div>\n <div style=\"padding:14px;line-height:1.8;font-size:0.875rem;color:#1a1a1a;\n max-height:340px;overflow-y:auto;word-break:break-word\">{orig_html}</div>\n </div>\n <div style=\"border:1px solid #bbf7d0;border-radius:8px;overflow:hidden\">\n <div style=\"background:#f0fdf4;padding:8px 14px;border-bottom:1px solid #bbf7d0;\n display:flex;justify-content:space-between;align-items:center\">\n <span style=\"font-weight:700;font-size:0.8rem;color:#15803d\">COMPRESSED</span>\n <span style=\"font-size:0.75rem;color:#6b7280\">{out_tok} tokens</span>\n </div>\n <div style=\"padding:14px;line-height:1.8;font-size:0.875rem;color:#1a1a1a;\n max-height:340px;overflow-y:auto;word-break:break-word\">{comp_html}</div>\n </div>\n </div>\n\n {feedback_block}\n\n <!-- Legend -->\n <div style=\"display:flex;flex-wrap:wrap;gap:14px;margin-top:10px;font-size:0.75rem;color:#6b7280;align-items:center\">\n <mark style=\"background:#fee2e2;color:#b91c1c;text-decoration:line-through;padding:2px 7px;border-radius:3px\">dropped</mark>\n <mark style=\"background:#fef9c3;color:#92400e;padding:2px 7px;border-radius:3px\">rewritten</mark>\n <mark 
style=\"background:#dcfce7;color:#15803d;padding:2px 7px;border-radius:3px\">inserted</mark>\n <span>plain = unchanged</span>\n </div>\n\n</div>\n\"\"\"\n\n\nprint('Diff renderer defined.')"
|
| 216 |
+
},
|
| 217 |
+
{
|
| 218 |
+
"cell_type": "markdown",
|
| 219 |
+
"id": "cell-s7-hdr",
|
| 220 |
+
"metadata": {},
|
| 221 |
+
"source": [
|
| 222 |
+
"## Step 7 — Database"
|
| 223 |
+
]
|
| 224 |
+
},
|
| 225 |
+
{
|
| 226 |
+
"cell_type": "code",
|
| 227 |
+
"execution_count": null,
|
| 228 |
+
"id": "cell-db",
|
| 229 |
+
"metadata": {},
|
| 230 |
+
"outputs": [],
|
| 231 |
+
"source": "import sqlite3\n\n_SCHEMA = \"\"\"\nCREATE TABLE IF NOT EXISTS compression_runs (\n id INTEGER PRIMARY KEY AUTOINCREMENT,\n timestamp TEXT NOT NULL,\n model TEXT NOT NULL,\n tokenizer TEXT NOT NULL,\n input_tokens INTEGER NOT NULL,\n output_tokens INTEGER NOT NULL,\n target_tokens INTEGER NOT NULL,\n compression_ratio REAL NOT NULL,\n quality_score REAL NOT NULL,\n duration_ms REAL NOT NULL,\n input_text TEXT NOT NULL,\n output_text TEXT NOT NULL,\n feedback INTEGER,\n feedback_comment TEXT\n);\n\"\"\"\n\n\ndef _connect():\n conn = sqlite3.connect(DB_PATH)\n conn.row_factory = sqlite3.Row\n return conn\n\n\ndef init_db():\n conn = _connect()\n conn.executescript(_SCHEMA)\n for col, typedef in [\n ('tokenizer', 'TEXT NOT NULL DEFAULT \"\"'),\n ('duration_ms', 'REAL NOT NULL DEFAULT 0'),\n ('feedback', 'INTEGER'),\n ('feedback_comment', 'TEXT'),\n ]:\n try:\n conn.execute(f'ALTER TABLE compression_runs ADD COLUMN {col} {typedef}')\n except sqlite3.OperationalError:\n pass\n conn.commit()\n conn.close()\n\n\ndef save_run(record: dict) -> int:\n conn = _connect()\n cursor = conn.execute(\n '''\n INSERT INTO compression_runs\n (timestamp, model, tokenizer, input_tokens, output_tokens, target_tokens,\n compression_ratio, quality_score, duration_ms, input_text, output_text)\n VALUES\n (:timestamp, :model, :tokenizer, :input_tokens, :output_tokens, :target_tokens,\n :compression_ratio, :quality_score, :duration_ms, :input_text, :output_text)\n ''',\n record,\n )\n run_id = cursor.lastrowid\n conn.commit()\n conn.close()\n return run_id\n\n\ndef update_feedback(run_id: int, value: int):\n conn = _connect()\n conn.execute('UPDATE compression_runs SET feedback = ? WHERE id = ?', (value, run_id))\n conn.commit()\n conn.close()\n\n\ndef update_feedback_comment(run_id: int, comment: str):\n conn = _connect()\n conn.execute('UPDATE compression_runs SET feedback_comment = ? 
WHERE id = ?', (comment, run_id))\n conn.commit()\n conn.close()\n\n\ndef delete_run(run_id: int):\n conn = _connect()\n conn.execute('DELETE FROM compression_runs WHERE id = ?', (run_id,))\n conn.commit()\n conn.close()\n\n\ndef get_run(run_id: int):\n conn = _connect()\n row = conn.execute('SELECT * FROM compression_runs WHERE id = ?', (run_id,)).fetchone()\n conn.close()\n return dict(row) if row else None\n\n\ndef get_runs(limit: int = 100) -> list:\n conn = _connect()\n rows = conn.execute(\n 'SELECT * FROM compression_runs ORDER BY id DESC LIMIT ?', (limit,)\n ).fetchall()\n conn.close()\n return [dict(r) for r in rows]\n\n\ninit_db()\nprint(f'Database ready at {DB_PATH}')"
|
| 232 |
+
},
|
| 233 |
+
{
|
| 234 |
+
"cell_type": "markdown",
|
| 235 |
+
"id": "cell-s8-hdr",
|
| 236 |
+
"metadata": {},
|
| 237 |
+
"source": [
|
| 238 |
+
"## Step 8 — Load models\n",
|
| 239 |
+
"\n",
|
| 240 |
+
"Downloads and caches weights. GPU warm-cache: ~30 s. First run: a few minutes."
|
| 241 |
+
]
|
| 242 |
+
},
|
| 243 |
+
{
|
| 244 |
+
"cell_type": "code",
|
| 245 |
+
"execution_count": null,
|
| 246 |
+
"id": "cell-load-models",
|
| 247 |
+
"metadata": {},
|
| 248 |
+
"outputs": [],
|
| 249 |
+
"source": [
|
| 250 |
+
"get_llm()\n",
|
| 251 |
+
"get_embedder()\n",
|
| 252 |
+
"print('\\nAll models loaded and ready.')"
|
| 253 |
+
]
|
| 254 |
+
},
|
| 255 |
+
{
|
| 256 |
+
"cell_type": "markdown",
|
| 257 |
+
"id": "cell-s9-hdr",
|
| 258 |
+
"metadata": {},
|
| 259 |
+
"source": [
|
| 260 |
+
"## Step 9 — Launch Gradio UI\n",
|
| 261 |
+
"\n",
|
| 262 |
+
"Prints a **public share URL** when ready. All features are live in the UI."
|
| 263 |
+
]
|
| 264 |
+
},
|
| 265 |
+
{
|
| 266 |
+
"cell_type": "code",
|
| 267 |
+
"execution_count": null,
|
| 268 |
+
"id": "cell-gradio",
|
| 269 |
+
"metadata": {},
|
| 270 |
+
"outputs": [],
|
| 271 |
+
"source": "import html as _h\nimport time\nfrom datetime import datetime, timezone\n\nimport gradio as gr\nimport pandas as pd\n\n\n# ══════════════════════════════════════════════════════════════════════════════\n# COMPRESS TAB — handlers\n# ══════════════════════════════════════════════════���═══════════════════════════\n\n_PALETTE = [\n '#fde68a', '#bbf7d0', '#bfdbfe', '#fecaca', '#e9d5ff',\n '#fed7aa', '#99f6e4', '#e0e7ff', '#fce7f3', '#d1fae5',\n]\n_BTN_SHOW = '🔍 Show Token Highlights'\n_BTN_HIDE = '🙈 Hide Token Highlights'\n\n\ndef _render_token_html(text: str) -> str:\n if not text.strip():\n return ''\n tokens = get_token_strings(text)\n if not tokens:\n return ''\n spans = []\n for i, tok in enumerate(tokens):\n color = _PALETTE[i % len(_PALETTE)]\n display = _h.escape(tok).replace(' ', '<span style=\"opacity:0.35;font-size:0.7em\">·</span>')\n spans.append(\n f'<span title=\"token {i+1}\" '\n f'style=\"background:{color};border-radius:4px;padding:2px 5px;'\n f'font-family:\\'Courier New\\',monospace;font-size:0.8rem;'\n f'line-height:2.2;margin:2px 1px;display:inline-block;'\n f'cursor:default;border:1px solid rgba(0,0,0,0.06)\">{display}</span>'\n )\n return (\n '<div style=\"font-family:system-ui,sans-serif;padding:10px 12px;'\n 'border:1px solid #e5e7eb;border-radius:8px;background:#fafafa\">'\n f'<div style=\"font-size:0.75rem;color:#6b7280;margin-bottom:8px;font-weight:500\">'\n f'{len(tokens)} tokens — each chip = one token, hover for index</div>'\n '<div style=\"line-height:2.6;word-break:break-all;max-height:200px;overflow-y:auto\">'\n + ''.join(spans) + '</div></div>'\n )\n\n\ndef toggle_token_panel(is_visible: bool, text: str):\n new_visible = not is_visible\n html_content = _render_token_html(text) if new_visible else ''\n btn_label = _BTN_HIDE if new_visible else _BTN_SHOW\n return new_visible, html_content, gr.update(value=btn_label)\n\n\ndef update_token_panel(text: str, is_visible: bool) -> str:\n return _render_token_html(text) if 
is_visible else ''\n\n\n_STATUS_EMPTY = '<span></span>'\n_STATUS_RED = (\n '<div style=\"background:#fee2e2;border:1px solid #ef4444;color:#b91c1c;'\n 'padding:8px 12px;border-radius:6px;font-size:0.9rem;\">'\n '🔴 <strong>Compression not needed</strong> — input ({input_tok} tokens) '\n 'is already within the {budget}-token budget.</div>'\n)\n_STATUS_GREEN = (\n '<div style=\"background:#dcfce7;border:1px solid #22c55e;color:#15803d;'\n 'padding:8px 12px;border-radius:6px;font-size:0.9rem;\">'\n '🟢 <strong>Ready to compress</strong> — {input_tok} tokens → {budget} token budget '\n '({delta} tokens to shed).</div>'\n)\n\n\ndef compression_status(text: str, target_tokens: int) -> str:\n if not text.strip():\n return _STATUS_EMPTY\n n = count_tokens(text)\n if n <= int(target_tokens):\n return _STATUS_RED.format(input_tok=n, budget=int(target_tokens))\n return _STATUS_GREEN.format(input_tok=n, budget=int(target_tokens), delta=n - int(target_tokens))\n\n\ndef run_compression(text: str, target_tokens: int):\n _hidden = gr.update(visible=False)\n if not text.strip():\n return ('', 0, 0, 0, 0.0, None,\n _hidden, _hidden, gr.update(value='', visible=False),\n gr.update(value='', visible=False), _hidden, gr.update(value='', visible=False))\n\n t0 = time.perf_counter()\n compressed, input_tokens, output_tokens = compress(text, int(target_tokens))\n duration_ms = round((time.perf_counter() - t0) * 1000, 1)\n\n ratio = round(output_tokens / input_tokens, 4) if input_tokens else 0.0\n quality = semantic_score(text, compressed)\n\n run_id = save_run({\n 'timestamp': datetime.now(timezone.utc).isoformat(),\n 'model': get_current_model_id() or LLM_MODEL,\n 'tokenizer': get_current_tokenizer_id() or LLM_MODEL,\n 'input_tokens': input_tokens,\n 'output_tokens': output_tokens,\n 'target_tokens': int(target_tokens),\n 'compression_ratio': ratio,\n 'quality_score': quality,\n 'duration_ms': duration_ms,\n 'input_text': text,\n 'output_text': compressed,\n })\n\n return (\n compressed, 
input_tokens, output_tokens, ratio, quality,\n run_id,\n gr.update(visible=True), gr.update(visible=True),\n gr.update(value='', visible=True),\n gr.update(value='', visible=False),\n gr.update(visible=False),\n gr.update(value='', visible=False),\n )\n\n\ndef load_model(model_id: str) -> str:\n if not model_id:\n return 'No model selected.'\n try:\n return switch_llm(model_id)\n except Exception as exc:\n return f'Error loading {model_id}: {exc}'\n\n\ndef load_embedder(model_id: str) -> str:\n if not model_id:\n return 'No model selected.'\n try:\n return switch_embedder(model_id)\n except Exception as exc:\n return f'Error loading {model_id}: {exc}'\n\n\ndef on_embedder_change(model_id: str) -> str:\n return EMBEDDER_INFO.get(model_id, '')\n\n\ndef submit_feedback(run_id, value: int):\n if run_id is None:\n return 'Run a compression first.', gr.update(visible=False), gr.update(visible=False), gr.update(value='', visible=False)\n update_feedback(run_id, value)\n msg = '👍 Marked as helpful — thanks!' 
if value == 1 else '👎 Noted — thanks for the feedback!'\n return msg, gr.update(visible=True), gr.update(visible=True), gr.update(value='', visible=False)\n\n\ndef save_comment(run_id, comment: str):\n if run_id is None:\n return gr.update(value='Run a compression first.', visible=True)\n if not comment.strip():\n return gr.update(value='Type a note first.', visible=True)\n update_feedback_comment(run_id, comment.strip())\n return gr.update(value='✓ Note saved.', visible=True)\n\n\n# ══════════════════════════════════════════════════════════════════════════════\n# HISTORY TAB — handlers\n# ══════════════════════════════════════════════════════════════════════════════\n\n_DEFAULT_COLS = ['id', 'timestamp', 'model', 'compression_ratio', 'quality_score', 'feedback']\n_ALL_COLS = [\n 'id', 'timestamp', 'model', 'tokenizer',\n 'input_tokens', 'output_tokens', 'target_tokens',\n 'compression_ratio', 'quality_score', 'duration_ms',\n 'feedback', 'feedback_comment',\n]\n\n\ndef load_history(selected_cols=None):\n cols = selected_cols if selected_cols else _DEFAULT_COLS\n runs = get_runs(limit=100)\n if not runs:\n return pd.DataFrame(columns=cols), '', '', ''\n df = pd.DataFrame(runs)\n existing = [c for c in cols if c in df.columns]\n df = df[existing]\n avg_q = f\"{df['quality_score'].mean():.4f}\" if 'quality_score' in df.columns else '—'\n avg_r = f\"{df['compression_ratio'].mean():.4f}\" if 'compression_ratio' in df.columns else '—'\n return df, avg_q, avg_r, ''\n\n\ndef on_row_select(evt: gr.SelectData, df: pd.DataFrame):\n if df is None or df.empty:\n return None, '', 'No rows available.'\n row_idx = evt.index[0]\n run_id = int(df.iloc[row_idx]['id'])\n record = get_run(run_id)\n if not record:\n return None, '', f'Row {run_id} not found in database.'\n return run_id, render_diff_html(record), f'Row {run_id} selected — click Delete to remove.'\n\n\ndef delete_selected(run_id, selected_cols):\n if run_id is None:\n df, avg_q, avg_r, _ = load_history(selected_cols)\n 
return df, avg_q, avg_r, None, '', 'No row selected.'\n delete_run(run_id)\n df, avg_q, avg_r, _ = load_history(selected_cols)\n return df, avg_q, avg_r, None, '', f'Row {run_id} deleted.'\n\n\n# ══════════════════════════════════════════════════════════════════════════════\n# BUILD APP\n# ══════════════════════════════════════════════════════════════════════════════\n\ndef build_app() -> gr.Blocks:\n with gr.Blocks(title=APP_TITLE) as app:\n\n # ── Compress tab ──────────────────────────────────────────────────\n with gr.Tab('Compress'):\n gr.Markdown('## TinyPress — Prompt Compression Engine')\n gr.Markdown(\n 'Paste any long text. Set your token budget. Get a compressed version '\n 'that preserves intent — scored for quality.'\n )\n\n with gr.Accordion('Model Settings', open=False):\n gr.Markdown('**Compression Model**')\n model_dropdown = gr.Dropdown(\n choices=AVAILABLE_MODELS, value=LLM_MODEL,\n label='Compression Model', allow_custom_value=True,\n )\n load_model_btn = gr.Button('Load Model', variant='secondary')\n model_status = gr.Textbox(label='Model Status', value=f'Active: {LLM_MODEL}', interactive=False)\n\n gr.Divider()\n\n gr.Markdown('**Scoring Embedder**')\n embedder_dropdown = gr.Dropdown(\n choices=AVAILABLE_EMBEDDER_MODELS, value=EMBEDDER_MODEL,\n label='Embedder Model', allow_custom_value=True,\n )\n embedder_info_panel = gr.Markdown(value=EMBEDDER_INFO.get(EMBEDDER_MODEL, ''))\n load_embedder_btn = gr.Button('Load Embedder', variant='secondary')\n embedder_status = gr.Textbox(label='Embedder Status', value=f'Active: {EMBEDDER_MODEL}', interactive=False)\n\n with gr.Row():\n with gr.Column():\n input_text = gr.Textbox(label='Input Text', lines=12, placeholder='Paste your text here...')\n token_toggle_btn = gr.Button(_BTN_SHOW, variant='secondary', size='sm')\n token_panel = gr.HTML(value='')\n tokens_visible = gr.State(value=False)\n target_slider = gr.Slider(minimum=100, maximum=1000, value=DEFAULT_TARGET_TOKENS, step=50, label='Target Token 
Budget')\n status_banner = gr.HTML(value=_STATUS_EMPTY)\n compress_btn = gr.Button('Compress', variant='primary')\n\n with gr.Column():\n output_text = gr.Textbox(label='Compressed Output', lines=12)\n with gr.Row():\n input_tok = gr.Number(label='Input Tokens', interactive=False)\n output_tok = gr.Number(label='Output Tokens', interactive=False)\n with gr.Row():\n ratio = gr.Number(label='Compression Ratio', interactive=False)\n quality = gr.Number(label='Quality Score (0–1)', interactive=False)\n gr.Markdown('**Was this compression helpful?**')\n with gr.Row():\n thumbs_up_btn = gr.Button('👍 Helpful', variant='secondary', visible=False, scale=1)\n thumbs_down_btn = gr.Button('👎 Not helpful', variant='secondary', visible=False, scale=1)\n feedback_status = gr.Markdown('', visible=False)\n comment_box = gr.Textbox(\n label='Add a note (optional)',\n placeholder=\"e.g. 'lost key dates', 'too short', 'great summary'\",\n lines=2, visible=False,\n )\n save_comment_btn = gr.Button('Save note', variant='secondary', size='sm', visible=False)\n comment_saved = gr.Markdown('', visible=False)\n\n last_run_id = gr.State(value=None)\n\n token_toggle_btn.click(fn=toggle_token_panel, inputs=[tokens_visible, input_text], outputs=[tokens_visible, token_panel, token_toggle_btn])\n input_text.change(fn=update_token_panel, inputs=[input_text, tokens_visible], outputs=[token_panel])\n _sa = dict(inputs=[input_text, target_slider], outputs=[status_banner])\n input_text.change(fn=compression_status, **_sa)\n target_slider.change(fn=compression_status, **_sa)\n load_model_btn.click(fn=load_model, inputs=[model_dropdown], outputs=[model_status])\n embedder_dropdown.change(fn=on_embedder_change, inputs=[embedder_dropdown], outputs=[embedder_info_panel])\n load_embedder_btn.click(fn=load_embedder, inputs=[embedder_dropdown], outputs=[embedder_status])\n compress_btn.click(\n fn=run_compression,\n inputs=[input_text, target_slider],\n outputs=[output_text, input_tok, output_tok, ratio, 
quality,\n last_run_id, thumbs_up_btn, thumbs_down_btn, feedback_status,\n comment_box, save_comment_btn, comment_saved],\n )\n thumbs_up_btn.click(\n fn=lambda run_id: submit_feedback(run_id, 1),\n inputs=[last_run_id],\n outputs=[feedback_status, comment_box, save_comment_btn, comment_saved],\n )\n thumbs_down_btn.click(\n fn=lambda run_id: submit_feedback(run_id, -1),\n inputs=[last_run_id],\n outputs=[feedback_status, comment_box, save_comment_btn, comment_saved],\n )\n save_comment_btn.click(fn=save_comment, inputs=[last_run_id, comment_box], outputs=[comment_saved])\n\n # ── History tab ───────────────────────────────────────────────────\n with gr.Tab('History') as history_tab:\n gr.Markdown('## Compression Run History')\n with gr.Row():\n refresh_btn = gr.Button('Refresh', variant='secondary')\n delete_btn = gr.Button('Delete Selected Row', variant='stop')\n\n with gr.Accordion('Column visibility', open=False):\n col_picker = gr.CheckboxGroup(choices=_ALL_COLS, value=_DEFAULT_COLS, label=None)\n\n with gr.Row():\n avg_quality = gr.Textbox(label='Avg Quality Score', interactive=False)\n avg_ratio = gr.Textbox(label='Avg Compression Ratio', interactive=False)\n history_table = gr.DataFrame(label='Past Runs — click a row to see its diff', interactive=False)\n delete_status = gr.Textbox(label='Status', value='Click a row to select it.', interactive=False)\n gr.Markdown('### Side-by-side Diff')\n diff_panel = gr.HTML(value='')\n selected_id = gr.State(value=None)\n\n _outputs = [history_table, avg_quality, avg_ratio, diff_panel]\n refresh_btn.click(fn=load_history, inputs=[col_picker], outputs=_outputs)\n history_tab.select(fn=load_history, inputs=[col_picker], outputs=_outputs)\n col_picker.change(fn=load_history, inputs=[col_picker], outputs=_outputs)\n history_table.select(fn=on_row_select, inputs=[history_table], outputs=[selected_id, diff_panel, delete_status])\n delete_btn.click(\n fn=delete_selected,\n inputs=[selected_id, col_picker],\n 
outputs=[history_table, avg_quality, avg_ratio, selected_id, diff_panel, delete_status],\n )\n\n return app\n\n\napp = build_app()\napp.launch(share=True, server_port=SERVER_PORT)"
|
| 272 |
+
},
|
| 273 |
+
{
|
| 274 |
+
"cell_type": "markdown",
|
| 275 |
+
"id": "cell-s10-hdr",
|
| 276 |
+
"metadata": {},
|
| 277 |
+
"source": [
|
| 278 |
+
"## Step 10 — Programmatic demo (no UI needed)\n",
|
| 279 |
+
"\n",
|
| 280 |
+
"Run this cell to compress a sample text directly and inspect all metrics inline."
|
| 281 |
+
]
|
| 282 |
+
},
|
| 283 |
+
{
|
| 284 |
+
"cell_type": "code",
|
| 285 |
+
"execution_count": null,
|
| 286 |
+
"id": "cell-demo",
|
| 287 |
+
"metadata": {},
|
| 288 |
+
"outputs": [],
|
| 289 |
+
"source": [
|
| 290 |
+
"SAMPLE_TEXT = \"\"\"\n",
|
| 291 |
+
"The transformer architecture, introduced in the seminal paper Attention Is All You Need by Vaswani et al.\n",
|
| 292 |
+
"in 2017, fundamentally changed how we approach sequence modelling tasks in natural language processing.\n",
|
| 293 |
+
"Prior to transformers, recurrent neural networks (RNNs) and long short-term memory (LSTM) networks were\n",
|
| 294 |
+
"the dominant architectures for tasks such as machine translation, text summarisation, and question answering.\n",
|
| 295 |
+
"However, these models suffered from several limitations: they processed tokens sequentially, making\n",
|
| 296 |
+
"parallelisation difficult; they struggled to capture long-range dependencies due to vanishing gradients;\n",
|
| 297 |
+
"and training was slow even on modern hardware. The transformer addressed all of these issues through\n",
|
| 298 |
+
"its self-attention mechanism, which allows every token in a sequence to directly attend to every other\n",
|
| 299 |
+
"token in a single operation. Multi-head attention further extends this by running several attention\n",
|
| 300 |
+
"functions in parallel, capturing different types of relationships between tokens simultaneously.\n",
|
| 301 |
+
"Position encodings are added to token embeddings to give the model a sense of sequence order, since\n",
|
| 302 |
+
"unlike RNNs the architecture has no inherent notion of position. Feed-forward sub-layers, layer\n",
|
| 303 |
+
"normalisation, and residual connections complete each transformer block. The result is a model that\n",
|
| 304 |
+
"trains faster, scales better with data and compute, and generalises more effectively than its\n",
|
| 305 |
+
"predecessors, setting the stage for large language models like GPT, BERT, and the entire modern\n",
|
| 306 |
+
"LLM ecosystem.\n",
|
| 307 |
+
"\"\"\".strip()\n",
|
| 308 |
+
"\n",
|
| 309 |
+
"TARGET = 150 # token budget\n",
|
| 310 |
+
"\n",
|
| 311 |
+
"input_tok_count = count_tokens(SAMPLE_TEXT)\n",
|
| 312 |
+
"print(f'Input tokens : {input_tok_count}')\n",
|
| 313 |
+
"print(f'Target tokens: {TARGET}')\n",
|
| 314 |
+
"print(f'Status : {\"ready to compress\" if input_tok_count > TARGET else \"already within budget\"}')\n",
|
| 315 |
+
"print()\n",
|
| 316 |
+
"\n",
|
| 317 |
+
"t0 = time.perf_counter()\n",
|
| 318 |
+
"compressed, in_tok, out_tok = compress(SAMPLE_TEXT, TARGET)\n",
|
| 319 |
+
"elapsed = round((time.perf_counter() - t0) * 1000, 1)\n",
|
| 320 |
+
"\n",
|
| 321 |
+
"score = semantic_score(SAMPLE_TEXT, compressed)\n",
|
| 322 |
+
"ratio = round(out_tok / in_tok, 4)\n",
|
| 323 |
+
"\n",
|
| 324 |
+
"print('─' * 60)\n",
|
| 325 |
+
"print(compressed)\n",
|
| 326 |
+
"print('─' * 60)\n",
|
| 327 |
+
"print(f'Output tokens : {out_tok}')\n",
|
| 328 |
+
"print(f'Compression ratio: {ratio}')\n",
|
| 329 |
+
"print(f'Quality score : {score}')\n",
|
| 330 |
+
"print(f'Duration : {elapsed} ms')\n",
|
| 331 |
+
"print(f'Model : {get_current_model_id()}')\n",
|
| 332 |
+
"print(f'Tokenizer : {get_current_tokenizer_id()}')"
|
| 333 |
+
]
|
| 334 |
+
}
|
| 335 |
+
]
|
| 336 |
+
}
|
ui/compress_tab.py
ADDED
|
@@ -0,0 +1,327 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import html as _h
|
| 2 |
+
import time
|
| 3 |
+
from datetime import datetime, timezone
|
| 4 |
+
|
| 5 |
+
import gradio as gr
|
| 6 |
+
|
| 7 |
+
import config
|
| 8 |
+
from core.compressor import compress
|
| 9 |
+
from core.scorer import semantic_score
|
| 10 |
+
from core.tokenizer_utils import count_tokens, get_token_strings
|
| 11 |
+
from db.store import save_run, update_feedback, update_feedback_comment
|
| 12 |
+
from models.model_loader import get_current_model_id, get_current_tokenizer_id, switch_llm, switch_embedder, get_current_embedder_id
|
| 13 |
+
|
| 14 |
+
# ── token colour palette (10 soft pastels, cycles) ───────────────────────────
|
| 15 |
+
|
| 16 |
+
# Background colours for the token chips. _render_token_html picks one with
# `i % len(_PALETTE)`, so the sequence repeats after the last entry.
_PALETTE = [
    "#fde68a",  # amber
    "#bbf7d0",  # emerald
    "#bfdbfe",  # sky-blue
    "#fecaca",  # rose
    "#e9d5ff",  # violet
    "#fed7aa",  # orange
    "#99f6e4",  # teal
    "#e0e7ff",  # indigo
    "#fce7f3",  # pink
    "#d1fae5",  # green
]

# Labels for the token-highlight toggle button. toggle_token_panel returns
# _BTN_HIDE when it opens the panel and _BTN_SHOW when it closes it.
_BTN_SHOW = "🔍 Show Token Highlights"
_BTN_HIDE = "🙈 Hide Token Highlights"
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
# ── token visualiser ─────────────────────────────────────────────────────────
|
| 34 |
+
|
| 35 |
+
def _render_token_html(text: str) -> str:
    """Render *text* as an HTML panel of colour-coded token chips.

    The text is split with ``get_token_strings`` and each resulting token
    becomes one ``<span>`` chip whose background colour is taken from
    ``_PALETTE`` (cycling when there are more tokens than colours). The
    chip's tooltip carries the 1-based position of the token.

    Args:
        text: Raw input text. ``None``, an empty string, or a string made
            only of whitespace produces no panel.

    Returns:
        The panel markup, or ``""`` when there is nothing to show (blank
        input, or the tokenizer returned no tokens).
    """
    # Guard against None as well as blank input so the handler never raises
    # on a cleared component value.
    if not text or not text.strip():
        return ""
    tokens = get_token_strings(text)
    if not tokens:
        return ""

    # Stand-in that makes every space inside a token visible as a faint
    # mid-dot. It is substituted after escaping so the marker's own markup is
    # not escaped.
    space_marker = '<span style="opacity:0.35;font-size:0.7em">·</span>'

    spans = []
    for i, tok in enumerate(tokens):
        color = _PALETTE[i % len(_PALETTE)]
        display = _h.escape(tok).replace(" ", space_marker)
        spans.append(
            # The tooltip shows only the token index: the previous text ended
            # in a dangling "· id" placeholder with no id value behind it.
            f'<span title="token {i + 1}" '
            f'style="background:{color};border-radius:4px;padding:2px 5px;'
            f'font-family:\'Courier New\',monospace;font-size:0.8rem;'
            f'line-height:2.2;margin:2px 1px;display:inline-block;'
            f'cursor:default;border:1px solid rgba(0,0,0,0.06)">{display}</span>'
        )

    return (
        '<div style="font-family:system-ui,sans-serif;padding:10px 12px;'
        'border:1px solid #e5e7eb;border-radius:8px;background:#fafafa">'
        f'<div style="font-size:0.75rem;color:#6b7280;margin-bottom:8px;font-weight:500">'
        f'{len(tokens)} tokens — each chip = one token, hover for index</div>'
        '<div style="line-height:2.6;word-break:break-all;'
        'max-height:200px;overflow-y:auto">'
        + "".join(spans)
        + "</div></div>"
    )
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
# ── toggle handler ────────────────────────────────────────────────────────────
|
| 70 |
+
|
| 71 |
+
def toggle_token_panel(is_visible: bool, text: str):
    """Flip the token panel between open and closed.

    Args:
        is_visible: Whether the panel is currently open.
        text: Current input text, rendered only when the panel is opening.

    Returns:
        A ``(new_visible, panel_html, button_update)`` triple: the new
        visibility flag, the panel HTML (empty when closing) and a Gradio
        update that relabels the toggle button.
    """
    if is_visible:
        # Closing: clear the panel and offer "show" again.
        return False, "", gr.update(value=_BTN_SHOW)
    # Opening: render the chips and offer "hide".
    return True, _render_token_html(text), gr.update(value=_BTN_HIDE)
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def update_token_panel(text: str, is_visible: bool) -> str:
    """Refresh the token panel when the input text changes.

    Rendering is skipped while the panel is closed; in that case an empty
    string is returned so the panel stays blank.
    """
    return _render_token_html(text) if is_visible else ""
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
# ── compression status banner ─────────────────────────────────────────────────
|
| 86 |
+
|
| 87 |
+
# HTML snippets for the status banner shown under the token-budget slider.
# _STATUS_EMPTY is the placeholder used when there is no input text.
_STATUS_EMPTY = "<span></span>"
# Template for input that already fits the budget.
# str.format placeholders: {input_tok}, {budget}.
_STATUS_RED = (
    '<div style="background:#fee2e2;border:1px solid #ef4444;color:#b91c1c;'
    'padding:8px 12px;border-radius:6px;font-size:0.9rem;">'
    "🔴 <strong>Compression not needed</strong> — input ({input_tok} tokens) "
    "is already within the {budget}-token budget."
    "</div>"
)
# Template for input that exceeds the budget.
# str.format placeholders: {input_tok}, {budget}, {delta}.
_STATUS_GREEN = (
    '<div style="background:#dcfce7;border:1px solid #22c55e;color:#15803d;'
    'padding:8px 12px;border-radius:6px;font-size:0.9rem;">'
    "🟢 <strong>Ready to compress</strong> — {input_tok} tokens → {budget} token budget "
    "({delta} tokens to shed)."
    "</div>"
)
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def compression_status(text: str, target_tokens: int) -> str:
    """Build the status-banner HTML for the current text and token budget.

    Args:
        text: Current input text.
        target_tokens: Token budget; coerced with ``int`` before use.

    Returns:
        ``_STATUS_EMPTY`` for blank text, the "not needed" banner when the
        token count is within the budget, otherwise the "ready" banner
        including how many tokens must be shed.
    """
    if not text.strip():
        return _STATUS_EMPTY

    token_count = count_tokens(text)
    budget = int(target_tokens)
    if token_count <= budget:
        banner = _STATUS_RED.format(input_tok=token_count, budget=budget)
    else:
        banner = _STATUS_GREEN.format(
            input_tok=token_count,
            budget=budget,
            delta=token_count - budget,
        )
    return banner
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
# ── core handlers ─────────────────────────────────────────────────────────────
|
| 114 |
+
|
| 115 |
+
def run_compression(text: str, target_tokens: int):
    """Compress *text* to the token budget, persist the run and update the UI.

    Args:
        text: Input text to compress.
        target_tokens: Token budget; coerced with ``int`` before use.

    Returns:
        A 12-tuple in the order of the Compress button's outputs: compressed
        text, input tokens, output tokens, compression ratio, quality score,
        run id, then Gradio updates for the thumbs-up button, thumbs-down
        button, feedback status, comment box, save-comment button and
        comment-saved note. For blank input the values are zeroed, the run id
        is ``None`` and every feedback widget is hidden.
    """
    hide = gr.update(visible=False)

    if not text.strip():
        return (
            "", 0, 0, 0, 0.0, None,
            hide, hide, gr.update(value="", visible=False),
            gr.update(value="", visible=False), hide, gr.update(value="", visible=False),
        )

    budget = int(target_tokens)

    # Time only the compression call itself.
    started = time.perf_counter()
    compressed, input_tokens, output_tokens = compress(text, budget)
    duration_ms = round((time.perf_counter() - started) * 1000, 1)

    # Guard against division by zero when no input tokens are reported.
    if input_tokens:
        ratio = round(output_tokens / input_tokens, 4)
    else:
        ratio = 0.0
    quality = semantic_score(text, compressed)

    record = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "model": get_current_model_id() or config.LLM_MODEL,
        "tokenizer": get_current_tokenizer_id() or config.LLM_MODEL,
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "target_tokens": budget,
        "compression_ratio": ratio,
        "quality_score": quality,
        "duration_ms": duration_ms,
        "input_text": text,
        "output_text": compressed,
    }
    run_id = save_run(record)

    return (
        compressed, input_tokens, output_tokens, ratio, quality,
        run_id,
        gr.update(visible=True),             # thumbs-up button
        gr.update(visible=True),             # thumbs-down button
        gr.update(value="", visible=True),   # feedback_status
        gr.update(value="", visible=False),  # comment_box reset
        gr.update(visible=False),            # save_comment_btn reset
        gr.update(value="", visible=False),  # comment_saved reset
    )
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
def load_model(model_id: str) -> str:
    """Switch the compression model and report the outcome as status text.

    Returns ``"No model selected."`` for an empty id, the message returned by
    ``switch_llm`` on success, or an error message if switching raises.
    """
    if model_id:
        try:
            status = switch_llm(model_id)
        except Exception as exc:
            # Surface the failure in the status box instead of raising.
            status = f"Error loading {model_id}: {exc}"
        return status
    return "No model selected."
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
def load_embedder(model_id: str) -> str:
    """Switch the scoring embedder and report the outcome as status text.

    Returns ``"No model selected."`` for an empty id, the message returned by
    ``switch_embedder`` on success, or an error message if switching raises.
    """
    if model_id:
        try:
            status = switch_embedder(model_id)
        except Exception as exc:
            # Surface the failure in the status box instead of raising.
            status = f"Error loading {model_id}: {exc}"
        return status
    return "No model selected."
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def on_embedder_change(model_id: str) -> str:
    """Look up the info text for *model_id* in ``config.EMBEDDER_INFO``.

    Returns an empty string when the id has no entry.
    """
    info_by_model = config.EMBEDDER_INFO
    return info_by_model.get(model_id, "")
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
def submit_feedback(run_id, value: int):
    """Store a thumbs rating for a run and reveal the comment widgets.

    Args:
        run_id: Id of the run being rated, or ``None`` if nothing has run yet.
        value: ``1`` for helpful; any other value gets the "not helpful"
            message.

    Returns:
        A 4-tuple: status message, comment-box update, save-button update and
        an update that clears and hides the "comment saved" note.
    """
    if run_id is None:
        # Nothing to rate yet: keep the comment widgets hidden.
        return (
            "Run a compression first.",
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(value="", visible=False),
        )

    update_feedback(run_id, value)
    if value == 1:
        msg = "👍 Marked as helpful — thanks!"
    else:
        msg = "👎 Noted — thanks for the feedback!"
    return (
        msg,
        gr.update(visible=True),
        gr.update(visible=True),
        gr.update(value="", visible=False),
    )
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
def save_comment(run_id, comment: str):
    """Attach a free-text note to a run's feedback.

    Returns a Gradio update that shows a status message: a prompt to run a
    compression first, a prompt to type something, or a confirmation once the
    stripped note has been stored via ``update_feedback_comment``.
    """
    if run_id is None:
        message = "Run a compression first."
    elif not comment.strip():
        message = "Type a note first."
    else:
        update_feedback_comment(run_id, comment.strip())
        message = "✓ Note saved."
    return gr.update(value=message, visible=True)
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
# ── UI ────────────────────────────────────────────────────────────────────────
|
| 194 |
+
|
| 195 |
+
def build_compress_tab() -> gr.Tab:
    """Build the "Compress" tab and wire its event handlers.

    The tab holds a collapsed model-settings accordion, an input column (text
    box, token-highlight panel, budget slider, status banner, Compress button)
    and an output column (compressed text, metrics, feedback widgets).

    NOTE(review): the nesting below was reconstructed from a listing with
    flattened indentation; confirm the layout against the original file.

    Returns:
        The ``gr.Tab`` created by this function.
    """
    with gr.Tab("Compress") as tab:
        gr.Markdown("## TinyPress — Prompt Compression Engine")
        gr.Markdown(
            "Paste any long text. Set your token budget. Get a compressed version "
            "that preserves intent — scored for quality."
        )

        with gr.Accordion("Model Settings", open=False):
            gr.Markdown("**Compression Model**")
            model_dropdown = gr.Dropdown(
                choices=config.AVAILABLE_MODELS,
                value=config.LLM_MODEL,
                label="Compression Model",
                allow_custom_value=True,
            )
            load_model_btn = gr.Button("Load Model", variant="secondary")
            model_status = gr.Textbox(
                label="Model Status",
                value=f"Active: {config.LLM_MODEL}",
                interactive=False,
            )

            # NOTE(review): confirm gr.Divider exists in the pinned Gradio version.
            gr.Divider()

            gr.Markdown("**Scoring Embedder**")
            embedder_dropdown = gr.Dropdown(
                choices=config.AVAILABLE_EMBEDDER_MODELS,
                value=config.EMBEDDER_MODEL,
                label="Embedder Model",
                allow_custom_value=True,
            )
            embedder_info_panel = gr.Markdown(
                value=config.EMBEDDER_INFO.get(config.EMBEDDER_MODEL, "")
            )
            load_embedder_btn = gr.Button("Load Embedder", variant="secondary")
            embedder_status = gr.Textbox(
                label="Embedder Status",
                value=f"Active: {config.EMBEDDER_MODEL}",
                interactive=False,
            )

        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(
                    label="Input Text",
                    lines=12,
                    placeholder="Paste your text here...",
                )

                # ── token highlight panel ──────────────────────────────────
                token_toggle_btn = gr.Button(_BTN_SHOW, variant="secondary", size="sm")
                token_panel = gr.HTML(value="")
                # Tracks whether the token panel is currently open.
                tokens_visible = gr.State(value=False)
                # ──────────────────────────────────────────────────────────

                target_slider = gr.Slider(
                    minimum=100,
                    maximum=1000,
                    value=config.DEFAULT_TARGET_TOKENS,
                    step=50,
                    label="Target Token Budget",
                )
                status_banner = gr.HTML(value=_STATUS_EMPTY)
                compress_btn = gr.Button("Compress", variant="primary")

            with gr.Column():
                output_text = gr.Textbox(label="Compressed Output", lines=12)
                with gr.Row():
                    input_tok = gr.Number(label="Input Tokens", interactive=False)
                    output_tok = gr.Number(label="Output Tokens", interactive=False)
                with gr.Row():
                    ratio = gr.Number(label="Compression Ratio", interactive=False)
                    quality = gr.Number(label="Quality Score (0–1)", interactive=False)
                gr.Markdown("**Was this compression helpful?**")
                # Feedback widgets start hidden; run_compression reveals them.
                with gr.Row():
                    thumbs_up_btn = gr.Button("👍 Helpful", variant="secondary", visible=False, scale=1)
                    thumbs_down_btn = gr.Button("👎 Not helpful", variant="secondary", visible=False, scale=1)
                feedback_status = gr.Markdown("", visible=False)
                comment_box = gr.Textbox(
                    label="Add a note (optional)",
                    placeholder="e.g. 'lost key dates', 'too short', 'great summary'",
                    lines=2,
                    visible=False,
                )
                save_comment_btn = gr.Button("Save note", variant="secondary", size="sm", visible=False)
                comment_saved = gr.Markdown("", visible=False)

        # Id of the most recent run, consumed by the feedback handlers.
        last_run_id = gr.State(value=None)

        # ── event wiring ──────────────────────────────────────────────────
        token_toggle_btn.click(
            fn=toggle_token_panel,
            inputs=[tokens_visible, input_text],
            outputs=[tokens_visible, token_panel, token_toggle_btn],
        )
        input_text.change(
            fn=update_token_panel,
            inputs=[input_text, tokens_visible],
            outputs=[token_panel],
        )

        # The banner is recomputed whenever either the text or the budget changes.
        _status_args = dict(inputs=[input_text, target_slider], outputs=[status_banner])
        input_text.change(fn=compression_status, **_status_args)
        target_slider.change(fn=compression_status, **_status_args)

        load_model_btn.click(fn=load_model, inputs=[model_dropdown], outputs=[model_status])
        embedder_dropdown.change(fn=on_embedder_change, inputs=[embedder_dropdown], outputs=[embedder_info_panel])
        load_embedder_btn.click(fn=load_embedder, inputs=[embedder_dropdown], outputs=[embedder_status])
        # The outputs list must stay in the same order as run_compression's return tuple.
        compress_btn.click(
            fn=run_compression,
            inputs=[input_text, target_slider],
            outputs=[output_text, input_tok, output_tok, ratio, quality,
                     last_run_id, thumbs_up_btn, thumbs_down_btn, feedback_status,
                     comment_box, save_comment_btn, comment_saved],
        )
        # Thumbs up submits 1, thumbs down submits -1.
        thumbs_up_btn.click(
            fn=lambda run_id: submit_feedback(run_id, 1),
            inputs=[last_run_id],
            outputs=[feedback_status, comment_box, save_comment_btn, comment_saved],
        )
        thumbs_down_btn.click(
            fn=lambda run_id: submit_feedback(run_id, -1),
            inputs=[last_run_id],
            outputs=[feedback_status, comment_box, save_comment_btn, comment_saved],
        )
        save_comment_btn.click(
            fn=save_comment,
            inputs=[last_run_id, comment_box],
            outputs=[comment_saved],
        )

    return tab
|
ui/history_tab.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from db.store import get_runs, delete_run, get_run
|
| 4 |
+
from core.diff import render_diff_html
|
| 5 |
+
|
| 6 |
+
# Columns shown in the history table when the user has not picked any.
_DEFAULT_COLS = ["id", "timestamp", "model", "compression_ratio", "quality_score", "feedback"]
# Every column offered by the column-visibility picker.
_ALL_COLS = [
    "id", "timestamp", "model", "tokenizer",
    "input_tokens", "output_tokens", "target_tokens",
    "compression_ratio", "quality_score", "duration_ms",
    "feedback", "feedback_comment",
]
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def load_history(selected_cols=None):
    """Load stored runs for the history table together with summary averages.

    Args:
        selected_cols: Column names to display; falls back to
            ``_DEFAULT_COLS`` when empty or ``None``.

    Returns:
        A 4-tuple ``(table, avg_quality, avg_ratio, diff_html)``. ``table`` is
        a DataFrame limited to the requested columns that exist in the data,
        the averages are strings formatted to four decimals (``"—"`` when the
        underlying column is missing), and ``diff_html`` is always an empty
        string so the diff panel is cleared. When no runs are stored, an empty
        frame and empty strings are returned.
    """
    cols = selected_cols if selected_cols else _DEFAULT_COLS
    runs = get_runs(limit=100)
    if not runs:
        return pd.DataFrame(columns=cols), "", "", ""

    all_runs = pd.DataFrame(runs)

    def _average(column: str) -> str:
        # Format the column mean, or a dash if the data lacks the column.
        if column not in all_runs.columns:
            return "—"
        return f"{all_runs[column].mean():.4f}"

    # Averages come from the full frame so that hiding a column in the picker
    # does not blank out its summary statistic.
    avg_quality = _average("quality_score")
    avg_ratio = _average("compression_ratio")

    visible = all_runs[[c for c in cols if c in all_runs.columns]]
    return visible, avg_quality, avg_ratio, ""
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def on_row_select(evt: gr.SelectData, df: pd.DataFrame):
    """Handle a click on a history-table row.

    Args:
        evt: Gradio selection event; ``evt.index[0]`` is used as the row
            position.
        df: The table contents as currently displayed.

    Returns:
        A 3-tuple ``(run_id, diff_html, status)``. On success it carries the
        selected run's id, its rendered diff and a status hint; otherwise
        ``None``, an empty string and a message explaining why nothing was
        selected.
    """
    if df is None or df.empty:
        return None, "", "No rows available."
    if "id" not in df.columns:
        # The id column can be hidden through the column picker; without it
        # the clicked row cannot be mapped back to a stored run.
        return None, "", "Enable the 'id' column to select a row."

    row_idx = evt.index[0]
    run_id = int(df.iloc[row_idx]["id"])
    record = get_run(run_id)
    if not record:
        return None, "", f"Row {run_id} not found in database."
    diff_html = render_diff_html(record)
    return run_id, diff_html, f"Row {run_id} selected — click Delete to remove."
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def delete_selected(run_id, selected_cols):
    """Delete the selected run (if any) and reload the history view.

    Returns:
        A 6-tuple: refreshed table, average quality, average ratio, a cleared
        selection (``None``), an empty diff panel and a status message.
    """
    if run_id is None:
        status = "No row selected."
    else:
        delete_run(run_id)
        status = f"Row {run_id} deleted."

    # Reload in both cases so the table reflects the current database state.
    table, avg_q, avg_r, _ = load_history(selected_cols)
    return table, avg_q, avg_r, None, "", status
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def build_history_tab() -> gr.Tab:
    """Build the "History" tab and wire its event handlers.

    The tab shows refresh/delete buttons, a column-visibility picker, two
    average read-outs, the table of past runs, a status line and a diff panel
    for the selected run.

    NOTE(review): the nesting below was reconstructed from a listing with
    flattened indentation; confirm the layout against the original file.

    Returns:
        The ``gr.Tab`` created by this function.
    """
    with gr.Tab("History") as tab:
        gr.Markdown("## Compression Run History")

        with gr.Row():
            refresh_btn = gr.Button("Refresh", variant="secondary")
            delete_btn = gr.Button("Delete Selected Row", variant="stop")

        with gr.Accordion("Column visibility", open=False):
            col_picker = gr.CheckboxGroup(
                choices=_ALL_COLS,
                value=_DEFAULT_COLS,
                label=None,
            )

        with gr.Row():
            avg_quality = gr.Textbox(label="Avg Quality Score", interactive=False)
            avg_ratio = gr.Textbox(label="Avg Compression Ratio", interactive=False)

        history_table = gr.DataFrame(
            label="Past Runs — click a row to see its diff",
            interactive=False,
        )
        delete_status = gr.Textbox(
            label="Status", value="Click a row to select it.", interactive=False
        )

        gr.Markdown("### Side-by-side Diff")
        diff_panel = gr.HTML(value="")
        # Id of the run picked in the table; None when nothing is selected.
        selected_id = gr.State(value=None)

        # Must stay in the same order as load_history's return tuple.
        _outputs = [history_table, avg_quality, avg_ratio, diff_panel]

        # Reload on explicit refresh, when the tab is opened, and when the
        # column selection changes.
        refresh_btn.click(fn=load_history, inputs=[col_picker], outputs=_outputs)
        tab.select(fn=load_history, inputs=[col_picker], outputs=_outputs)
        col_picker.change(fn=load_history, inputs=[col_picker], outputs=_outputs)
        history_table.select(
            fn=on_row_select,
            inputs=[history_table],
            outputs=[selected_id, diff_panel, delete_status],
        )
        delete_btn.click(
            fn=delete_selected,
            inputs=[selected_id, col_picker],
            outputs=[history_table, avg_quality, avg_ratio, selected_id, diff_panel, delete_status],
        )

    return tab
|