Cache tokenizer in compression Space
Browse files
README.md
CHANGED
|
@@ -22,6 +22,15 @@ Endpoints:
|
|
| 22 |
- `POST /v1/compress`
|
| 23 |
- `POST /v1/classify`
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
Deploy:
|
| 26 |
|
| 27 |
```bash
|
|
@@ -29,6 +38,5 @@ hf auth login
|
|
| 29 |
./deploy.sh <namespace>/touchdown-compression-classifier
|
| 30 |
```
|
| 31 |
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
move to paid or owned infrastructure after validation.
|
|
|
|
| 22 |
- `POST /v1/compress`
|
| 23 |
- `POST /v1/classify`
|
| 24 |
|
| 25 |
+
Live Space:
|
| 26 |
+
|
| 27 |
+
- `https://wchen22-touchdown-compression-classifier.hf.space`
|
| 28 |
+
- Verified 2026-06-11 on free `cpu-basic`: `/health`, `/v1/classify`, and
|
| 29 |
+
`/v1/compress` returned 200.
|
| 30 |
+
- `/v1/classify` returns only KEEP labels (tokenizer or fallback path) until a trained KEEP/DROP head
|
| 31 |
+
is mounted. `/v1/compress` is rules-first deletion-only compression with
|
| 32 |
+
safety receipts.
|
| 33 |
+
|
| 34 |
Deploy:
|
| 35 |
|
| 36 |
```bash
|
|
|
|
| 38 |
./deploy.sh <namespace>/touchdown-compression-classifier
|
| 39 |
```
|
| 40 |
|
| 41 |
+
Free CPU Spaces are enough for this scaffold; production traffic should move to
|
| 42 |
+
paid or owned infrastructure after validation.
|
|
|
app.py
CHANGED
|
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|
| 2 |
|
| 3 |
import re
|
| 4 |
import time
|
|
|
|
| 5 |
from typing import Any
|
| 6 |
|
| 7 |
from fastapi import FastAPI, HTTPException
|
|
@@ -23,6 +24,13 @@ LOW_SIGNAL_PATTERNS = [
|
|
| 23 |
app = FastAPI(title="Touchdown Compression Classifier", version="0.1.0")
|
| 24 |
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
def _find_spans(text: str, needle: str) -> list[tuple[int, int]]:
|
| 27 |
spans = []
|
| 28 |
cursor = 0
|
|
@@ -239,9 +247,7 @@ def _compress_text(payload: dict[str, Any]) -> dict[str, Any]:
|
|
| 239 |
def _tokens(text: str) -> list[dict[str, Any]]:
|
| 240 |
started = time.perf_counter()
|
| 241 |
try:
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
tokenizer = AutoTokenizer.from_pretrained(CLASSIFIER_MODEL)
|
| 245 |
encoded = tokenizer(
|
| 246 |
text,
|
| 247 |
add_special_tokens=False,
|
|
|
|
| 2 |
|
| 3 |
import re
|
| 4 |
import time
|
| 5 |
+
from functools import lru_cache
|
| 6 |
from typing import Any
|
| 7 |
|
| 8 |
from fastapi import FastAPI, HTTPException
|
|
|
|
| 24 |
app = FastAPI(title="Touchdown Compression Classifier", version="0.1.0")
|
| 25 |
|
| 26 |
|
| 27 |
+
@lru_cache(maxsize=1)
def _get_tokenizer():
    """Return the tokenizer for ``CLASSIFIER_MODEL``, loading it at most once.

    The function takes no arguments, so ``lru_cache(maxsize=1)`` keeps the
    single successfully loaded tokenizer and hands the same object back on
    every later call instead of calling ``from_pretrained`` again.
    """
    # Imported at call time rather than at module import, so ``transformers``
    # is only needed once a tokenizer is actually requested.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(CLASSIFIER_MODEL)
    return tokenizer
|
| 32 |
+
|
| 33 |
+
|
| 34 |
def _find_spans(text: str, needle: str) -> list[tuple[int, int]]:
|
| 35 |
spans = []
|
| 36 |
cursor = 0
|
|
|
|
| 247 |
def _tokens(text: str) -> list[dict[str, Any]]:
|
| 248 |
started = time.perf_counter()
|
| 249 |
try:
|
| 250 |
+
tokenizer = _get_tokenizer()
|
|
|
|
|
|
|
| 251 |
encoded = tokenizer(
|
| 252 |
text,
|
| 253 |
add_special_tokens=False,
|