wchen22 committed on
Commit
2dab644
·
verified ·
1 Parent(s): b6d8182

Cache tokenizer in compression Space

Browse files
Files changed (2) hide show
  1. README.md +11 -3
  2. app.py +9 -3
README.md CHANGED
@@ -22,6 +22,15 @@ Endpoints:
22
  - `POST /v1/compress`
23
  - `POST /v1/classify`
24
 
 
 
 
 
 
 
 
 
 
25
  Deploy:
26
 
27
  ```bash
@@ -29,6 +38,5 @@ hf auth login
29
  ./deploy.sh <namespace>/touchdown-compression-classifier
30
  ```
31
 
32
- The current repo machine must be logged into Hugging Face before this can be
33
- hosted. Free CPU Spaces are enough for this scaffold; production traffic should
34
- move to paid or owned infrastructure after validation.
 
22
  - `POST /v1/compress`
23
  - `POST /v1/classify`
24
 
25
+ Live Space:
26
+
27
+ - `https://wchen22-touchdown-compression-classifier.hf.space`
28
+ - Verified 2026-06-11 on free `cpu-basic`: `/health`, `/v1/classify`, and
29
+ `/v1/compress` returned 200.
30
+ - `/v1/classify` is tokenizer/fallback KEEP-only until a trained KEEP/DROP head
31
+ is mounted. `/v1/compress` is rules-first deletion-only compression with
32
+ safety receipts.
33
+
34
  Deploy:
35
 
36
  ```bash
 
38
  ./deploy.sh <namespace>/touchdown-compression-classifier
39
  ```
40
 
41
+ Free CPU Spaces are enough for this scaffold; production traffic should move to
42
+ paid or owned infrastructure after validation.
 
app.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
2
 
3
  import re
4
  import time
 
5
  from typing import Any
6
 
7
  from fastapi import FastAPI, HTTPException
@@ -23,6 +24,13 @@ LOW_SIGNAL_PATTERNS = [
23
  app = FastAPI(title="Touchdown Compression Classifier", version="0.1.0")
24
 
25
 
 
 
 
 
 
 
 
26
  def _find_spans(text: str, needle: str) -> list[tuple[int, int]]:
27
  spans = []
28
  cursor = 0
@@ -239,9 +247,7 @@ def _compress_text(payload: dict[str, Any]) -> dict[str, Any]:
239
  def _tokens(text: str) -> list[dict[str, Any]]:
240
  started = time.perf_counter()
241
  try:
242
- from transformers import AutoTokenizer
243
-
244
- tokenizer = AutoTokenizer.from_pretrained(CLASSIFIER_MODEL)
245
  encoded = tokenizer(
246
  text,
247
  add_special_tokens=False,
 
2
 
3
  import re
4
  import time
5
+ from functools import lru_cache
6
  from typing import Any
7
 
8
  from fastapi import FastAPI, HTTPException
 
24
  app = FastAPI(title="Touchdown Compression Classifier", version="0.1.0")
25
 
26
 
27
+ @lru_cache(maxsize=1)
28
+ def _get_tokenizer():
29
+ from transformers import AutoTokenizer
30
+
31
+ return AutoTokenizer.from_pretrained(CLASSIFIER_MODEL)
32
+
33
+
34
  def _find_spans(text: str, needle: str) -> list[tuple[int, int]]:
35
  spans = []
36
  cursor = 0
 
247
  def _tokens(text: str) -> list[dict[str, Any]]:
248
  started = time.perf_counter()
249
  try:
250
+ tokenizer = _get_tokenizer()
 
 
251
  encoded = tokenizer(
252
  text,
253
  add_special_tokens=False,