wchen22 committed on
Commit
0dfe65a
·
verified ·
1 Parent(s): b402ba6

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. README.md +11 -5
  2. app.py +240 -2
README.md CHANGED
@@ -26,14 +26,16 @@ Live Space:
26
 
27
  - `https://wchen22-touchdown-compression-classifier.hf.space`
28
  - Verified 2026-06-11 with HF CLI: runtime stage `RUNNING`, hardware
29
- `cpu-basic`, domain `READY`.
 
30
  - The deployed scaffold supports chunked ONNX artifact inference for long
31
  prompts. Use `hf spaces info wchen22/touchdown-compression-classifier --format
32
  json` for the current repo/runtime SHA.
33
  - Live smoke:
34
- `python3 scripts/smoke_compression_api.py --base-url https://wchen22-touchdown-compression-classifier.hf.space --include-classify --include-batch --include-gzip`
35
  validates `/health`, `/v1/classify`, single `/v1/compress`, and managed
36
- `inputs[]` batch, plus gzipped JSON request/response transport.
 
37
  - Full deployment receipt:
38
  `python3 scripts/verify_compression_space.py --expected-sha <sha> --out reports/generated/compression_space/hf_space_verification.json`
39
  validates HF runtime metadata, repo/runtime SHA agreement, API smoke, and
@@ -41,12 +43,16 @@ Live Space:
41
  - Fresh local receipts are written under
42
  `reports/generated/compression_space/`; run the full verifier with the
43
  current Space SHA to check runtime, API smoke, and remote/local file parity.
 
 
44
  - Latest live result: `/v1/compress` saved 27/102 estimated tokens;
45
  managed `inputs[]` returned `input_count=2`, `succeeded=2`, `failed=0`,
 
46
  gzip transport returned `response_content_encoding=gzip`, and `/v1/classify`
47
  returned KEEP-only DeBERTa tokenizer labels. Receipts include
48
- removed-span/char totals, classifier DROP block reasons, and tool-schema
49
- preservation counts when `tools` or `tool_schemas` are supplied.
 
50
  Matching `Idempotency-Key` retries replay the first in-memory response;
51
  payload conflicts return HTTP 409. This is per-process memory on the Space,
52
  not a durable distributed store.
 
26
 
27
  - `https://wchen22-touchdown-compression-classifier.hf.space`
28
  - Verified 2026-06-11 with HF CLI: runtime stage `RUNNING`, hardware
29
+ `cpu-basic`, domain `READY`, repo/runtime SHA
30
+ `b402ba63bf08ce65bd30da071256555382be4fe0`.
31
  - The deployed scaffold supports chunked ONNX artifact inference for long
32
  prompts. Use `hf spaces info wchen22/touchdown-compression-classifier --format
33
  json` for the current repo/runtime SHA.
34
  - Live smoke:
35
+ `python3 scripts/smoke_compression_api.py --base-url https://wchen22-touchdown-compression-classifier.hf.space --include-classify --include-batch --include-messages --include-gzip`
36
  validates `/health`, `/v1/classify`, single `/v1/compress`, managed
37
+ `inputs[]` batch, and managed `messages[]`, plus gzipped JSON request/response
38
+ transport.
39
  - Full deployment receipt:
40
  `python3 scripts/verify_compression_space.py --expected-sha <sha> --out reports/generated/compression_space/hf_space_verification.json`
41
  validates HF runtime metadata, repo/runtime SHA agreement, API smoke, and
 
43
  - Fresh local receipts are written under
44
  `reports/generated/compression_space/`; run the full verifier with the
45
  current Space SHA to check runtime, API smoke, and remote/local file parity.
46
+ Current live receipt:
47
+ `reports/generated/compression_space/hf_space_verification_2026-06-11-idempotency-replay-health.json`.
48
  - Latest live result: `/v1/compress` saved 27/102 estimated tokens;
49
  managed `inputs[]` returned `input_count=2`, `succeeded=2`, `failed=0`,
50
+ managed `messages[]` returned `message_count=2` with system-role protection,
51
  gzip transport returned `response_content_encoding=gzip`, and `/v1/classify`
52
  returned KEEP-only DeBERTa tokenizer labels. Receipts include
53
+ removed-span/char totals, classifier DROP block reasons, tool-schema
54
+ preservation counts when `tools` or `tool_schemas` are supplied, and
55
+ `/health` idempotency TTL reporting.
56
  Matching `Idempotency-Key` retries replay the first in-memory response;
57
  payload conflicts return HTTP 409. This is per-process memory on the Space,
58
  not a durable distributed store.
app.py CHANGED
@@ -29,6 +29,7 @@ GZIP_ENCODING = "gzip"
29
  GZIP_MAGIC = b"\x1f\x8b"
30
  DEFAULT_IDEMPOTENCY_TTL_SECONDS = 24 * 60 * 60
31
  IDEMPOTENCY_TTL_ENV = "TOUCHDOWN_IDEMPOTENCY_TTL_SECONDS"
 
32
  LOW_SIGNAL_PATTERNS = [
33
  re.compile(pattern, re.IGNORECASE)
34
  for pattern in [
@@ -315,7 +316,7 @@ def _store_idempotency_body(
315
  def _handle_compress_with_idempotency(payload: dict[str, Any]) -> dict[str, Any]:
316
  key = _idempotency_key_from_payload(payload)
317
  if not key:
318
- return _handle_batch(payload) if "inputs" in payload else _compress_text(payload)
319
  route = "/v1/compress"
320
  fingerprint = _idempotency_fingerprint(route, payload)
321
  cached = _cached_idempotency_body(
@@ -325,7 +326,7 @@ def _handle_compress_with_idempotency(payload: dict[str, Any]) -> dict[str, Any]
325
  )
326
  if cached is not None:
327
  return cached
328
- body = _handle_batch(payload) if "inputs" in payload else _compress_text(payload)
329
  request_id = payload.get("request_id") if isinstance(payload.get("request_id"), str) else None
330
  return _store_idempotency_body(
331
  route=route,
@@ -650,6 +651,66 @@ def _receipt_id(payload: dict[str, Any]) -> str:
650
  return "tdcr_" + hashlib.sha256(encoded.encode("utf-8")).hexdigest()[:24]
651
 
652
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
653
  def _correlation_payload(
654
  payload: dict[str, Any],
655
  *,
@@ -1085,6 +1146,183 @@ def _handle_batch(payload: dict[str, Any]) -> dict[str, Any]:
1085
  return body
1086
 
1087
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1088
  def _tokens(text: str) -> list[dict[str, Any]]:
1089
  started = time.perf_counter()
1090
  try:
 
29
  GZIP_MAGIC = b"\x1f\x8b"
30
  DEFAULT_IDEMPOTENCY_TTL_SECONDS = 24 * 60 * 60
31
  IDEMPOTENCY_TTL_ENV = "TOUCHDOWN_IDEMPOTENCY_TTL_SECONDS"
32
+ DEFAULT_PROTECTED_MESSAGE_ROLES = ("system", "developer")
33
  LOW_SIGNAL_PATTERNS = [
34
  re.compile(pattern, re.IGNORECASE)
35
  for pattern in [
 
316
  def _handle_compress_with_idempotency(payload: dict[str, Any]) -> dict[str, Any]:
317
  key = _idempotency_key_from_payload(payload)
318
  if not key:
319
+ return _dispatch_compress(payload)
320
  route = "/v1/compress"
321
  fingerprint = _idempotency_fingerprint(route, payload)
322
  cached = _cached_idempotency_body(
 
326
  )
327
  if cached is not None:
328
  return cached
329
+ body = _dispatch_compress(payload)
330
  request_id = payload.get("request_id") if isinstance(payload.get("request_id"), str) else None
331
  return _store_idempotency_body(
332
  route=route,
 
651
  return "tdcr_" + hashlib.sha256(encoded.encode("utf-8")).hexdigest()[:24]
652
 
653
 
654
def _stable_json_sha256(value: Any) -> str:
    """Return the hex SHA-256 digest of ``value`` rendered as canonical JSON.

    The JSON text uses sorted keys and compact separators and keeps non-ASCII
    characters verbatim; it is UTF-8 encoded before hashing, so equal values
    hash identically regardless of dict key order.
    """
    canonical = json.dumps(
        value,
        sort_keys=True,
        separators=(",", ":"),
        ensure_ascii=False,
    )
    hasher = hashlib.sha256()
    hasher.update(canonical.encode("utf-8"))
    return hasher.hexdigest()
662
+
663
+
664
def _aggregate_receipt_id(payload: dict[str, Any]) -> str:
    """Build the ``tdcm_``-prefixed aggregate receipt id for ``payload``.

    The payload is serialized as compact, key-sorted JSON and hashed with
    SHA-256; the id is the prefix followed by the first 24 hex characters.
    """
    canonical = json.dumps(payload, separators=(",", ":"), sort_keys=True)
    fingerprint = hashlib.sha256(canonical.encode("utf-8")).hexdigest()
    return f"tdcm_{fingerprint[:24]}"
667
+
668
+
669
def _string_set(
    value: Any,
    *,
    default: tuple[str, ...] = (),
    field_name: str,
) -> set[str]:
    """Normalize an optional list-of-strings setting into a lowercase set.

    Args:
        value: Raw setting value; ``None`` selects ``default``.
        default: Values returned (as given, not lowercased) when ``value``
            is ``None``.
        field_name: Name used in the error detail.

    Returns:
        ``set(default)`` for ``None``, otherwise the lowercased entries.

    Raises:
        HTTPException: 400 when ``value`` is not a list or holds an entry
            that is not a non-empty string.
    """
    if value is None:
        return set(default)
    well_formed = isinstance(value, list) and not any(
        not isinstance(entry, str) or not entry for entry in value
    )
    if not well_formed:
        raise HTTPException(status_code=400, detail=f"{field_name} must be a list of strings")
    return {entry.lower() for entry in value}
683
+
684
+
685
def _message_role(message: dict[str, Any], index: int) -> str:
    """Return the lowercased ``role`` of ``message``.

    Args:
        message: A single chat message object.
        index: Position of the message, used only in the error detail.

    Raises:
        HTTPException: 400 when ``role`` is missing, not a string, or empty.
    """
    raw_role = message.get("role")
    if isinstance(raw_role, str) and raw_role:
        return raw_role.lower()
    raise HTTPException(
        status_code=400,
        detail=f"messages[{index}].role must be a string",
    )
693
+
694
+
695
def _message_decision(
    *,
    tokens_saved: int,
    receipts: list[dict[str, Any]],
) -> str:
    """Fold per-message receipt decisions into one aggregate decision.

    Precedence: any ``"reject"`` wins, then any ``"needs_review"``; otherwise
    the result is ``"no_op"`` when nothing was saved and ``"high_confidence"``
    when tokens were saved. Entries that are not dicts are ignored.
    """
    review_requested = False
    for entry in receipts:
        if not isinstance(entry, dict):
            continue
        verdict = entry.get("decision")
        if verdict == "reject":
            # Highest precedence: nothing later can override a rejection.
            return "reject"
        if verdict == "needs_review":
            review_requested = True
    if review_requested:
        return "needs_review"
    return "no_op" if tokens_saved <= 0 else "high_confidence"
712
+
713
+
714
  def _correlation_payload(
715
  payload: dict[str, Any],
716
  *,
 
1146
  return body
1147
 
1148
 
1149
def _handle_messages(payload: dict[str, Any]) -> dict[str, Any]:
    """Compress a chat-style ``messages`` payload one message at a time.

    Every message whose ``content`` is a string is passed through
    ``_compress_text``; messages with any other content type are copied
    through unchanged and recorded as skipped. A message counts as
    role-protected when its role is in ``compression_settings.protected_roles``
    (default ``DEFAULT_PROTECTED_MESSAGE_ROLES``) or, when
    ``compression_settings.compress_roles`` is configured, when its role is
    not in that list. For a role-protected message the whole content is added
    to the protected spans handed to ``_compress_text``.

    Args:
        payload: Request body. Reads ``messages``, ``compression_settings``,
            ``protected_spans``, ``tools`` / ``tool_schemas``, ``request_id``
            and ``idempotency_key``.

    Returns:
        Response body with the compressed messages, per-message receipts,
        token totals and an aggregate receipt.

    Raises:
        HTTPException: 400 when ``input`` / ``inputs`` is also present, when
            ``messages`` is not a non-empty list of objects, when a message
            role is invalid, or when ``compression_settings``,
            ``protected_spans``, ``request_id`` or ``idempotency_key`` has
            the wrong type.
    """
    if "input" in payload or "inputs" in payload:
        raise HTTPException(status_code=400, detail="provide either messages, input, or inputs")
    messages = payload.get("messages")
    if not isinstance(messages, list) or not messages:
        raise HTTPException(status_code=400, detail="messages must be a non-empty list")
    if not all(isinstance(message, dict) for message in messages):
        raise HTTPException(status_code=400, detail="messages entries must be objects")
    settings = payload.get("compression_settings") or {}
    if not isinstance(settings, dict):
        raise HTTPException(status_code=400, detail="compression_settings must be an object")
    protected_roles = _string_set(
        settings.get("protected_roles"),
        default=DEFAULT_PROTECTED_MESSAGE_ROLES,
        field_name="compression_settings.protected_roles",
    )
    # An explicit JSON null means "not configured", exactly like omitting the
    # key. Passing None to _string_set would yield an empty allow-list, which
    # silently marks every role as protected and disables compression.
    raw_compress_roles = settings.get("compress_roles")
    compress_roles = (
        _string_set(
            raw_compress_roles,
            field_name="compression_settings.compress_roles",
        )
        if raw_compress_roles is not None else None
    )
    protected_values = payload.get("protected_spans") or []
    if not isinstance(protected_values, list) or not all(
        isinstance(value, str) for value in protected_values
    ):
        raise HTTPException(status_code=400, detail="protected_spans must be strings")
    request_id = payload.get("request_id")
    if request_id is not None and not isinstance(request_id, str):
        raise HTTPException(status_code=400, detail="request_id must be a string")
    idempotency_key = payload.get("idempotency_key")
    if idempotency_key is not None and not isinstance(idempotency_key, str):
        raise HTTPException(status_code=400, detail="idempotency_key must be a string")
    # Loop-invariant: ``tool_schemas`` wins over ``tools`` when the key exists.
    tool_schemas = payload.get("tool_schemas", payload.get("tools"))

    output_messages: list[dict[str, Any]] = []
    receipts: list[dict[str, Any]] = []
    nested_receipts: list[dict[str, Any]] = []
    receipt_ids: list[str] = []
    original_tokens = 0
    output_tokens = 0
    compressed_message_count = 0
    skipped_message_count = 0

    for index, message in enumerate(messages):
        role = _message_role(message, index)
        content = message.get("content")
        # Shallow copy so the caller's message objects are never mutated.
        output_message = dict(message)
        if not isinstance(content, str):
            skipped_message_count += 1
            receipts.append({
                "index": index,
                "role": role,
                "status": "skipped",
                "reason": "non_string_content",
                "content_type": type(content).__name__,
                "original_input_tokens": 0,
                "output_tokens": 0,
                "tokens_saved": 0,
            })
            output_messages.append(output_message)
            continue

        role_protected = role in protected_roles or (
            compress_roles is not None and role not in compress_roles
        )
        item_protected = list(protected_values)
        if role_protected and content:
            # Protect the entire message body as one span.
            item_protected.append(content)
        result = _compress_text({
            "input": content,
            "compression_settings": settings,
            "protected_spans": item_protected,
            "tool_schemas": tool_schemas,
            "request_id": request_id,
            "idempotency_key": idempotency_key,
        })
        output_message["content"] = result["output"]
        output_messages.append(output_message)
        original_tokens += int(result["original_input_tokens"])
        output_tokens += int(result["output_tokens"])
        tokens_saved = int(result["tokens_saved"])
        if tokens_saved > 0:
            compressed_message_count += 1
        receipt = result["receipt"]
        nested_receipts.append(receipt)
        receipt_ids.append(receipt["receipt_id"])
        receipts.append({
            "index": index,
            "role": role,
            "status": "ok",
            "protected_by_role": role_protected,
            "original_input_tokens": result["original_input_tokens"],
            "output_tokens": result["output_tokens"],
            "tokens_saved": tokens_saved,
            "compression_percentage": result["compression_percentage"],
            "receipt_id": receipt["receipt_id"],
            "receipt": receipt,
        })

    tokens_saved_total = max(0, original_tokens - output_tokens)
    compression_pct = (
        round(100.0 * tokens_saved_total / original_tokens, 1)
        if original_tokens else 0.0
    )
    decision = _message_decision(
        tokens_saved=tokens_saved_total,
        receipts=nested_receipts,
    )
    # Hash each side once; both the receipt id and the receipt body use them.
    input_sha256 = _stable_json_sha256(messages)
    output_sha256 = _stable_json_sha256(output_messages)
    aggregate_receipt = {
        "receipt_version": "message-compression-receipt-v0.1.0",
        "receipt_id": _aggregate_receipt_id({
            "input_sha256": input_sha256,
            "output_sha256": output_sha256,
            "receipt_ids": receipt_ids,
            "tokens_saved": tokens_saved_total,
            "compression_percentage": compression_pct,
            "decision": decision,
        }),
        "request_id": request_id,
        "idempotency_key": idempotency_key,
        "message_count": len(messages),
        "compressed_message_count": compressed_message_count,
        "skipped_message_count": skipped_message_count,
        "protected_roles": sorted(protected_roles),
        "compress_roles": sorted(compress_roles) if compress_roles is not None else None,
        "decision": decision,
        "deletion_only": all(
            receipt.get("deletion_only", True) for receipt in nested_receipts
        ),
        "deterministic": True,
        "input_sha256": input_sha256,
        "output_sha256": output_sha256,
        "message_receipt_ids": receipt_ids,
    }
    compressed_prompt: dict[str, Any] = {
        "messages": output_messages,
        "protected_spans": protected_values,
    }
    if "tools" in payload:
        compressed_prompt["tools"] = payload["tools"]
    if "tool_schemas" in payload:
        compressed_prompt["tool_schemas"] = payload["tool_schemas"]
    body = {
        "schema_version": API_SCHEMA_VERSION,
        "status": "ok",
        "endpoint": "/v1/compress",
        "maturity": "measurement_only",
        "messages": output_messages,
        "compressed_messages": output_messages,
        "compressed_prompts": [compressed_prompt],
        "message_count": len(messages),
        "compressed_message_count": compressed_message_count,
        "skipped_message_count": skipped_message_count,
        "original_input_tokens": original_tokens,
        "output_tokens": output_tokens,
        "tokens_saved": tokens_saved_total,
        "compression_percentage": compression_pct,
        "receipt_ids": receipt_ids,
        "receipts": receipts,
        "receipt": aggregate_receipt,
    }
    if request_id is not None:
        body["request_id"] = request_id
    if idempotency_key is not None:
        body["idempotency_key"] = idempotency_key
    return body
1316
+
1317
+
1318
def _dispatch_compress(payload: dict[str, Any]) -> dict[str, Any]:
    """Route a ``/v1/compress`` payload to the handler for its input shape.

    ``messages`` is checked before ``inputs`` so that a payload carrying both
    keys reaches ``_handle_messages``, whose own guard rejects the combination
    with HTTP 400. With ``inputs`` checked first that guard could never fire
    for ``inputs`` and this dispatcher ignored the ``messages`` key.

    Args:
        payload: Decoded request body.

    Returns:
        The response body produced by the selected handler:
        ``_handle_messages`` for ``messages``, ``_handle_batch`` for
        ``inputs``, and ``_compress_text`` otherwise.
    """
    if "messages" in payload:
        return _handle_messages(payload)
    if "inputs" in payload:
        return _handle_batch(payload)
    return _compress_text(payload)
1324
+
1325
+
1326
  def _tokens(text: str) -> list[dict[str, Any]]:
1327
  started = time.perf_counter()
1328
  try: