Upload folder using huggingface_hub
Browse files
README.md
CHANGED
|
@@ -26,14 +26,16 @@ Live Space:
|
|
| 26 |
|
| 27 |
- `https://wchen22-touchdown-compression-classifier.hf.space`
|
| 28 |
- Verified 2026-06-11 with HF CLI: runtime stage `RUNNING`, hardware
|
| 29 |
-
`cpu-basic`, domain `READY`
|
|
|
|
| 30 |
- The deployed scaffold supports chunked ONNX artifact inference for long
|
| 31 |
prompts. Use `hf spaces info wchen22/touchdown-compression-classifier --format
|
| 32 |
json` for the current repo/runtime SHA.
|
| 33 |
- Live smoke:
|
| 34 |
-
`python3 scripts/smoke_compression_api.py --base-url https://wchen22-touchdown-compression-classifier.hf.space --include-classify --include-batch --include-gzip`
|
| 35 |
validates `/health`, `/v1/classify`, single `/v1/compress`, and managed
|
| 36 |
-
`inputs[]` batch, plus gzipped JSON request/response
|
|
|
|
| 37 |
- Full deployment receipt:
|
| 38 |
`python3 scripts/verify_compression_space.py --expected-sha <sha> --out reports/generated/compression_space/hf_space_verification.json`
|
| 39 |
validates HF runtime metadata, repo/runtime SHA agreement, API smoke, and
|
|
@@ -41,12 +43,16 @@ Live Space:
|
|
| 41 |
- Fresh local receipts are written under
|
| 42 |
`reports/generated/compression_space/`; run the full verifier with the
|
| 43 |
current Space SHA to check runtime, API smoke, and remote/local file parity.
|
|
|
|
|
|
|
| 44 |
- Latest live result: `/v1/compress` saved 27/102 estimated tokens;
|
| 45 |
managed `inputs[]` returned `input_count=2`, `succeeded=2`, `failed=0`,
|
|
|
|
| 46 |
gzip transport returned `response_content_encoding=gzip`, and `/v1/classify`
|
| 47 |
returned KEEP-only DeBERTa tokenizer labels. Receipts include
|
| 48 |
-
removed-span/char totals, classifier DROP block reasons,
|
| 49 |
-
preservation counts when `tools` or `tool_schemas` are supplied
|
|
|
|
| 50 |
Matching `Idempotency-Key` retries replay the first in-memory response;
|
| 51 |
payload conflicts return HTTP 409. This is per-process memory on the Space,
|
| 52 |
not a durable distributed store.
|
|
|
|
| 26 |
|
| 27 |
- `https://wchen22-touchdown-compression-classifier.hf.space`
|
| 28 |
- Verified 2026-06-11 with HF CLI: runtime stage `RUNNING`, hardware
|
| 29 |
+
`cpu-basic`, domain `READY`, repo/runtime SHA
|
| 30 |
+
`b402ba63bf08ce65bd30da071256555382be4fe0`.
|
| 31 |
- The deployed scaffold supports chunked ONNX artifact inference for long
|
| 32 |
prompts. Use `hf spaces info wchen22/touchdown-compression-classifier --format
|
| 33 |
json` for the current repo/runtime SHA.
|
| 34 |
- Live smoke:
|
| 35 |
+
`python3 scripts/smoke_compression_api.py --base-url https://wchen22-touchdown-compression-classifier.hf.space --include-classify --include-batch --include-messages --include-gzip`
|
| 36 |
validates `/health`, `/v1/classify`, single `/v1/compress`, and managed
|
| 37 |
+
`inputs[]` batch, managed `messages[]`, plus gzipped JSON request/response
|
| 38 |
+
transport.
|
| 39 |
- Full deployment receipt:
|
| 40 |
`python3 scripts/verify_compression_space.py --expected-sha <sha> --out reports/generated/compression_space/hf_space_verification.json`
|
| 41 |
validates HF runtime metadata, repo/runtime SHA agreement, API smoke, and
|
|
|
|
| 43 |
- Fresh local receipts are written under
|
| 44 |
`reports/generated/compression_space/`; run the full verifier with the
|
| 45 |
current Space SHA to check runtime, API smoke, and remote/local file parity.
|
| 46 |
+
Current live receipt:
|
| 47 |
+
`reports/generated/compression_space/hf_space_verification_2026-06-11-idempotency-replay-health.json`.
|
| 48 |
- Latest live result: `/v1/compress` saved 27/102 estimated tokens;
|
| 49 |
managed `inputs[]` returned `input_count=2`, `succeeded=2`, `failed=0`,
|
| 50 |
+
managed `messages[]` returned `message_count=2` with system-role protection,
|
| 51 |
gzip transport returned `response_content_encoding=gzip`, and `/v1/classify`
|
| 52 |
returned KEEP-only DeBERTa tokenizer labels. Receipts include
|
| 53 |
+
removed-span/char totals, classifier DROP block reasons, tool-schema
|
| 54 |
+
preservation counts when `tools` or `tool_schemas` are supplied, and
|
| 55 |
+
`/health` idempotency TTL reporting.
|
| 56 |
Matching `Idempotency-Key` retries replay the first in-memory response;
|
| 57 |
payload conflicts return HTTP 409. This is per-process memory on the Space,
|
| 58 |
not a durable distributed store.
|
app.py
CHANGED
|
@@ -29,6 +29,7 @@ GZIP_ENCODING = "gzip"
|
|
| 29 |
GZIP_MAGIC = b"\x1f\x8b"
|
| 30 |
DEFAULT_IDEMPOTENCY_TTL_SECONDS = 24 * 60 * 60
|
| 31 |
IDEMPOTENCY_TTL_ENV = "TOUCHDOWN_IDEMPOTENCY_TTL_SECONDS"
|
|
|
|
| 32 |
LOW_SIGNAL_PATTERNS = [
|
| 33 |
re.compile(pattern, re.IGNORECASE)
|
| 34 |
for pattern in [
|
|
@@ -315,7 +316,7 @@ def _store_idempotency_body(
|
|
| 315 |
def _handle_compress_with_idempotency(payload: dict[str, Any]) -> dict[str, Any]:
|
| 316 |
key = _idempotency_key_from_payload(payload)
|
| 317 |
if not key:
|
| 318 |
-
return
|
| 319 |
route = "/v1/compress"
|
| 320 |
fingerprint = _idempotency_fingerprint(route, payload)
|
| 321 |
cached = _cached_idempotency_body(
|
|
@@ -325,7 +326,7 @@ def _handle_compress_with_idempotency(payload: dict[str, Any]) -> dict[str, Any]
|
|
| 325 |
)
|
| 326 |
if cached is not None:
|
| 327 |
return cached
|
| 328 |
-
body =
|
| 329 |
request_id = payload.get("request_id") if isinstance(payload.get("request_id"), str) else None
|
| 330 |
return _store_idempotency_body(
|
| 331 |
route=route,
|
|
@@ -650,6 +651,66 @@ def _receipt_id(payload: dict[str, Any]) -> str:
|
|
| 650 |
return "tdcr_" + hashlib.sha256(encoded.encode("utf-8")).hexdigest()[:24]
|
| 651 |
|
| 652 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 653 |
def _correlation_payload(
|
| 654 |
payload: dict[str, Any],
|
| 655 |
*,
|
|
@@ -1085,6 +1146,183 @@ def _handle_batch(payload: dict[str, Any]) -> dict[str, Any]:
|
|
| 1085 |
return body
|
| 1086 |
|
| 1087 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1088 |
def _tokens(text: str) -> list[dict[str, Any]]:
|
| 1089 |
started = time.perf_counter()
|
| 1090 |
try:
|
|
|
|
| 29 |
GZIP_MAGIC = b"\x1f\x8b"
|
| 30 |
DEFAULT_IDEMPOTENCY_TTL_SECONDS = 24 * 60 * 60
|
| 31 |
IDEMPOTENCY_TTL_ENV = "TOUCHDOWN_IDEMPOTENCY_TTL_SECONDS"
|
| 32 |
+
DEFAULT_PROTECTED_MESSAGE_ROLES = ("system", "developer")
|
| 33 |
LOW_SIGNAL_PATTERNS = [
|
| 34 |
re.compile(pattern, re.IGNORECASE)
|
| 35 |
for pattern in [
|
|
|
|
| 316 |
def _handle_compress_with_idempotency(payload: dict[str, Any]) -> dict[str, Any]:
|
| 317 |
key = _idempotency_key_from_payload(payload)
|
| 318 |
if not key:
|
| 319 |
+
return _dispatch_compress(payload)
|
| 320 |
route = "/v1/compress"
|
| 321 |
fingerprint = _idempotency_fingerprint(route, payload)
|
| 322 |
cached = _cached_idempotency_body(
|
|
|
|
| 326 |
)
|
| 327 |
if cached is not None:
|
| 328 |
return cached
|
| 329 |
+
body = _dispatch_compress(payload)
|
| 330 |
request_id = payload.get("request_id") if isinstance(payload.get("request_id"), str) else None
|
| 331 |
return _store_idempotency_body(
|
| 332 |
route=route,
|
|
|
|
| 651 |
return "tdcr_" + hashlib.sha256(encoded.encode("utf-8")).hexdigest()[:24]
|
| 652 |
|
| 653 |
|
| 654 |
+
def _stable_json_sha256(value: Any) -> str:
    """Return the hex SHA-256 digest of *value* rendered as canonical JSON.

    The JSON form uses sorted keys, compact separators and keeps non-ASCII
    characters verbatim before being UTF-8 encoded, so equal values hash
    identically regardless of key insertion order.
    """
    canonical = json.dumps(
        value,
        separators=(",", ":"),
        sort_keys=True,
        ensure_ascii=False,
    )
    hasher = hashlib.sha256()
    hasher.update(canonical.encode("utf-8"))
    return hasher.hexdigest()
|
| 662 |
+
|
| 663 |
+
|
| 664 |
+
def _aggregate_receipt_id(payload: dict[str, Any]) -> str:
    """Derive the identifier of an aggregate receipt from *payload*.

    The payload is serialized as compact, key-sorted JSON and hashed with
    SHA-256; the identifier is ``tdcm_`` followed by the first 24 hex digits.
    """
    canonical = json.dumps(payload, separators=(",", ":"), sort_keys=True)
    digest = hashlib.sha256(canonical.encode("utf-8")).hexdigest()
    return f"tdcm_{digest[:24]}"
|
| 667 |
+
|
| 668 |
+
|
| 669 |
+
def _string_set(
    value: Any,
    *,
    default: tuple[str, ...] = (),
    field_name: str,
) -> set[str]:
    """Normalize an optional list of strings into a lower-cased set.

    Args:
        value: ``None`` or the raw value to validate.
        default: Entries returned (as a set, unchanged) when *value* is ``None``.
        field_name: Name used in the error detail.

    Returns:
        ``set(default)`` for ``None``; otherwise the lower-cased items of *value*.

    Raises:
        HTTPException: Status 400 when *value* is not a list or contains an
            entry that is not a non-empty string.
    """
    if value is None:
        return set(default)

    if not isinstance(value, list):
        raise HTTPException(status_code=400, detail=f"{field_name} must be a list of strings")

    normalized: set[str] = set()
    for entry in value:
        # Empty strings are rejected along with non-string entries.
        if not isinstance(entry, str) or not entry:
            raise HTTPException(status_code=400, detail=f"{field_name} must be a list of strings")
        normalized.add(entry.lower())
    return normalized
|
| 683 |
+
|
| 684 |
+
|
| 685 |
+
def _message_role(message: dict[str, Any], index: int) -> str:
    """Return the lower-cased ``role`` of *message*.

    Args:
        message: A single entry of the ``messages`` list.
        index: Position of the entry, used only in the error detail.

    Raises:
        HTTPException: Status 400 when ``role`` is missing, empty, or not a string.
    """
    raw_role = message.get("role")
    if isinstance(raw_role, str) and raw_role:
        return raw_role.lower()
    raise HTTPException(
        status_code=400,
        detail=f"messages[{index}].role must be a string",
    )
|
| 693 |
+
|
| 694 |
+
|
| 695 |
+
def _message_decision(
    *,
    tokens_saved: int,
    receipts: list[dict[str, Any]],
) -> str:
    """Fold per-message receipt decisions into one overall decision.

    Precedence: any ``"reject"`` wins, then any ``"needs_review"``; otherwise
    the result is ``"no_op"`` when nothing was saved and ``"high_confidence"``
    when it was. Entries of *receipts* that are not dicts are ignored.
    """
    review_requested = False
    for entry in receipts:
        if not isinstance(entry, dict):
            continue
        verdict = entry.get("decision")
        if verdict == "reject":
            # A single rejection outranks everything else.
            return "reject"
        if verdict == "needs_review":
            review_requested = True

    if review_requested:
        return "needs_review"
    return "no_op" if tokens_saved <= 0 else "high_confidence"
|
| 712 |
+
|
| 713 |
+
|
| 714 |
def _correlation_payload(
|
| 715 |
payload: dict[str, Any],
|
| 716 |
*,
|
|
|
|
| 1146 |
return body
|
| 1147 |
|
| 1148 |
|
| 1149 |
+
def _handle_messages(payload: dict[str, Any]) -> dict[str, Any]:
    """Compress a chat-style ``messages`` payload one message at a time.

    Every message whose ``content`` is a string is passed to
    ``_compress_text``. When a message's role is protected (listed in
    ``compression_settings.protected_roles``, or absent from
    ``compression_settings.compress_roles`` when that key is supplied) its
    whole non-empty content is added to the protected spans for that call.
    Messages with non-string content are copied through unchanged and
    recorded as skipped.

    Args:
        payload: Decoded request body. Reads ``messages``,
            ``compression_settings``, ``protected_spans``, ``tool_schemas`` /
            ``tools``, ``request_id`` and ``idempotency_key``.

    Returns:
        The response body: compressed messages, per-message receipts, token
        totals and an aggregate receipt.

    Raises:
        HTTPException: Status 400 when ``input``/``inputs`` is also present,
            or when ``messages``, ``compression_settings``,
            ``protected_spans``, ``request_id``, ``idempotency_key`` or a
            message role fails validation.
    """
    if "input" in payload or "inputs" in payload:
        raise HTTPException(status_code=400, detail="provide either messages, input, or inputs")
    messages = payload.get("messages")
    if not isinstance(messages, list) or not messages:
        raise HTTPException(status_code=400, detail="messages must be a non-empty list")
    if not all(isinstance(message, dict) for message in messages):
        raise HTTPException(status_code=400, detail="messages entries must be objects")
    settings = payload.get("compression_settings") or {}
    if not isinstance(settings, dict):
        raise HTTPException(status_code=400, detail="compression_settings must be an object")
    protected_roles = _string_set(
        settings.get("protected_roles"),
        default=DEFAULT_PROTECTED_MESSAGE_ROLES,
        field_name="compression_settings.protected_roles",
    )
    # ``None`` means "no allow-list"; a supplied list restricts compression
    # to exactly those roles.
    compress_roles = (
        _string_set(
            settings.get("compress_roles"),
            field_name="compression_settings.compress_roles",
        )
        if "compress_roles" in settings else None
    )
    protected_values = payload.get("protected_spans") or []
    if not isinstance(protected_values, list) or not all(
        isinstance(value, str) for value in protected_values
    ):
        raise HTTPException(status_code=400, detail="protected_spans must be strings")
    request_id = payload.get("request_id")
    if request_id is not None and not isinstance(request_id, str):
        raise HTTPException(status_code=400, detail="request_id must be a string")
    idempotency_key = payload.get("idempotency_key")
    if idempotency_key is not None and not isinstance(idempotency_key, str):
        raise HTTPException(status_code=400, detail="idempotency_key must be a string")

    # Loop-invariant: the same tool schemas accompany every per-message call.
    tool_schemas = payload.get("tool_schemas", payload.get("tools"))

    output_messages: list[dict[str, Any]] = []
    receipts: list[dict[str, Any]] = []
    nested_receipts: list[dict[str, Any]] = []
    receipt_ids: list[str] = []
    original_tokens = 0
    output_tokens = 0
    compressed_message_count = 0
    skipped_message_count = 0

    for index, message in enumerate(messages):
        role = _message_role(message, index)
        content = message.get("content")
        # Shallow copy so the caller's message dict is never modified.
        output_message = dict(message)
        if not isinstance(content, str):
            skipped_message_count += 1
            receipts.append({
                "index": index,
                "role": role,
                "status": "skipped",
                "reason": "non_string_content",
                "content_type": type(content).__name__,
                "original_input_tokens": 0,
                "output_tokens": 0,
                "tokens_saved": 0,
            })
            output_messages.append(output_message)
            continue

        role_protected = role in protected_roles or (
            compress_roles is not None and role not in compress_roles
        )
        item_protected = list(protected_values)
        if role_protected and content:
            # Protect the entire message body for protected roles.
            item_protected.append(content)
        result = _compress_text({
            "input": content,
            "compression_settings": settings,
            "protected_spans": item_protected,
            "tool_schemas": tool_schemas,
            "request_id": request_id,
            "idempotency_key": idempotency_key,
        })
        output_message["content"] = result["output"]
        output_messages.append(output_message)
        original_tokens += int(result["original_input_tokens"])
        output_tokens += int(result["output_tokens"])
        tokens_saved = int(result["tokens_saved"])
        if tokens_saved > 0:
            compressed_message_count += 1
        receipt = result["receipt"]
        nested_receipts.append(receipt)
        receipt_ids.append(receipt["receipt_id"])
        receipts.append({
            "index": index,
            "role": role,
            "status": "ok",
            "protected_by_role": role_protected,
            "original_input_tokens": result["original_input_tokens"],
            "output_tokens": result["output_tokens"],
            "tokens_saved": tokens_saved,
            "compression_percentage": result["compression_percentage"],
            "receipt_id": receipt["receipt_id"],
            "receipt": receipt,
        })

    tokens_saved_total = max(0, original_tokens - output_tokens)
    compression_pct = (
        round(100.0 * tokens_saved_total / original_tokens, 1)
        if original_tokens else 0.0
    )
    decision = _message_decision(
        tokens_saved=tokens_saved_total,
        receipts=nested_receipts,
    )
    # Hash each message list once; the digests feed both the receipt id and
    # the receipt body.
    input_sha256 = _stable_json_sha256(messages)
    output_sha256 = _stable_json_sha256(output_messages)
    aggregate_receipt = {
        "receipt_version": "message-compression-receipt-v0.1.0",
        "receipt_id": _aggregate_receipt_id({
            "input_sha256": input_sha256,
            "output_sha256": output_sha256,
            "receipt_ids": receipt_ids,
            "tokens_saved": tokens_saved_total,
            "compression_percentage": compression_pct,
            "decision": decision,
        }),
        "request_id": request_id,
        "idempotency_key": idempotency_key,
        "message_count": len(messages),
        "compressed_message_count": compressed_message_count,
        "skipped_message_count": skipped_message_count,
        "protected_roles": sorted(protected_roles),
        "compress_roles": sorted(compress_roles) if compress_roles is not None else None,
        "decision": decision,
        "deletion_only": all(
            receipt.get("deletion_only", True) for receipt in nested_receipts
        ),
        "deterministic": True,
        "input_sha256": input_sha256,
        "output_sha256": output_sha256,
        "message_receipt_ids": receipt_ids,
    }
    compressed_prompt: dict[str, Any] = {
        "messages": output_messages,
        "protected_spans": protected_values,
    }
    # Echo tool definitions back under whichever key(s) the caller used.
    if "tools" in payload:
        compressed_prompt["tools"] = payload["tools"]
    if "tool_schemas" in payload:
        compressed_prompt["tool_schemas"] = payload["tool_schemas"]
    body = {
        "schema_version": API_SCHEMA_VERSION,
        "status": "ok",
        "endpoint": "/v1/compress",
        "maturity": "measurement_only",
        "messages": output_messages,
        "compressed_messages": output_messages,
        "compressed_prompts": [compressed_prompt],
        "message_count": len(messages),
        "compressed_message_count": compressed_message_count,
        "skipped_message_count": skipped_message_count,
        "original_input_tokens": original_tokens,
        "output_tokens": output_tokens,
        "tokens_saved": tokens_saved_total,
        "compression_percentage": compression_pct,
        "receipt_ids": receipt_ids,
        "receipts": receipts,
        "receipt": aggregate_receipt,
    }
    if request_id is not None:
        body["request_id"] = request_id
    if idempotency_key is not None:
        body["idempotency_key"] = idempotency_key
    return body
|
| 1316 |
+
|
| 1317 |
+
|
| 1318 |
+
def _dispatch_compress(payload: dict[str, Any]) -> dict[str, Any]:
    """Route a compress payload to the handler matching its shape.

    ``inputs`` takes precedence over ``messages``; a payload with neither key
    is handled as a single text compression.
    """
    if "inputs" in payload:
        handler = _handle_batch
    elif "messages" in payload:
        handler = _handle_messages
    else:
        handler = _compress_text
    return handler(payload)
|
| 1324 |
+
|
| 1325 |
+
|
| 1326 |
def _tokens(text: str) -> list[dict[str, Any]]:
|
| 1327 |
started = time.perf_counter()
|
| 1328 |
try:
|