wchen22 committed on
Commit
b784d0b
·
verified ·
1 Parent(s): 2343c2a

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. README.md +2 -1
  2. app.py +173 -31
README.md CHANGED
@@ -29,7 +29,8 @@ Live Space:
29
  `/v1/compress` returned 200.
30
  - `/v1/classify` is tokenizer/fallback KEEP-only until a trained KEEP/DROP head
31
  is mounted. `/v1/compress` is rules-first deletion-only compression with
32
- safety receipts.
 
33
  - Mount `classifier_manifest.json`, tokenizer files, and optional `model.onnx`;
34
  set `TOUCHDOWN_CLASSIFIER_ARTIFACT_DIR` to let the Space use artifact DROP
35
  labels through ONNX Runtime or the manifest fallback. Those labels still pass
 
29
  `/v1/compress` returned 200.
30
  - `/v1/classify` is tokenizer/fallback KEEP-only until a trained KEEP/DROP head
31
  is mounted. `/v1/compress` is rules-first deletion-only compression with
32
+ safety receipts. The Space app supports both single `input` requests and
33
+ managed `inputs[]` batches with per-item receipts and partial-error rows.
34
  - Mount `classifier_manifest.json`, tokenizer files, and optional `model.onnx`;
35
  set `TOUCHDOWN_CLASSIFIER_ARTIFACT_DIR` to let the Space use artifact DROP
36
  labels through ONNX Runtime or the manifest fallback. Those labels still pass
app.py CHANGED
@@ -1,5 +1,6 @@
1
  from __future__ import annotations
2
 
 
3
  import json
4
  import math
5
  import os
@@ -13,6 +14,7 @@ from fastapi import FastAPI, HTTPException
13
 
14
  CLASSIFIER_MODEL = "microsoft/deberta-v3-small"
15
  CLASSIFIER_ARTIFACT_DIR = os.environ.get("TOUCHDOWN_CLASSIFIER_ARTIFACT_DIR")
 
16
  RULES_VERSION = "hf-space-rules-v0.1.0"
17
  LOW_SIGNAL_PATTERNS = [
18
  re.compile(pattern, re.IGNORECASE)
@@ -264,6 +266,15 @@ def _is_subsequence(candidate: str, original: str) -> bool:
264
  return True
265
 
266
 
 
 
 
 
 
 
 
 
 
267
  def _protected_spans(
268
  text: str,
269
  protected_values: list[str],
@@ -388,43 +399,172 @@ def _compress_text(payload: dict[str, Any]) -> dict[str, Any]:
388
  ) else (
389
  "high_confidence" if saved > 0 and aggressiveness <= 0.65 else "no_op"
390
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
391
  return {
 
 
 
 
392
  "output": output,
393
  "original_input_tokens": before,
394
  "output_tokens": after,
395
  "tokens_saved": saved,
396
  "compression_percentage": round(100.0 * saved / before, 1),
397
- "receipt": {
398
- "protected_spans_checked": len(protected_values),
399
- "protected_spans_missing": len(missing),
400
- "code_blocks_detected": len(code_spans),
401
- "code_blocks_preserved": code_preserved,
402
- "json_blocks_detected": len(json_spans),
403
- "json_blocks_preserved": json_preserved,
404
- "system_prompt_spans_detected": len(system_spans),
405
- "system_prompts_preserved": system_preserved,
406
- "decision": decision,
407
- "compressor_latency_ms": round((time.perf_counter() - started) * 1000.0, 3),
408
- "deletion_only": _is_subsequence(output, text),
409
- "deterministic": True,
410
- "rules_version": RULES_VERSION,
411
- "classifier": {
412
- "model": CLASSIFIER_MODEL,
413
- "status": classifier_status,
414
- "artifact_dir_configured": bool(CLASSIFIER_ARTIFACT_DIR),
415
- "artifact_dir": CLASSIFIER_ARTIFACT_DIR,
416
- "error": classifier_error,
417
- "labels_received": len(classifier_labels),
418
- "drop_labels": classifier_drop_labels,
419
- "drop_spans_applied": classifier_applied,
420
- "drop_spans_blocked_by_safety": classifier_blocked,
421
- },
422
- "dropped_segments_count": len(drops),
423
- "dropped_segments": [
424
- {"reason": reason, "preview": preview}
425
- for _, _, reason, preview in drops[:20]
426
- ],
427
- },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
428
  }
429
 
430
 
@@ -501,4 +641,6 @@ def classify(payload: dict[str, Any]) -> dict[str, Any]:
501
 
502
@app.post("/v1/compress")
def compress(payload: dict[str, Any]) -> dict[str, Any]:
    """Handle ``POST /v1/compress`` by compressing the request payload.

    The payload is forwarded unchanged to ``_compress_text`` and its
    result is returned as the response body.
    """
    response = _compress_text(payload)
    return response
 
1
  from __future__ import annotations
2
 
3
+ import hashlib
4
  import json
5
  import math
6
  import os
 
14
 
15
  CLASSIFIER_MODEL = "microsoft/deberta-v3-small"
16
  CLASSIFIER_ARTIFACT_DIR = os.environ.get("TOUCHDOWN_CLASSIFIER_ARTIFACT_DIR")
17
+ API_SCHEMA_VERSION = "0.1.0"
18
  RULES_VERSION = "hf-space-rules-v0.1.0"
19
  LOW_SIGNAL_PATTERNS = [
20
  re.compile(pattern, re.IGNORECASE)
 
266
  return True
267
 
268
 
269
def _sha256_text(value: str) -> str:
    """Return the hexadecimal SHA-256 digest of ``value`` encoded as UTF-8."""
    hasher = hashlib.sha256()
    hasher.update(value.encode("utf-8"))
    return hasher.hexdigest()
271
+
272
+
273
def _receipt_id(payload: dict[str, Any]) -> str:
    """Derive a receipt identifier from ``payload``.

    The payload is serialised as compact JSON with sorted keys, hashed
    with SHA-256, and the first 24 hex characters of the digest are
    returned behind a ``tdcr_`` prefix.
    """
    canonical = json.dumps(payload, separators=(",", ":"), sort_keys=True)
    digest = hashlib.sha256(canonical.encode("utf-8")).hexdigest()
    return f"tdcr_{digest[:24]}"
276
+
277
+
278
  def _protected_spans(
279
  text: str,
280
  protected_values: list[str],
 
399
  ) else (
400
  "high_confidence" if saved > 0 and aggressiveness <= 0.65 else "no_op"
401
  )
402
+ dropped_segments = [
403
+ {"reason": reason, "preview": preview, "start": start, "end": end}
404
+ for start, end, reason, preview in drops[:20]
405
+ ]
406
+ receipt = {
407
+ "protected_spans_checked": len(protected_values),
408
+ "protected_spans_missing": len(missing),
409
+ "code_blocks_detected": len(code_spans),
410
+ "code_blocks_preserved": code_preserved,
411
+ "json_blocks_detected": len(json_spans),
412
+ "json_blocks_preserved": json_preserved,
413
+ "system_prompt_spans_detected": len(system_spans),
414
+ "system_prompts_preserved": system_preserved,
415
+ "decision": decision,
416
+ "compressor_latency_ms": round((time.perf_counter() - started) * 1000.0, 3),
417
+ "deletion_only": _is_subsequence(output, text),
418
+ "deterministic": True,
419
+ "rules_version": RULES_VERSION,
420
+ "classifier": {
421
+ "model": CLASSIFIER_MODEL,
422
+ "status": classifier_status,
423
+ "artifact_dir_configured": bool(CLASSIFIER_ARTIFACT_DIR),
424
+ "artifact_dir": CLASSIFIER_ARTIFACT_DIR,
425
+ "error": classifier_error,
426
+ "labels_received": len(classifier_labels),
427
+ "drop_labels": classifier_drop_labels,
428
+ "drop_spans_applied": classifier_applied,
429
+ "drop_spans_blocked_by_safety": classifier_blocked,
430
+ },
431
+ "dropped_segments_count": len(drops),
432
+ "dropped_segments": dropped_segments,
433
+ }
434
+ receipt["input_sha256"] = _sha256_text(text)
435
+ receipt["output_sha256"] = _sha256_text(output)
436
+ receipt["removed_sha256"] = _sha256_text(
437
+ "".join(text[start:end] for start, end in drop_ranges)
438
+ )
439
+ receipt["receipt_id"] = _receipt_id({
440
+ "input_sha256": receipt["input_sha256"],
441
+ "output_sha256": receipt["output_sha256"],
442
+ "removed_sha256": receipt["removed_sha256"],
443
+ "tokens_saved": saved,
444
+ "compression_percentage": round(100.0 * saved / before, 1),
445
+ "decision": decision,
446
+ "rules_version": RULES_VERSION,
447
+ "classifier": receipt["classifier"],
448
+ "dropped_segments": dropped_segments,
449
+ })
450
  return {
451
+ "schema_version": API_SCHEMA_VERSION,
452
+ "status": "ok",
453
+ "endpoint": "/v1/compress",
454
+ "maturity": "measurement_only",
455
  "output": output,
456
  "original_input_tokens": before,
457
  "output_tokens": after,
458
  "tokens_saved": saved,
459
  "compression_percentage": round(100.0 * saved / before, 1),
460
+ "receipt": receipt,
461
+ }
462
+
463
+
464
def _merge_batch_item_payload(
    payload: dict[str, Any],
    item: Any,
    index: int,
) -> tuple[str | None, dict[str, Any]]:
    """Build the single-input payload for one entry of a batch request.

    Args:
        payload: The whole batch request. Its ``compression_settings`` and
            ``protected_spans`` act as defaults for every item.
        item: One element of ``inputs``. Either a plain string (the text to
            compress) or an object with a required ``input`` key and
            optional ``id``, ``compression_settings`` and
            ``protected_spans`` keys.
        index: Position of ``item`` in ``inputs``; used in error messages.

    Returns:
        ``(item_id, item_payload)``. ``item_id`` is the item's ``id`` or
        ``None`` (always ``None`` for string items). ``item_payload`` holds
        the ``input``, ``compression_settings`` and ``protected_spans``
        keys.

    Raises:
        ValueError: If the item is neither a string nor an object, its
            ``id`` is not a string, ``input`` is missing, or either
            ``compression_settings`` value is not an object.
    """
    top_settings = payload.get("compression_settings")
    top_settings_valid = top_settings is None or isinstance(top_settings, dict)

    if isinstance(item, str):
        # String items inherit the top-level settings verbatim, so they are
        # held to the same "must be an object" rule as object items.
        if not top_settings_valid:
            raise ValueError("compression_settings must be an object")
        return None, {
            "input": item,
            "compression_settings": top_settings,
            "protected_spans": payload.get("protected_spans"),
        }
    if not isinstance(item, dict):
        raise ValueError(f"inputs[{index}] must be a string or an object")

    item_id = item.get("id")
    if item_id is not None and not isinstance(item_id, str):
        raise ValueError(f"inputs[{index}].id must be a string")
    if "input" not in item:
        raise ValueError(f"inputs[{index}].input is required")

    item_settings = item.get("compression_settings")
    if not top_settings_valid:
        raise ValueError("compression_settings must be an object")
    if item_settings is not None and not isinstance(item_settings, dict):
        raise ValueError(f"inputs[{index}].compression_settings must be an object")
    # Per-item settings win on key conflicts; an empty merge collapses to None.
    settings = {
        **(top_settings or {}),
        **(item_settings or {}),
    } or None

    return item_id, {
        "input": item.get("input"),
        "compression_settings": settings,
        # An explicit per-item key (even None) overrides the top-level value.
        "protected_spans": item.get("protected_spans", payload.get("protected_spans")),
    }
500
+
501
+
502
def _handle_batch(payload: dict[str, Any]) -> dict[str, Any]:
    """Compress every entry of ``payload["inputs"]`` and aggregate the results.

    Each item is converted to a single-input payload and compressed on its
    own. An item that raises ``HTTPException`` or ``ValueError`` becomes an
    error row instead of aborting the batch.

    Returns:
        A response object with batch-level counts, summed token totals, the
        overall compression percentage, the receipt ids of successful items
        and one result row per input, in input order.

    Raises:
        HTTPException: With status 400 when ``input`` is also present, when
            ``inputs`` is not a list, or when the list is empty.
    """
    if "input" in payload:
        raise HTTPException(status_code=400, detail="provide either input or inputs, not both")
    inputs = payload.get("inputs")
    if not isinstance(inputs, list):
        raise HTTPException(status_code=400, detail="inputs must be a list")
    if not inputs:
        raise HTTPException(status_code=400, detail="inputs list is empty")

    token_keys = ("original_input_tokens", "output_tokens", "tokens_saved")
    totals = dict.fromkeys(token_keys, 0)
    rows: list[dict[str, Any]] = []
    ok_count = 0
    error_count = 0

    for index, item in enumerate(inputs):
        row: dict[str, Any] = {"index": index}
        # Record the id up front so it also appears on rows that fail validation.
        if isinstance(item, dict) and isinstance(item.get("id"), str):
            row["id"] = item["id"]
        try:
            item_id, item_payload = _merge_batch_item_payload(payload, item, index)
            if item_id is not None:
                row["id"] = item_id
            compressed = _compress_text(item_payload)
        except (HTTPException, ValueError) as exc:
            reason = exc.detail if isinstance(exc, HTTPException) else exc
            row.update({"status": "error", "error": str(reason)})
            error_count += 1
        else:
            row.update({"status": "ok", **compressed})
            for key in token_keys:
                totals[key] += int(compressed[key])
            ok_count += 1
        rows.append(row)

    total_before = totals["original_input_tokens"]
    if total_before:
        compression_pct = round(100.0 * totals["tokens_saved"] / total_before, 1)
    else:
        # No successful input tokens: avoid dividing by zero.
        compression_pct = 0.0

    receipt_ids = []
    for row in rows:
        if row.get("status") != "ok":
            continue
        receipt_id = row.get("receipt", {}).get("receipt_id")
        if receipt_id:
            receipt_ids.append(receipt_id)

    return {
        "schema_version": API_SCHEMA_VERSION,
        "status": "partial_error" if error_count else "ok",
        "endpoint": "/v1/compress",
        "maturity": "measurement_only",
        "input_count": len(inputs),
        "succeeded": ok_count,
        "failed": error_count,
        **totals,
        "compression_percentage": compression_pct,
        "receipt_ids": receipt_ids,
        "results": rows,
    }
569
 
570
 
 
641
 
642
@app.post("/v1/compress")
def compress(payload: dict[str, Any]) -> dict[str, Any]:
    """Handle ``POST /v1/compress`` for single and batch requests.

    A payload containing an ``inputs`` key is routed to ``_handle_batch``;
    any other payload is passed to ``_compress_text``.
    """
    handler = _handle_batch if "inputs" in payload else _compress_text
    return handler(payload)