chuckfinca Claude Opus 4.6 (1M context) committed on
Commit
719e30d
·
1 Parent(s): b1e2218

Redact user document content from traces before upload

Browse files

Strip tool call stdout, scratch files, system prompt, and tool
messages to protect uploaded document content. Preserve question,
answer, citations, stats, and telemetry for quality review.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +23 -1
app.py CHANGED
@@ -55,13 +55,35 @@ def _slugify(text: str, max_len: int = 50) -> str:
55
  return slug[:max_len].rstrip("-")
56
 
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  def upload_trace(result: dict) -> None:
59
  if not hf_api or not HF_TRACES_REPO:
60
  return
 
61
  timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S-%f")
62
  question_slug = _slugify(result.get("question", ""))
63
  filename = f"{timestamp}_{question_slug}.json" if question_slug else f"{timestamp}.json"
64
- content = json.dumps(result, indent=2, default=str).encode()
65
  try:
66
  hf_api.upload_file(
67
  path_or_fileobj=content,
 
55
  return slug[:max_len].rstrip("-")
56
 
57
 
58
def _redact_trace(result: dict) -> dict:
    """Return a deep copy of *result* with document content stripped.

    The input is never mutated. Within ``result["trace"]`` the copy has:

    * each tool call's ``result`` parsed as JSON; when it is a JSON object,
      any ``stdout`` value is replaced by a length-only placeholder and the
      object is re-serialised. Anything that is not a JSON object (invalid
      JSON, non-string payloads, lists, scalars) is replaced wholesale with
      ``"[redacted]"`` so unrecognised shapes cannot leak content;
    * the ``scratch_files`` entry removed;
    * the ``content`` of every ``system`` and ``tool`` message replaced with
      ``"[redacted]"``.

    Keys outside ``trace`` are left untouched. A missing or non-dict
    ``trace`` is returned as-is (copied), since there is nothing to redact.
    """
    import copy

    placeholder = "[redacted]"
    sanitized = copy.deepcopy(result)

    trace = sanitized.get("trace")
    if not isinstance(trace, dict):
        # e.g. {"trace": None}: previously raised AttributeError before upload.
        return sanitized

    # `or []` tolerates an explicit null list as well as a missing key.
    for tc in trace.get("tool_calls") or []:
        try:
            tool_result = json.loads(tc.get("result", "{}"))
        except (ValueError, TypeError):
            # ValueError covers json.JSONDecodeError and undecodable bytes;
            # TypeError covers payloads that are not str/bytes at all.
            tc["result"] = placeholder
            continue

        if not isinstance(tool_result, dict):
            # Lists/strings/numbers have no "stdout" field to target, so the
            # only safe option is to drop the whole payload.
            tc["result"] = placeholder
            continue

        if "stdout" in tool_result:
            try:
                size = len(tool_result["stdout"])
            except TypeError:
                # stdout of an unexpected, unsized type: redact everything.
                tc["result"] = placeholder
                continue
            tool_result["stdout"] = f"[redacted — {size} chars]"
        tc["result"] = json.dumps(tool_result)

    trace.pop("scratch_files", None)

    for msg in trace.get("messages") or []:
        if msg.get("role") in ("system", "tool"):
            msg["content"] = placeholder

    return sanitized
77
+
78
+
79
  def upload_trace(result: dict) -> None:
80
  if not hf_api or not HF_TRACES_REPO:
81
  return
82
+ sanitized = _redact_trace(result)
83
  timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S-%f")
84
  question_slug = _slugify(result.get("question", ""))
85
  filename = f"{timestamp}_{question_slug}.json" if question_slug else f"{timestamp}.json"
86
+ content = json.dumps(sanitized, indent=2, default=str).encode()
87
  try:
88
  hf_api.upload_file(
89
  path_or_fileobj=content,