Spaces:
Running
Running
Commit ·
719e30d
1
Parent(s): b1e2218
Redact user document content from traces before upload
Browse files
Strip tool call stdout, scratch files, system prompt, and tool
messages to protect uploaded document content. Preserve question,
answer, citations, stats, and telemetry for quality review.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
app.py
CHANGED
|
@@ -55,13 +55,35 @@ def _slugify(text: str, max_len: int = 50) -> str:
|
|
| 55 |
return slug[:max_len].rstrip("-")
|
| 56 |
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
def upload_trace(result: dict) -> None:
|
| 59 |
if not hf_api or not HF_TRACES_REPO:
|
| 60 |
return
|
|
|
|
| 61 |
timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S-%f")
|
| 62 |
question_slug = _slugify(result.get("question", ""))
|
| 63 |
filename = f"{timestamp}_{question_slug}.json" if question_slug else f"{timestamp}.json"
|
| 64 |
-
content = json.dumps(
|
| 65 |
try:
|
| 66 |
hf_api.upload_file(
|
| 67 |
path_or_fileobj=content,
|
|
|
|
| 55 |
return slug[:max_len].rstrip("-")
|
| 56 |
|
| 57 |
|
| 58 |
+
def _redact_trace(result: dict) -> dict:
    """Return a deep copy of *result* with document-derived content removed.

    The input is never mutated. Inside ``result["trace"]``:

    * each tool call's ``result`` is parsed as JSON; when it is a JSON
      object, its ``stdout`` value (if present) is replaced by a placeholder
      that records only the original length, and the object is re-serialized;
    * any tool call ``result`` that is not valid JSON, or is valid JSON but
      not an object, is replaced wholesale by ``"[redacted]"``;
    * the ``scratch_files`` entry is dropped;
    * the ``content`` of every ``system`` and ``tool`` message is replaced
      by ``"[redacted]"``.

    Keys outside ``trace`` are copied through unchanged. If ``trace`` is
    missing or is not a dict, the copy is returned as-is.
    """
    import copy

    placeholder = "[redacted]"
    sanitized = copy.deepcopy(result)

    trace = sanitized.get("trace")
    if not isinstance(trace, dict):
        # No trace section (or an unexpected shape): nothing to scrub.
        return sanitized

    for tc in trace.get("tool_calls") or []:
        try:
            tool_result = json.loads(tc.get("result", "{}"))
        except (ValueError, TypeError):
            # Not a JSON string at all: the payload cannot be inspected,
            # so drop it entirely rather than risk uploading it.
            tc["result"] = placeholder
            continue

        if not isinstance(tool_result, dict):
            # Fail closed: a bare JSON string or array may carry document
            # text, and has no "stdout" field that could be redacted alone.
            tc["result"] = placeholder
            continue

        if "stdout" in tool_result:
            try:
                size = len(tool_result["stdout"])
            except TypeError:
                # stdout has no length (e.g. null): redact the whole result.
                tc["result"] = placeholder
                continue
            tool_result["stdout"] = f"[redacted — {size} chars]"
        tc["result"] = json.dumps(tool_result)

    trace.pop("scratch_files", None)

    for msg in trace.get("messages") or []:
        if msg.get("role") in ("system", "tool"):
            msg["content"] = placeholder

    return sanitized
|
| 77 |
+
|
| 78 |
+
|
| 79 |
def upload_trace(result: dict) -> None:
|
| 80 |
if not hf_api or not HF_TRACES_REPO:
|
| 81 |
return
|
| 82 |
+
sanitized = _redact_trace(result)
|
| 83 |
timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S-%f")
|
| 84 |
question_slug = _slugify(result.get("question", ""))
|
| 85 |
filename = f"{timestamp}_{question_slug}.json" if question_slug else f"{timestamp}.json"
|
| 86 |
+
content = json.dumps(sanitized, indent=2, default=str).encode()
|
| 87 |
try:
|
| 88 |
hf_api.upload_file(
|
| 89 |
path_or_fileobj=content,
|