Spaces:
Running
Running
TruffleHog backstop: soft-warn (flag for review) instead of hard-reject; drop false-positive (FP) Twilio regex
Browse files
app.py
CHANGED
|
@@ -197,19 +197,15 @@ async def donate(request: Request):
|
|
| 197 |
status_code=422,
|
| 198 |
)
|
| 199 |
|
| 200 |
-
# ---
|
| 201 |
-
# Catches what the regex pass cannot (vendor tokens with no fixed prefix
|
| 202 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
th_detectors = trufflehog_findings(cleaned)
|
| 204 |
-
if th_detectors:
|
| 205 |
-
return JSONResponse(
|
| 206 |
-
{
|
| 207 |
-
"error": "secrets_found",
|
| 208 |
-
"detail": "TruffleHog (server backstop) flagged likely secrets that survived the client scrub. Donation rejected — clean the flagged values and retry.",
|
| 209 |
-
"detectors": th_detectors,
|
| 210 |
-
},
|
| 211 |
-
status_code=422,
|
| 212 |
-
)
|
| 213 |
|
| 214 |
if not HF_TOKEN or not DATASET_REPO:
|
| 215 |
# Not yet configured — accept-validate but don't pretend to publish.
|
|
@@ -224,14 +220,22 @@ async def donate(request: Request):
|
|
| 224 |
|
| 225 |
# --- open the PR on the contributor's behalf ---------------------------
|
| 226 |
try:
|
| 227 |
-
pr_url = _open_pr(cleaned, harness, filename)
|
| 228 |
except Exception as e: # noqa: BLE001 — surface a clean message to the skill
|
| 229 |
return JSONResponse({"error": "publish_failed", "detail": str(e)}, status_code=502)
|
| 230 |
|
| 231 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
|
| 233 |
|
| 234 |
-
def _open_pr(cleaned_text, harness, filename):
|
| 235 |
"""Open a PR to the dataset with the cleaned trace, under the project token."""
|
| 236 |
from huggingface_hub import HfApi, CommitOperationAdd
|
| 237 |
|
|
@@ -240,12 +244,20 @@ def _open_pr(cleaned_text, harness, filename):
|
|
| 240 |
path_in_repo=f"sessions/{harness}/{filename}",
|
| 241 |
path_or_fileobj=io.BytesIO(cleaned_text.encode("utf-8")),
|
| 242 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
commit = api.create_commit(
|
| 244 |
repo_id=DATASET_REPO,
|
| 245 |
repo_type="dataset",
|
| 246 |
operations=[op],
|
| 247 |
commit_message=f"Donate {harness} trace ({filename})",
|
| 248 |
-
commit_description=
|
| 249 |
create_pr=True,
|
| 250 |
)
|
| 251 |
# create_commit returns an object whose pr_url is set when create_pr=True
|
|
|
|
| 197 |
status_code=422,
|
| 198 |
)
|
| 199 |
|
| 200 |
+
# --- TruffleHog soft-warn pass over the scrubbed trace ------------------
|
| 201 |
+
# Catches what the regex pass cannot (vendor tokens with no fixed prefix).
|
| 202 |
+
# Run WITHOUT verification so it never transmits candidate secrets — which
|
| 203 |
+
# means occasional false positives (e.g. a 32-char hash read as a "Box"
|
| 204 |
+
# token). It therefore does NOT auto-reject: findings are surfaced to the
|
| 205 |
+
# contributor in the response and recorded on the PR for the maintainer to
|
| 206 |
+
# review, on top of the human review every donation already gets. The regex
|
| 207 |
+
# pass above stays the hard block for crisp, high-confidence secret formats.
|
| 208 |
th_detectors = trufflehog_findings(cleaned)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
|
| 210 |
if not HF_TOKEN or not DATASET_REPO:
|
| 211 |
# Not yet configured — accept-validate but don't pretend to publish.
|
|
|
|
| 220 |
|
| 221 |
# --- open the PR on the contributor's behalf ---------------------------
|
| 222 |
try:
|
| 223 |
+
pr_url = _open_pr(cleaned, harness, filename, warnings=th_detectors)
|
| 224 |
except Exception as e: # noqa: BLE001 — surface a clean message to the skill
|
| 225 |
return JSONResponse({"error": "publish_failed", "detail": str(e)}, status_code=502)
|
| 226 |
|
| 227 |
+
resp = {"status": "submitted", "pr_url": pr_url, "path": f"sessions/{harness}/{filename}"}
|
| 228 |
+
if th_detectors:
|
| 229 |
+
resp["warnings"] = {
|
| 230 |
+
"trufflehog_unverified": th_detectors,
|
| 231 |
+
"note": "TruffleHog flagged these without verification — often false positives "
|
| 232 |
+
"on high-entropy strings, so the donation was NOT blocked. Please confirm "
|
| 233 |
+
"none is a real secret; a maintainer will also review before merging.",
|
| 234 |
+
}
|
| 235 |
+
return resp
|
| 236 |
|
| 237 |
|
| 238 |
+
def _open_pr(cleaned_text, harness, filename, warnings=None):
|
| 239 |
"""Open a PR to the dataset with the cleaned trace, under the project token."""
|
| 240 |
from huggingface_hub import HfApi, CommitOperationAdd
|
| 241 |
|
|
|
|
| 244 |
path_in_repo=f"sessions/{harness}/{filename}",
|
| 245 |
path_or_fileobj=io.BytesIO(cleaned_text.encode("utf-8")),
|
| 246 |
)
|
| 247 |
+
description = "Anonymous donation via Trace Commons ingestion server."
|
| 248 |
+
if warnings:
|
| 249 |
+
description += (
|
| 250 |
+
"\n\n⚠️ **Maintainer review needed.** TruffleHog (unverified) flagged the "
|
| 251 |
+
"following detector(s). These are frequently false positives on high-entropy "
|
| 252 |
+
"strings (hashes, IDs, base64), but confirm none is a real secret before merging:\n- "
|
| 253 |
+
+ "\n- ".join(warnings)
|
| 254 |
+
)
|
| 255 |
commit = api.create_commit(
|
| 256 |
repo_id=DATASET_REPO,
|
| 257 |
repo_type="dataset",
|
| 258 |
operations=[op],
|
| 259 |
commit_message=f"Donate {harness} trace ({filename})",
|
| 260 |
+
commit_description=description,
|
| 261 |
create_pr=True,
|
| 262 |
)
|
| 263 |
# create_commit returns an object whose pr_url is set when create_pr=True
|