Commit ·
113d9bf
1
Parent(s): 26c0a94
Add sanitized field scam examples
Browse files
Co-authored-by: Codex <codex@openai.com>
- FIELD_NOTES.md +9 -0
- README.md +1 -0
- app.py +2 -1
- eval/field_examples.jsonl +2 -0
- jawbreaker/schema.py +31 -2
- tests/test_eval_dataset.py +16 -0
FIELD_NOTES.md
CHANGED
|
@@ -88,3 +88,12 @@ Guardrail: winning matters, but not by overclaiming. The project should not clai
|
|
| 88 |
Modal credits are the right place to run the MiniCPM LoRA job. Added `training/modal_train.py` so the same generated train/dev split can run on an A100 with outputs stored in a Modal volume.
|
| 89 |
|
| 90 |
Claim rule: Modal usage becomes a submission claim only after a real Modal run completes. Well-Tuned becomes a claim only after the resulting adapter is published and beats the base model or fallback on eval.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
Modal credits are the right place to run the MiniCPM LoRA job. Added `training/modal_train.py` so the same generated train/dev split can run on an A100 with outputs stored in a Modal volume.
|
| 89 |
|
| 90 |
Claim rule: Modal usage becomes a submission claim only after a real Modal run completes. Well-Tuned becomes a claim only after the resulting adapter is published and beats the base model or fallback on eval.
|
| 91 |
+
|
| 92 |
+
## 2026-06-06 Field Examples
|
| 93 |
+
|
| 94 |
+
Added two sanitized real-world scam examples from a friend:
|
| 95 |
+
|
| 96 |
+
- Coinbase account phone-number update callback lure
|
| 97 |
+
- TikTok Shop part-time assistant / WhatsApp job lure
|
| 98 |
+
|
| 99 |
+
Names, timestamps, and phone numbers were removed before committing. These examples are useful demo candidates because they are realistic, recent, and easier to explain than fully synthetic samples.
|
README.md
CHANGED
|
@@ -79,6 +79,7 @@ Safety architecture:
|
|
| 79 |
Training/eval artifacts:
|
| 80 |
|
| 81 |
- `eval/scam_eval.jsonl`: 100 hand-curated synthetic/sanitized eval cases.
|
|
|
|
| 82 |
- `training/generate_jawbreaker_data.py`: deterministic generator for larger train/dev/test splits.
|
| 83 |
- `training/data/train.jsonl`, `dev.jsonl`, `test.jsonl`: generated SFT records for Jawbreaker JSON behavior.
|
| 84 |
- `eval/generated_eval.jsonl`: generated holdout eval set.
|
|
|
|
| 79 |
Training/eval artifacts:
|
| 80 |
|
| 81 |
- `eval/scam_eval.jsonl`: 100 hand-curated synthetic/sanitized eval cases.
|
| 82 |
+
- `eval/field_examples.jsonl`: sanitized real-world examples from a friend, with names and phone numbers removed.
|
| 83 |
- `training/generate_jawbreaker_data.py`: deterministic generator for larger train/dev/test splits.
|
| 84 |
- `training/data/train.jsonl`, `dev.jsonl`, `test.jsonl`: generated SFT records for Jawbreaker JSON behavior.
|
| 85 |
- `eval/generated_eval.jsonl`: generated holdout eval set.
|
app.py
CHANGED
|
@@ -25,7 +25,8 @@ except ImportError:
|
|
| 25 |
EXAMPLES = [
|
| 26 |
"USPS: Your package is held due to an unpaid fee. Verify now: http://usps-track-secure.example",
|
| 27 |
"Hi Grandma, I lost my phone. This is my new number. Can you send $800 for rent today? Please don't tell Mom.",
|
| 28 |
-
"
|
|
|
|
| 29 |
]
|
| 30 |
|
| 31 |
FORCE_LIGHT_JS = """() => {
|
|
|
|
| 25 |
EXAMPLES = [
|
| 26 |
"USPS: Your package is held due to an unpaid fee. Verify now: http://usps-track-secure.example",
|
| 27 |
"Hi Grandma, I lost my phone. This is my new number. Can you send $800 for rent today? Please don't tell Mom.",
|
| 28 |
+
"Coinbase alert: We received a request to update the phone number on your account. If this wasn't you, call support immediately at [callback number].",
|
| 29 |
+
"Hi! I'm a recruiter from TikTok Shop. We are looking for a part-time assistant. Flexible remote work, 60 minutes per day, $330-$750 per day. Contact me on WhatsApp at [phone number].",
|
| 30 |
]
|
| 31 |
|
| 32 |
FORCE_LIGHT_JS = """() => {
|
eval/field_examples.jsonl
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"id":"field_coinbase_callback_001","category":"field_callback_phishing","input":"Coinbase alert: We received a request to update the phone number on your account. If this wasn't you, call support immediately at [callback number].","expected_risk_level":"dangerous","expected_scam_type":"callback_phishing","expected_tactics":["fake authority","urgency","callback request","account takeover"]}
|
| 2 |
+
{"id":"field_tiktok_shop_job_001","category":"field_job_scam","input":"Hi! I'm a recruiter from TikTok Shop. We are looking for a part-time assistant. This is a flexible remote opportunity that takes about 60 minutes per day. Pay is $330-$750 per day and payment is made immediately after each task. If interested, contact me on WhatsApp at [phone number].","expected_risk_level":"dangerous","expected_scam_type":"job_scam","expected_tactics":["fake job","too good to be true","off-platform contact","payment bait"]}
|
jawbreaker/schema.py
CHANGED
|
@@ -45,6 +45,7 @@ class ScamAnalysis:
|
|
| 45 |
"bank login",
|
| 46 |
"seed phrase",
|
| 47 |
"username",
|
|
|
|
| 48 |
]
|
| 49 |
if any(token in text for token in credential_tokens):
|
| 50 |
risk_level = "dangerous"
|
|
@@ -105,22 +106,46 @@ class ScamAnalysis:
|
|
| 105 |
"refund tool",
|
| 106 |
"call this number",
|
| 107 |
"call the support number",
|
|
|
|
| 108 |
"number shown",
|
|
|
|
| 109 |
]
|
| 110 |
):
|
| 111 |
risk_level = "dangerous"
|
| 112 |
scam_type = "tech_support"
|
| 113 |
tactics.extend(["fake authority", "remote access request"])
|
| 114 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
if any(token in text for token in ["prize", "lottery", "sweepstakes", "winner selected", "government grant"]):
|
| 116 |
risk_level = "dangerous"
|
| 117 |
scam_type = "prize_scam"
|
| 118 |
tactics.extend(["too good to be true", "fake authority"])
|
| 119 |
|
| 120 |
-
if any(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
risk_level = "dangerous"
|
| 122 |
scam_type = "job_scam"
|
| 123 |
tactics.extend(["fake job", "fake authority"])
|
|
|
|
|
|
|
| 124 |
|
| 125 |
if any(token in text for token in ["feel so close", "about us", "we can meet", "wallet was stolen"]):
|
| 126 |
risk_level = "dangerous"
|
|
@@ -189,11 +214,13 @@ def _guess_impersonation(text: str) -> str:
|
|
| 189 |
return "USPS or package carrier"
|
| 190 |
if "chase" in text or "bank" in text:
|
| 191 |
return "Bank or financial institution"
|
|
|
|
|
|
|
| 192 |
if any(token in text for token in ["grandma", "grandpa", "niece", "new number", "changed numbers"]):
|
| 193 |
return "Family member"
|
| 194 |
if "support" in text or "technician" in text:
|
| 195 |
return "Tech support"
|
| 196 |
-
if "recruiter" in text or "hiring" in text or "you are hired" in text:
|
| 197 |
return "Employer or recruiter"
|
| 198 |
if "prize" in text or "lottery" in text or "grant" in text:
|
| 199 |
return "Prize or grant office"
|
|
@@ -223,6 +250,8 @@ def _guess_ask(text: str, scam_type: str) -> str:
|
|
| 223 |
return "Pay or share details to claim prize"
|
| 224 |
if scam_type == "job_scam":
|
| 225 |
return "Pay or move money for job"
|
|
|
|
|
|
|
| 226 |
if "http://" in text or "https://" in text:
|
| 227 |
return "Open a link"
|
| 228 |
return "No direct ask found"
|
|
|
|
| 45 |
"bank login",
|
| 46 |
"seed phrase",
|
| 47 |
"username",
|
| 48 |
+
"phone number on your account",
|
| 49 |
]
|
| 50 |
if any(token in text for token in credential_tokens):
|
| 51 |
risk_level = "dangerous"
|
|
|
|
| 106 |
"refund tool",
|
| 107 |
"call this number",
|
| 108 |
"call the support number",
|
| 109 |
+
"call support",
|
| 110 |
"number shown",
|
| 111 |
+
"callback number",
|
| 112 |
]
|
| 113 |
):
|
| 114 |
risk_level = "dangerous"
|
| 115 |
scam_type = "tech_support"
|
| 116 |
tactics.extend(["fake authority", "remote access request"])
|
| 117 |
|
| 118 |
+
if any(token in text for token in ["coinbase", "crypto account", "account update"]) and any(
|
| 119 |
+
token in text for token in ["call support", "callback number", "contact support immediately"]
|
| 120 |
+
):
|
| 121 |
+
risk_level = "dangerous"
|
| 122 |
+
scam_type = "callback_phishing"
|
| 123 |
+
tactics.extend(["fake authority", "callback request", "account takeover"])
|
| 124 |
+
|
| 125 |
if any(token in text for token in ["prize", "lottery", "sweepstakes", "winner selected", "government grant"]):
|
| 126 |
risk_level = "dangerous"
|
| 127 |
scam_type = "prize_scam"
|
| 128 |
tactics.extend(["too good to be true", "fake authority"])
|
| 129 |
|
| 130 |
+
if any(
|
| 131 |
+
token in text
|
| 132 |
+
for token in [
|
| 133 |
+
"you are hired",
|
| 134 |
+
"job offer",
|
| 135 |
+
"training starts",
|
| 136 |
+
"equipment shipment",
|
| 137 |
+
"payroll setup",
|
| 138 |
+
"recruiter",
|
| 139 |
+
"part-time assistant",
|
| 140 |
+
"tiktok shop",
|
| 141 |
+
"whatsapp",
|
| 142 |
+
]
|
| 143 |
+
):
|
| 144 |
risk_level = "dangerous"
|
| 145 |
scam_type = "job_scam"
|
| 146 |
tactics.extend(["fake job", "fake authority"])
|
| 147 |
+
if any(token in text for token in ["per day", "$330", "$750", "payment is made immediately"]):
|
| 148 |
+
tactics.append("too good to be true")
|
| 149 |
|
| 150 |
if any(token in text for token in ["feel so close", "about us", "we can meet", "wallet was stolen"]):
|
| 151 |
risk_level = "dangerous"
|
|
|
|
| 214 |
return "USPS or package carrier"
|
| 215 |
if "chase" in text or "bank" in text:
|
| 216 |
return "Bank or financial institution"
|
| 217 |
+
if "coinbase" in text:
|
| 218 |
+
return "Coinbase or crypto platform"
|
| 219 |
if any(token in text for token in ["grandma", "grandpa", "niece", "new number", "changed numbers"]):
|
| 220 |
return "Family member"
|
| 221 |
if "support" in text or "technician" in text:
|
| 222 |
return "Tech support"
|
| 223 |
+
if "recruiter" in text or "hiring" in text or "you are hired" in text or "tiktok shop" in text:
|
| 224 |
return "Employer or recruiter"
|
| 225 |
if "prize" in text or "lottery" in text or "grant" in text:
|
| 226 |
return "Prize or grant office"
|
|
|
|
| 250 |
return "Pay or share details to claim prize"
|
| 251 |
if scam_type == "job_scam":
|
| 252 |
return "Pay or move money for job"
|
| 253 |
+
if scam_type == "callback_phishing":
|
| 254 |
+
return "Call a number from the message"
|
| 255 |
if "http://" in text or "https://" in text:
|
| 256 |
return "Open a link"
|
| 257 |
return "No direct ask found"
|
tests/test_eval_dataset.py
CHANGED
|
@@ -64,3 +64,19 @@ def test_generated_eval_has_test_count_and_safe_urls() -> None:
|
|
| 64 |
for row in rows:
|
| 65 |
assert row["expected_risk_level"] in RISK_LEVELS
|
| 66 |
assert ".example" in row["input"] or "http" not in row["input"].lower()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
for row in rows:
|
| 65 |
assert row["expected_risk_level"] in RISK_LEVELS
|
| 66 |
assert ".example" in row["input"] or "http" not in row["input"].lower()
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def test_field_examples_are_sanitized_and_valid() -> None:
|
| 70 |
+
rows = [
|
| 71 |
+
json.loads(line)
|
| 72 |
+
for line in Path("eval/field_examples.jsonl").read_text(encoding="utf-8").splitlines()
|
| 73 |
+
if line.strip()
|
| 74 |
+
]
|
| 75 |
+
|
| 76 |
+
assert len(rows) >= 2
|
| 77 |
+
for row in rows:
|
| 78 |
+
assert {"id", "category", "input", "expected_risk_level", "expected_scam_type", "expected_tactics"} <= set(row)
|
| 79 |
+
assert row["expected_risk_level"] in RISK_LEVELS
|
| 80 |
+
assert "[phone number]" in row["input"] or "[callback number]" in row["input"]
|
| 81 |
+
assert "Vineel" not in row["input"]
|
| 82 |
+
assert "+1" not in row["input"]
|