Secured

Sleeping

App Files Files Community

gowtham0992 Codex commited on 26 days ago

Commit

113d9bf

1 Parent(s): 26c0a94

Add sanitized field scam examples

Browse files

Co-authored-by: Codex <codex@openai.com>

Files changed (6) hide show

FIELD_NOTES.md +9 -0
README.md +1 -0
app.py +2 -1
eval/field_examples.jsonl +2 -0
jawbreaker/schema.py +31 -2
tests/test_eval_dataset.py +16 -0

FIELD_NOTES.md CHANGED Viewed

@@ -88,3 +88,12 @@ Guardrail: winning matters, but not by overclaiming. The project should not clai
 Modal credits are the right place to run the MiniCPM LoRA job. Added `training/modal_train.py` so the same generated train/dev split can run on an A100 with outputs stored in a Modal volume.
 Claim rule: Modal usage becomes a submission claim only after a real Modal run completes. Well-Tuned becomes a claim only after the resulting adapter is published and beats the base model or fallback on eval.

 Modal credits are the right place to run the MiniCPM LoRA job. Added `training/modal_train.py` so the same generated train/dev split can run on an A100 with outputs stored in a Modal volume.
 Claim rule: Modal usage becomes a submission claim only after a real Modal run completes. Well-Tuned becomes a claim only after the resulting adapter is published and beats the base model or fallback on eval.
+## 2026-06-06 Field Examples
+Added two sanitized real-world scam examples from a friend:
+- Coinbase account phone-number update callback lure
+- TikTok Shop part-time assistant / WhatsApp job lure
+Names, timestamps, and phone numbers were removed before committing. These examples are useful demo candidates because they are realistic, recent, and easier to explain than fully synthetic samples.

README.md CHANGED Viewed

@@ -79,6 +79,7 @@ Safety architecture:
 Training/eval artifacts:
 - `eval/scam_eval.jsonl`: 100 hand-curated synthetic/sanitized eval cases.
 - `training/generate_jawbreaker_data.py`: deterministic generator for larger train/dev/test splits.
 - `training/data/train.jsonl`, `dev.jsonl`, `test.jsonl`: generated SFT records for Jawbreaker JSON behavior.
 - `eval/generated_eval.jsonl`: generated holdout eval set.

 Training/eval artifacts:
 - `eval/scam_eval.jsonl`: 100 hand-curated synthetic/sanitized eval cases.
+- `eval/field_examples.jsonl`: sanitized real-world examples from a friend, with names and phone numbers removed.
 - `training/generate_jawbreaker_data.py`: deterministic generator for larger train/dev/test splits.
 - `training/data/train.jsonl`, `dev.jsonl`, `test.jsonl`: generated SFT records for Jawbreaker JSON behavior.
 - `eval/generated_eval.jsonl`: generated holdout eval set.

app.py CHANGED Viewed

@@ -25,7 +25,8 @@ except ImportError:
 EXAMPLES = [
     "USPS: Your package is held due to an unpaid fee. Verify now: http://usps-track-secure.example",
     "Hi Grandma, I lost my phone. This is my new number. Can you send $800 for rent today? Please don't tell Mom.",
-    "Chase fraud alert: Did you attempt a $249.00 purchase at TARGET? Reply YES or NO.",
 ]
 FORCE_LIGHT_JS = """() => {

 EXAMPLES = [
     "USPS: Your package is held due to an unpaid fee. Verify now: http://usps-track-secure.example",
     "Hi Grandma, I lost my phone. This is my new number. Can you send $800 for rent today? Please don't tell Mom.",
+    "Coinbase alert: We received a request to update the phone number on your account. If this wasn't you, call support immediately at [callback number].",
+    "Hi! I'm a recruiter from TikTok Shop. We are looking for a part-time assistant. Flexible remote work, 60 minutes per day, $330-$750 per day. Contact me on WhatsApp at [phone number].",
 ]
 FORCE_LIGHT_JS = """() => {

eval/field_examples.jsonl ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ {"id":"field_coinbase_callback_001","category":"field_callback_phishing","input":"Coinbase alert: We received a request to update the phone number on your account. If this wasn't you, call support immediately at [callback number].","expected_risk_level":"dangerous","expected_scam_type":"callback_phishing","expected_tactics":["fake authority","urgency","callback request","account takeover"]}
2	+ {"id":"field_tiktok_shop_job_001","category":"field_job_scam","input":"Hi! I'm a recruiter from TikTok Shop. We are looking for a part-time assistant. This is a flexible remote opportunity that takes about 60 minutes per day. Pay is $330-$750 per day and payment is made immediately after each task. If interested, contact me on WhatsApp at [phone number].","expected_risk_level":"dangerous","expected_scam_type":"job_scam","expected_tactics":["fake job","too good to be true","off-platform contact","payment bait"]}

jawbreaker/schema.py CHANGED Viewed

@@ -45,6 +45,7 @@ class ScamAnalysis:
             "bank login",
             "seed phrase",
             "username",
         ]
         if any(token in text for token in credential_tokens):
             risk_level = "dangerous"
@@ -105,22 +106,46 @@ class ScamAnalysis:
                 "refund tool",
                 "call this number",
                 "call the support number",
                 "number shown",
             ]
         ):
             risk_level = "dangerous"
             scam_type = "tech_support"
             tactics.extend(["fake authority", "remote access request"])
         if any(token in text for token in ["prize", "lottery", "sweepstakes", "winner selected", "government grant"]):
             risk_level = "dangerous"
             scam_type = "prize_scam"
             tactics.extend(["too good to be true", "fake authority"])
-        if any(token in text for token in ["you are hired", "job offer", "training starts", "equipment shipment", "payroll setup"]):
             risk_level = "dangerous"
             scam_type = "job_scam"
             tactics.extend(["fake job", "fake authority"])
         if any(token in text for token in ["feel so close", "about us", "we can meet", "wallet was stolen"]):
             risk_level = "dangerous"
@@ -189,11 +214,13 @@ def _guess_impersonation(text: str) -> str:
         return "USPS or package carrier"
     if "chase" in text or "bank" in text:
         return "Bank or financial institution"
     if any(token in text for token in ["grandma", "grandpa", "niece", "new number", "changed numbers"]):
         return "Family member"
     if "support" in text or "technician" in text:
         return "Tech support"
-    if "recruiter" in text or "hiring" in text or "you are hired" in text:
         return "Employer or recruiter"
     if "prize" in text or "lottery" in text or "grant" in text:
         return "Prize or grant office"
@@ -223,6 +250,8 @@ def _guess_ask(text: str, scam_type: str) -> str:
         return "Pay or share details to claim prize"
     if scam_type == "job_scam":
         return "Pay or move money for job"
     if "http://" in text or "https://" in text:
         return "Open a link"
     return "No direct ask found"

             "bank login",
             "seed phrase",
             "username",
+            "phone number on your account",
         ]
         if any(token in text for token in credential_tokens):
             risk_level = "dangerous"
                 "refund tool",
                 "call this number",
                 "call the support number",
+                "call support",
                 "number shown",
+                "callback number",
             ]
         ):
             risk_level = "dangerous"
             scam_type = "tech_support"
             tactics.extend(["fake authority", "remote access request"])
+        if any(token in text for token in ["coinbase", "crypto account", "account update"]) and any(
+            token in text for token in ["call support", "callback number", "contact support immediately"]
+        ):
+            risk_level = "dangerous"
+            scam_type = "callback_phishing"
+            tactics.extend(["fake authority", "callback request", "account takeover"])
         if any(token in text for token in ["prize", "lottery", "sweepstakes", "winner selected", "government grant"]):
             risk_level = "dangerous"
             scam_type = "prize_scam"
             tactics.extend(["too good to be true", "fake authority"])
+        if any(
+            token in text
+            for token in [
+                "you are hired",
+                "job offer",
+                "training starts",
+                "equipment shipment",
+                "payroll setup",
+                "recruiter",
+                "part-time assistant",
+                "tiktok shop",
+                "whatsapp",
+            ]
+        ):
             risk_level = "dangerous"
             scam_type = "job_scam"
             tactics.extend(["fake job", "fake authority"])
+            if any(token in text for token in ["per day", "$330", "$750", "payment is made immediately"]):
+                tactics.append("too good to be true")
         if any(token in text for token in ["feel so close", "about us", "we can meet", "wallet was stolen"]):
             risk_level = "dangerous"
         return "USPS or package carrier"
     if "chase" in text or "bank" in text:
         return "Bank or financial institution"
+    if "coinbase" in text:
+        return "Coinbase or crypto platform"
     if any(token in text for token in ["grandma", "grandpa", "niece", "new number", "changed numbers"]):
         return "Family member"
     if "support" in text or "technician" in text:
         return "Tech support"
+    if "recruiter" in text or "hiring" in text or "you are hired" in text or "tiktok shop" in text:
         return "Employer or recruiter"
     if "prize" in text or "lottery" in text or "grant" in text:
         return "Prize or grant office"
         return "Pay or share details to claim prize"
     if scam_type == "job_scam":
         return "Pay or move money for job"
+    if scam_type == "callback_phishing":
+        return "Call a number from the message"
     if "http://" in text or "https://" in text:
         return "Open a link"
     return "No direct ask found"

tests/test_eval_dataset.py CHANGED Viewed

@@ -64,3 +64,19 @@ def test_generated_eval_has_test_count_and_safe_urls() -> None:
     for row in rows:
         assert row["expected_risk_level"] in RISK_LEVELS
         assert ".example" in row["input"] or "http" not in row["input"].lower()

     for row in rows:
         assert row["expected_risk_level"] in RISK_LEVELS
         assert ".example" in row["input"] or "http" not in row["input"].lower()
+def test_field_examples_are_sanitized_and_valid() -> None:
+    rows = [
+        json.loads(line)
+        for line in Path("eval/field_examples.jsonl").read_text(encoding="utf-8").splitlines()
+        if line.strip()
+    ]
+    assert len(rows) >= 2
+    for row in rows:
+        assert {"id", "category", "input", "expected_risk_level", "expected_scam_type", "expected_tactics"} <= set(row)
+        assert row["expected_risk_level"] in RISK_LEVELS
+        assert "[phone number]" in row["input"] or "[callback number]" in row["input"]
+        assert "Vineel" not in row["input"]
+        assert "+1" not in row["input"]