Spaces:

DevikaJ2005
/

fraudshield-1

Sleeping

App Files Files Community

DevikaJ2005 committed on Apr 26

Commit

e97acd1

1 Parent(s): 9672a3e

Add training-first RL architecture with tracking

Browse files

Files changed (3) hide show

configs/colab_qlora_grpo.json +4 -4
notebooks/fraudshield_trl_colab.ipynb +94 -32
train.py +106 -4

configs/colab_qlora_grpo.json CHANGED Viewed

@@ -24,13 +24,13 @@
     "checkpoint_dir": "artifacts/rl_runs/colab_qlora_grpo/checkpoints",
     "save_to_drive": true,
     "drive_dir": "/content/drive/MyDrive/fraudshield",
-    "num_train_epochs": 2,
     "per_device_train_batch_size": 2,
     "gradient_accumulation_steps": 4,
-    "learning_rate": 0.0001,
     "eval_every_steps": 10,
     "save_every_steps": 20,
-    "warmstart_rollouts_per_task": 24,
     "rl_rollouts_per_task": 8,
     "max_prompt_tokens": 2048,
     "max_completion_tokens": 220,
@@ -41,7 +41,7 @@
     "run_name": "fraudshield-colab-run",
     "resume_from_checkpoint": null,
     "public_curriculum_dataset": "Phoenix21/mock_fraud-detection-dataset",
-    "public_curriculum_rows": 2500
   },
   "evaluation": {
     "tasks": [

     "checkpoint_dir": "artifacts/rl_runs/colab_qlora_grpo/checkpoints",
     "save_to_drive": true,
     "drive_dir": "/content/drive/MyDrive/fraudshield",
+    "num_train_epochs": 3,
     "per_device_train_batch_size": 2,
     "gradient_accumulation_steps": 4,
+    "learning_rate": 5e-05,
     "eval_every_steps": 10,
     "save_every_steps": 20,
+    "warmstart_rollouts_per_task": 60,
     "rl_rollouts_per_task": 8,
     "max_prompt_tokens": 2048,
     "max_completion_tokens": 220,
     "run_name": "fraudshield-colab-run",
     "resume_from_checkpoint": null,
     "public_curriculum_dataset": "Phoenix21/mock_fraud-detection-dataset",
+    "public_curriculum_rows": 500
   },
   "evaluation": {
     "tasks": [

notebooks/fraudshield_trl_colab.ipynb CHANGED Viewed

@@ -8,14 +8,25 @@
       "source": [
         "# FraudShield Colab Training Notebook\n",
         "\n",
-        "This notebook trains an **open-source LLM policy** for FraudShield using a two-stage curriculum:\n",
         "\n",
-        "1. **Public fraud-data adaptation** from a Hugging Face dataset\n",
-        "2. **FraudShield policy adaptation** from environment-compatible action traces\n",
         "\n",
         "The goal is to learn more than a static heuristic by giving the model broader fraud signals first, then teaching it how to act inside the FraudShield workflow.\n"
       ],
       "id": "Wadw-uDzhxuI"
     },
     {
       "cell_type": "code",
@@ -98,18 +109,24 @@
       ],
       "source": [
         "%pip uninstall -y unsloth unsloth_zoo trl transformers tokenizers\n",
-        "%pip install -q openenv-core datasets peft accelerate sentencepiece matplotlib pandas\n",
-        "%pip install -q \"transformers==4.51.3\" \"trl==0.19.1\"\n",
         "%pip install -q \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\"\n",
         "\n",
         "%cd /content\n",
         "!rm -rf Fraudshield\n",
         "!git clone https://github.com/DevikaJ2005/Fraudshield.git\n",
         "%cd /content/Fraudshield\n",
         "!ls\n",
         "%pip install -q -e .\n"
       ],
       "id": "yqcGck2nhxuN"
     },
     {
       "cell_type": "code",
@@ -121,18 +138,22 @@
       "source": [
         "import os\n",
         "from getpass import getpass\n",
-        "\n",
         "from huggingface_hub import login\n",
         "\n",
-        "token = getpass('Enter your HF token (optional but recommended): ')\n",
         "if token.strip():\n",
         "    os.environ['HF_TOKEN'] = token.strip()\n",
         "    login(token=token.strip())\n",
         "    print('HF login completed.')\n",
         "else:\n",
         "    print('Skipping HF login for now.')\n"
       ],
       "id": "s4fNpOrHhxuP"
     },
     {
       "cell_type": "code",
@@ -143,15 +164,19 @@
       "outputs": [],
       "source": [
         "import torch\n",
-        "\n",
         "print('cuda available:', torch.cuda.is_available())\n",
         "print('device count:', torch.cuda.device_count())\n",
         "if torch.cuda.is_available():\n",
         "    print('gpu name:', torch.cuda.get_device_name(0))\n",
         "else:\n",
         "    raise RuntimeError('GPU not available. In Colab, set Runtime > Change runtime type > GPU, then restart.')\n"
       ],
       "id": "ezOjPxWHhxuQ"
     },
     {
       "cell_type": "code",
@@ -162,30 +187,46 @@
       "outputs": [],
       "source": [
         "import json\n",
-        "import os\n",
-        "import random\n",
-        "import subprocess\n",
-        "from datetime import datetime\n",
-        "\n",
-        "import pandas as pd\n",
-        "from datasets import Dataset, load_dataset\n",
         "\n",
-        "from fraudshield_env import FraudShieldEnvironment\n",
-        "from llm_agent import SnapshotCalibratedFraudDetectionAgent\n",
-        "\n",
-        "env = FraudShieldEnvironment(data_path='data', seed=42)\n",
-        "assert env.load_data(), 'FraudShield snapshot failed to load.'\n",
-        "print('FraudShield loaded:', env.data_loader.get_bundle_summary())\n",
-        "\n",
-        "random.seed(42)\n",
         "\n",
-        "CANONICAL_ALIASES = [\n",
-        "    'merchant_profile',\n",
-        "    'customer_profile',\n",
-        "    'network_graph',\n",
-        "    'payment_trace',\n",
-        "    'policy_review',\n",
         "]\n",
         "\n",
         "\n",
         "def serialize_observation(observation):\n",
@@ -366,6 +407,14 @@
         "print(fraudshield_dataset[0]['text'][:900])\n"
       ],
       "id": "b6x653wbhxuR"
     },
     {
       "cell_type": "code",
@@ -375,11 +424,19 @@
       },
       "outputs": [],
       "source": [
-        "from unsloth import FastLanguageModel\n",
         "\n",
-        "MODEL_NAME = 'unsloth/Qwen2.5-1.5B-Instruct'\n",
-        "MAX_SEQ_LENGTH = 2048\n",
         "\n",
         "model, tokenizer = FastLanguageModel.from_pretrained(\n",
         "    model_name=MODEL_NAME,\n",
         "    max_seq_length=MAX_SEQ_LENGTH,\n",
@@ -583,6 +640,11 @@
         "print('Artifacts saved: reward_curve.png, loss_curve.png, training_summary.json')\n"
       ],
       "id": "F-zz_6TYhxuV"
     }
   ],
   "metadata": {

       "source": [
         "# FraudShield Colab Training Notebook\n",
         "\n",
+        "This notebook runs the **training-first FraudShield stack** directly from the repo.\n",
         "\n",
+        "It uses:\n",
+        "- `train.py` for Colab-friendly curriculum + QLoRA training\n",
+        "- `evaluate.py` for fixed-task evaluation\n",
+        "- `configs/colab_qlora_grpo.json` for reproducible settings\n",
         "\n",
+<<<<<<< HEAD
         "The goal is to learn more than a static heuristic by giving the model broader fraud signals first, then teaching it how to act inside the FraudShield workflow.\n"
       ],
       "id": "Wadw-uDzhxuI"
+=======
+        "The current setup is tuned to favor **FraudShield workflow learning** over generic imitation:\n",
+        "- fewer public curriculum rows\n",
+        "- more expert FraudShield rollouts\n",
+        "- lower learning rate\n",
+        "- longer stage-2 adaptation\n"
+      ]
+>>>>>>> 43cc51d (Use expert teacher rollouts for stronger retraining)
     },
     {
       "cell_type": "code",
       ],
       "source": [
         "%pip uninstall -y unsloth unsloth_zoo trl transformers tokenizers\n",
+        "%pip install -q -U pip\n",
+        "%pip install -q openenv-core matplotlib pandas\n",
+        "%pip install -q -e \".[rl]\"\n",
         "%pip install -q \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\"\n",
         "\n",
         "%cd /content\n",
         "!rm -rf Fraudshield\n",
         "!git clone https://github.com/DevikaJ2005/Fraudshield.git\n",
         "%cd /content/Fraudshield\n",
+<<<<<<< HEAD
         "!ls\n",
         "%pip install -q -e .\n"
       ],
       "id": "yqcGck2nhxuN"
+=======
+        "!pip install -q -e \".[rl]\"\n"
+      ]
+>>>>>>> 43cc51d (Use expert teacher rollouts for stronger retraining)
     },
     {
       "cell_type": "code",
       "source": [
         "import os\n",
         "from getpass import getpass\n",
         "from huggingface_hub import login\n",
         "\n",
+        "token = getpass('Enter your HF token (recommended): ')\n",
         "if token.strip():\n",
         "    os.environ['HF_TOKEN'] = token.strip()\n",
         "    login(token=token.strip())\n",
         "    print('HF login completed.')\n",
         "else:\n",
+<<<<<<< HEAD
         "    print('Skipping HF login for now.')\n"
       ],
       "id": "s4fNpOrHhxuP"
+=======
+        "    print('No HF token provided.')\n"
+      ]
+>>>>>>> 43cc51d (Use expert teacher rollouts for stronger retraining)
     },
     {
       "cell_type": "code",
       "outputs": [],
       "source": [
         "import torch\n",
         "print('cuda available:', torch.cuda.is_available())\n",
         "print('device count:', torch.cuda.device_count())\n",
         "if torch.cuda.is_available():\n",
         "    print('gpu name:', torch.cuda.get_device_name(0))\n",
         "else:\n",
+<<<<<<< HEAD
         "    raise RuntimeError('GPU not available. In Colab, set Runtime > Change runtime type > GPU, then restart.')\n"
       ],
       "id": "ezOjPxWHhxuQ"
+=======
+        "    raise RuntimeError('GPU not available. Enable GPU in Runtime > Change runtime type, then restart.')\n"
+      ]
+>>>>>>> 43cc51d (Use expert teacher rollouts for stronger retraining)
     },
     {
       "cell_type": "code",
       "outputs": [],
       "source": [
         "import json\n",
+        "from pathlib import Path\n",
         "\n",
+        "config_path = Path('configs/colab_qlora_grpo.json')\n",
+        "config = json.loads(config_path.read_text(encoding='utf-8'))\n",
+        "print(json.dumps(config, indent=2))\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "!python train.py --config configs/colab_qlora_grpo.json"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "!python evaluate.py --config configs/colab_qlora_grpo.json"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from IPython.display import Image, display\n",
+        "!find artifacts -name \"*.png\" -o -name \"*.json\" | sort\n",
         "\n",
+        "paths = [\n",
+        "    'artifacts/rl_runs/colab_qlora_grpo/loss_vs_steps.png',\n",
+        "    'artifacts/rl_runs/colab_qlora_grpo/reward_vs_steps.png',\n",
+        "    'artifacts/plots/evaluation_rewards.png',\n",
         "]\n",
+<<<<<<< HEAD
         "\n",
         "\n",
         "def serialize_observation(observation):\n",
         "print(fraudshield_dataset[0]['text'][:900])\n"
       ],
       "id": "b6x653wbhxuR"
+=======
+        "for path in paths:\n",
+        "    try:\n",
+        "        display(Image(path))\n",
+        "    except Exception as exc:\n",
+        "        print('Could not display', path, exc)\n"
+      ]
+>>>>>>> 43cc51d (Use expert teacher rollouts for stronger retraining)
     },
     {
       "cell_type": "code",
       },
       "outputs": [],
       "source": [
+        "import json\n",
+        "from pathlib import Path\n",
         "\n",
+        "summary_candidates = [\n",
+        "    Path('artifacts/rl_runs/colab_qlora_grpo/training_run_summary.json'),\n",
+        "    Path('artifacts/rl_runs/colab_qlora_grpo/evaluation_report.json'),\n",
+        "]\n",
+        "for candidate in summary_candidates:\n",
+        "    if candidate.exists():\n",
+        "        print(f'===== {candidate} =====')\n",
+        "        print(json.dumps(json.loads(candidate.read_text(encoding='utf-8')), indent=2)[:12000])\n",
         "\n",
+<<<<<<< HEAD
         "model, tokenizer = FastLanguageModel.from_pretrained(\n",
         "    model_name=MODEL_NAME,\n",
         "    max_seq_length=MAX_SEQ_LENGTH,\n",
         "print('Artifacts saved: reward_curve.png, loss_curve.png, training_summary.json')\n"
       ],
       "id": "F-zz_6TYhxuV"
+=======
+        "!zip -r fraudshield_training_outputs.zip artifacts/rl_runs/colab_qlora_grpo artifacts/plots\n",
+        "print('Created fraudshield_training_outputs.zip')\n"
+      ]
+>>>>>>> 43cc51d (Use expert teacher rollouts for stronger retraining)
     }
   ],
   "metadata": {

train.py CHANGED Viewed

@@ -14,9 +14,111 @@ from datasets import Dataset, load_dataset
 from config import ExperimentConfig
 from environment import FraudShieldTextEnvironment
-from llm_agent import SnapshotCalibratedFraudDetectionAgent
 from utils import ensure_dir, save_json, seed_everything
 def build_public_curriculum(config: ExperimentConfig) -> Dataset:
     """Load public fraud examples and convert them into action-centric prompts."""
@@ -56,17 +158,17 @@ def build_public_curriculum(config: ExperimentConfig) -> Dataset:
 def build_rollout_dataset(config: ExperimentConfig) -> Dataset:
-    """Generate environment-compatible trajectories from the calibrated baseline."""
     text_env = FraudShieldTextEnvironment(config.environment, config.reward_weights)
-    agent = SnapshotCalibratedFraudDetectionAgent()
     rows: list[dict[str, Any]] = []
     for task_name in config.evaluation.tasks:
         for _ in range(config.training.warmstart_rollouts_per_task):
             prompt = text_env.reset(task=task_name)
             done = False
             while not done:
-                action = agent.decide(text_env.current_observation)
                 payload = {
                     "action_type": "decide" if action.action_type.value == "resolve_case" else "investigate",
                     "investigation_target": action.action_type.value,

 from config import ExperimentConfig
 from environment import FraudShieldTextEnvironment
+from models import ActionTypeEnum, FraudCheckAction
 from utils import ensure_dir, save_json, seed_everything
class ExpertCurriculumTeacher:
    """Teacher policy that uses hidden task structure to generate stronger trajectories.

    The teacher looks up the ground-truth workflow case (its ``role`` and
    ``correct_resolution``) through ``text_env.env.workflow_cases``, so the
    actions it emits are demonstrations built from the answer key rather than
    decisions inferred from the observation alone.
    """

    # Canned justification attached to the final RESOLVE_CASE action, keyed by
    # the ``.value`` of the case's correct resolution.
    _RESOLUTION_REASONING: dict[str, str] = {
        "approve": "The collected evidence supports approval without additional intervention.",
        "block": "The combined evidence supports blocking the transaction as high risk.",
        "hold": "The evidence remains risky enough to hold the case for more controlled handling.",
        "request_docs": "The case is ambiguous enough that supporting documents are the safest next step.",
        "escalate": "The linked-cluster evidence and loss risk justify escalation to a higher-touch reviewer.",
    }

    def decide(self, text_env: FraudShieldTextEnvironment) -> FraudCheckAction:
        """Return the next expert action for the environment's current case.

        Order of preference:

        1. Review the transaction if ``"transaction_review"`` is not revealed yet.
        2. While investigation budget remains, fetch the first planned piece
           of evidence that has not been revealed.
        3. Add a case note when the observation says one is required.
        4. Resolve the case with its ground-truth resolution.

        Args:
            text_env: Text environment wrapper; its ``current_observation``
                and ``env.workflow_cases`` are read, nothing is mutated here.

        Returns:
            The ``FraudCheckAction`` to submit next.
        """
        observation = text_env.current_observation
        case_id = observation.case_id
        revealed = observation.revealed_evidence
        case = text_env.env.workflow_cases[case_id]
        # ``or 0`` tolerates the key being present with a ``None`` value, which
        # would otherwise make ``int()`` raise a TypeError.
        budget = int(observation.app_context.get("investigation_budget_remaining", 0) or 0)

        if "transaction_review" not in revealed:
            return FraudCheckAction(
                case_id=case_id,
                action_type=ActionTypeEnum.REVIEW_TRANSACTION,
                reasoning="Open the transaction details before taking any deeper investigative step.",
            )

        # The budget does not change while choosing a step, so test it once
        # instead of on every planned entry.
        if budget > 0:
            for evidence_key, action_type, reasoning in self._planned_evidence_sequence(case):
                if evidence_key not in revealed:
                    return FraudCheckAction(case_id=case_id, action_type=action_type, reasoning=reasoning)

        if observation.note_required:
            return FraudCheckAction(
                case_id=case_id,
                action_type=ActionTypeEnum.ADD_CASE_NOTE,
                note_text=self._case_note(case),
            )

        return FraudCheckAction(
            case_id=case_id,
            action_type=ActionTypeEnum.RESOLVE_CASE,
            resolution=case["correct_resolution"],
            reasoning=self._resolution_reasoning(case),
        )

    def _planned_evidence_sequence(self, case: dict[str, Any]) -> list[tuple[str, ActionTypeEnum, str]]:
        """Return the ordered evidence plan for ``case``.

        Each entry is ``(evidence_key, action_type, reasoning)``; ``decide``
        compares ``evidence_key`` against the observation's revealed evidence.

        Plans by case shape:

        * ``single`` case resolving to ``request_docs``: customer, policy, merchant
        * ``primary``: network graph, merchant, policy
        * ``secondary``: network graph, customer, policy
        * anything else: merchant only
        """
        customer_step = (
            "customer_profile",
            ActionTypeEnum.FETCH_CUSTOMER_PROFILE,
            "Customer history is needed to understand whether this pattern reflects risky buyer behavior.",
        )
        merchant_step = (
            "merchant_profile",
            ActionTypeEnum.FETCH_MERCHANT_PROFILE,
            "Merchant health helps explain whether the case risk comes from the seller side.",
        )
        network_step = (
            "network_graph",
            ActionTypeEnum.FETCH_NETWORK_GRAPH,
            "Linked-activity evidence is needed to confirm whether this case participates in a broader cluster.",
        )
        # NOTE(review): confirm "policy_guide" is the key the environment adds
        # to revealed_evidence after CHECK_POLICY; a mismatch would make the
        # teacher repeat the policy check until the budget is exhausted.
        policy_step = (
            "policy_guide",
            ActionTypeEnum.CHECK_POLICY,
            "Policy guidance is required before choosing the final route.",
        )

        role = case["role"]
        if role == "single" and case["correct_resolution"].value == "request_docs":
            return [customer_step, policy_step, merchant_step]
        if role == "primary":
            return [network_step, merchant_step, policy_step]
        if role == "secondary":
            return [network_step, customer_step, policy_step]
        return [merchant_step]

    def _case_note(self, case: dict[str, Any]) -> str:
        """Return the case-note text matching the case's role and resolution."""
        role = case["role"]
        if role == "primary":
            return "Reviewed the transaction trace, graph evidence, merchant signals, and policy guidance before escalating the linked primary case."
        if role == "secondary":
            return "Reviewed the transaction trace, graph evidence, customer history, and policy guidance before finalizing the linked secondary case."
        if case["correct_resolution"].value == "request_docs":
            return "Reviewed transaction, customer, merchant, and policy evidence before requesting more supporting documents."
        return "Reviewed the transaction evidence and documented the case before final routing."

    def _resolution_reasoning(self, case: dict[str, Any]) -> str:
        """Return the canned reasoning for the case's correct resolution.

        Raises:
            KeyError: If the resolution value has no reasoning template.
        """
        resolution = case["correct_resolution"].value
        try:
            return self._RESOLUTION_REASONING[resolution]
        except KeyError:
            # Same exception type as a plain dict lookup, but say which value
            # was unexpected so a new resolution type is easy to diagnose.
            raise KeyError(f"No reasoning template for resolution {resolution!r}") from None
 def build_public_curriculum(config: ExperimentConfig) -> Dataset:
     """Load public fraud examples and convert them into action-centric prompts."""
 def build_rollout_dataset(config: ExperimentConfig) -> Dataset:
+    """Generate environment-compatible trajectories from an expert teacher."""
     text_env = FraudShieldTextEnvironment(config.environment, config.reward_weights)
+    agent = ExpertCurriculumTeacher()
     rows: list[dict[str, Any]] = []
     for task_name in config.evaluation.tasks:
         for _ in range(config.training.warmstart_rollouts_per_task):
             prompt = text_env.reset(task=task_name)
             done = False
             while not done:
+                action = agent.decide(text_env)
                 payload = {
                     "action_type": "decide" if action.action_type.value == "resolve_case" else "investigate",
                     "investigation_target": action.action_type.value,