{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "cell-01-install",
   "metadata": {},
   "outputs": [
    {
     "ename": "",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
      "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
      "\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
      "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
     ]
    },
    {
     "ename": "",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31mCanceled future for execute_request message before replies were done"
     ]
    },
    {
     "ename": "",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31mCanceled future for execute_request message before replies were done. \n",
      "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
     ]
    }
   ],
   "source": [
    "# ╔══════════════════════════════════════════════════════════════╗\n",
    "# ║  CELL 1 — Install packages                                   ║\n",
    "# ║  RUN THIS CELL ALONE FIRST — it will auto-restart runtime    ║\n",
    "# ╚══════════════════════════════════════════════════════════════╝\n",
    "import subprocess, sys, os\n",
    "\n",
    "def pip(*pkgs):\n",
    "    subprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", \"-q\", *pkgs])\n",
    "\n",
    "# No pinned numpy/pandas — let Colab use its pre-installed compatible versions.\n",
    "# Pinning old numpy/pandas causes the 'mtrand ABI mismatch' ValueError.\n",
    "pip(\n",
    "    \"datasets>=2.18.0\",\n",
    "    \"transformers>=4.40.0\",\n",
    "    \"sentence-transformers>=2.7.0\",\n",
    "    \"scikit-learn>=1.4.0\",\n",
    "    \"tqdm>=4.66.0\",\n",
    "    \"accelerate>=0.26.0\",\n",
    "    \"evaluate\",\n",
    ")\n",
    "\n",
    "print(\"✅ Packages installed — restarting runtime now …\")\n",
    "os.kill(os.getpid(), 9)  # auto-restart; Colab reconnects in ~5 s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "22cff354",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m527.3/527.3 kB\u001b[0m \u001b[31m16.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.6/177.6 kB\u001b[0m \u001b[31m20.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
      "gcsfs 2025.3.0 requires fsspec==2025.3.0, but you have fsspec 2024.6.1 which is incompatible.\u001b[0m\u001b[31m\n",
      "\u001b[0m"
     ]
    }
   ],
   "source": [
    "!pip install -q \"datasets==2.21.0\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "cell-02-gpu",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n",
      "CUDA available: True\n",
      "Device name:    Tesla T4\n"
     ]
    }
   ],
   "source": [
    "# ╔══════════════════════════════════════════════════════════════╗\n",
    "# ║  CELL 2 — Mount Drive + GPU check                            ║\n",
    "# ║  Run AFTER the runtime has restarted                         ║\n",
    "# ╚══════════════════════════════════════════════════════════════╝\n",
    "from google.colab import drive\n",
    "drive.mount(\"/content/drive\")\n",
    "\n",
    "import torch\n",
    "print(\"CUDA available:\", torch.cuda.is_available())\n",
    "print(\"Device name:   \", torch.cuda.get_device_name(0) if torch.cuda.is_available() else \"CPU\")\n",
    "assert torch.cuda.is_available(), \"❌ No GPU — set Runtime type to T4 GPU!\"\n",
    "DEVICE = 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "cell-03-config",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "✅ Config ready  |  output → /content/drive/MyDrive/Athernex/nli_contract_model_final\n"
     ]
    }
   ],
   "source": [
    "# ╔══════════════════════════════════════════════════════════════╗\n",
    "# ║  CELL 3 — Config                                             ║\n",
    "# ╚══════════════════════════════════════════════════════════════╝\n",
    "import os\n",
    "\n",
    "CNLI_SIZE  = 6820    # full ContractNLI train split\n",
    "MNLI_SIZE  = 50000   # pool; genre filter keeps ~8-10k government rows\n",
    "SYNTH_SIZE = 1000    # synthetic contradiction pairs\n",
    "\n",
    "BASE_MODEL = \"typeform/distilbert-base-uncased-mnli\"\n",
    "OUTPUT_DIR = \"/content/drive/MyDrive/Athernex/nli_contract_model_final\"\n",
    "EPOCHS     = 5\n",
    "BATCH_SIZE = 32      # T4 handles 32 at max_length=128\n",
    "LR         = 2e-5\n",
    "MAX_LEN    = 128\n",
    "\n",
    "LABEL2ID = {\"entailment\": 0, \"contradiction\": 1, \"neutral\": 2}\n",
    "ID2LABEL = {v: k for k, v in LABEL2ID.items()}\n",
    "\n",
    "os.makedirs(OUTPUT_DIR, exist_ok=True)\n",
    "print(f\"✅ Config ready  |  output → {OUTPUT_DIR}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "cell-04-helpers",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "✅ Data helpers defined.\n"
     ]
    }
   ],
   "source": [
    "# ╔══════════════════════════════════════════════════════════════╗\n",
    "# ║  CELL 4 — Data loading helpers                               ║\n",
    "# ╚══════════════════════════════════════════════════════════════╝\n",
    "import re\n",
    "import pandas as pd\n",
    "from datasets import load_dataset\n",
    "\n",
    "def clean_clause(text: str) -> str:\n",
    "    text = re.sub(r'\\s+', ' ', text).strip()\n",
    "    text = re.sub(r'[^\\x00-\\x7F]+', '', text)\n",
    "    return text\n",
    "\n",
    "def load_contract_nli(split: str = \"train\", size: int = CNLI_SIZE):\n",
    "    \"\"\"Full ContractNLI — kiddothe2b/contract-nli, subset contractnli_a.\"\"\"\n",
    "    slice_str = f\"{split}[:{size}]\" if size else split\n",
    "    return load_dataset(\n",
    "        \"kiddothe2b/contract-nli\", \"contractnli_a\",\n",
    "        split=slice_str, trust_remote_code=True\n",
    "    )\n",
    "\n",
    "def process_contract_nli(dataset) -> pd.DataFrame:\n",
    "    \"\"\"kiddothe2b schema: 0=contradiction, 1=entailment, 2=neutral.\"\"\"\n",
    "    label_map = {0: \"contradiction\", 1: \"entailment\", 2: \"neutral\"}\n",
    "    records = []\n",
    "    for s in dataset:\n",
    "        p = clean_clause(s[\"premise\"])\n",
    "        h = clean_clause(s[\"hypothesis\"])\n",
    "        if len(p) < 20 or len(h) < 20:\n",
    "            continue\n",
    "        records.append({\"clause1\": p, \"clause2\": h,\n",
    "                         \"label\": label_map.get(s[\"label\"], \"neutral\")})\n",
    "    return pd.DataFrame(records)\n",
    "\n",
    "def load_mnli_government(split: str = \"train\", size: int = MNLI_SIZE):\n",
    "    \"\"\"MultiNLI filtered to government genre.\"\"\"\n",
    "    if split == \"validation\":\n",
    "        split = \"validation_matched\"\n",
    "    slice_str = f\"{split}[:{size}]\" if size else split\n",
    "    ds = load_dataset(\"nyu-mll/multi_nli\", split=slice_str, trust_remote_code=True)\n",
    "    return ds.filter(lambda x: x[\"genre\"] == \"government\")\n",
    "\n",
    "def process_mnli_government(dataset) -> pd.DataFrame:\n",
    "    \"\"\"MultiNLI schema: 0=entailment, 1=neutral, 2=contradiction.\"\"\"\n",
    "    label_map = {0: \"entailment\", 1: \"neutral\", 2: \"contradiction\"}\n",
    "    records = []\n",
    "    for s in dataset:\n",
    "        if not s[\"premise\"] or not s[\"hypothesis\"]:\n",
    "            continue\n",
    "        p = clean_clause(s[\"premise\"])\n",
    "        h = clean_clause(s[\"hypothesis\"])\n",
    "        if len(p) < 20 or len(h) < 20:\n",
    "            continue\n",
    "        records.append({\"clause1\": p, \"clause2\": h,\n",
    "                         \"label\": label_map.get(s[\"label\"], \"neutral\")})\n",
    "    return pd.DataFrame(records)\n",
    "\n",
    "NEGATION_MAP = {\n",
    "    \"shall\": \"shall not\", \"must\": \"must not\",\n",
    "    \"will\": \"will not\",   \"may\": \"may not\",\n",
    "    \"is required to\": \"is not required to\",\n",
    "    \"exclusive\": \"non-exclusive\", \"limited\": \"unlimited\",\n",
    "    \"terminate\": \"not terminate\",\n",
    "}\n",
    "\n",
    "def simulate_contradiction(clause: str):\n",
    "    for term, negated in NEGATION_MAP.items():\n",
    "        if term in clause.lower():\n",
    "            return re.sub(term, negated, clause, count=1, flags=re.IGNORECASE)\n",
    "    return None\n",
    "\n",
    "def build_synthetic_pairs(clauses: list, sample_size: int = SYNTH_SIZE) -> pd.DataFrame:\n",
    "    import random; random.seed(42)\n",
    "    sampled = random.sample(clauses, min(sample_size, len(clauses)))\n",
    "    records = []\n",
    "    for clause in sampled:\n",
    "        neg = simulate_contradiction(clause)\n",
    "        if neg:\n",
    "            records.append({\"clause1\": clause, \"clause2\": neg, \"label\": \"contradiction\"})\n",
    "    return pd.DataFrame(records)\n",
    "\n",
    "print(\"✅ Data helpers defined.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "cell-05-build",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "=======================================================\n",
      "BUILDING FULL TRAINING DATA\n",
      "=======================================================\n",
      "\n",
      "[1/3] ContractNLI (size=6820) ...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.12/dist-packages/huggingface_hub/utils/_auth.py:103: UserWarning: \n",
      "Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.\n",
      "You are not authenticated with the Hugging Face Hub in this notebook.\n",
      "If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3828de1843b244eab1864397b1be07ec",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading data:   0%|          | 0.00/796k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6d6569e0dd754662b7e7c7da5704ce01",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading data:   0%|          | 0.00/213k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "99d706588b8f46b5bafd97aca5302e57",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading data:   0%|          | 0.00/114k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cb547e43501246ab9077fd18cfa7e2ca",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating train split:   0%|          | 0/6819 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3bcc04612f034aa6afd1f4bcfeeb0890",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating test split:   0%|          | 0/1991 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "172e8bc3142d4905badd5e7c1b9dc649",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating validation split:   0%|          | 0/978 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  → 6819 pairs\n",
      "label\n",
      "entailment       3195\n",
      "neutral          2820\n",
      "contradiction     804\n",
      "\n",
      "[2/3] MultiNLI government (pool=50000) ...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f20dccd17a3648cdb76685916aa5603d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading readme:   0%|          | 0.00/8.89k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a15041dfe77b4f8fbe005b02daa90f53",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading data:   0%|          | 0.00/214M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a3bd313424c04fa69cb074c62aa20e5b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading data:   0%|          | 0.00/4.94M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c2711b551a664e6d8e2680dbf96890ac",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading data:   0%|          | 0.00/5.10M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "acfa4ba975724a03af7232b72a6d07bf",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating train split:   0%|          | 0/392702 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f5d309c3902744d0921d0255f760c863",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating validation_matched split:   0%|          | 0/9815 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "82e9096c59c24f2a83f580b310a5255a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating validation_mismatched split:   0%|          | 0/9832 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "97ee834b40814d619522c5852d240c66",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Filter:   0%|          | 0/50000 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  → 9937 pairs\n",
      "label\n",
      "contradiction    3526\n",
      "entailment       3318\n",
      "neutral          3093\n",
      "\n",
      "[3/3] Synthetic contradictions (size=1000) ...\n",
      "  → 842 pairs\n",
      "\n",
      "=======================================================\n",
      "✅ Total training pairs: 17598\n",
      "label\n",
      "entailment       6513\n",
      "neutral          5913\n",
      "contradiction    5172\n",
      "=======================================================\n"
     ]
    }
   ],
   "source": [
    "# ╔══════════════════════════════════════════════════════════════╗\n",
    "# ║  CELL 5 — Build full training dataset                        ║\n",
    "# ╚══════════════════════════════════════════════════════════════╝\n",
    "print(\"\\n\" + \"=\"*55)\n",
    "print(\"BUILDING FULL TRAINING DATA\")\n",
    "print(\"=\"*55)\n",
    "\n",
    "print(f\"\\n[1/3] ContractNLI (size={CNLI_SIZE}) ...\")\n",
    "cnli_raw = load_contract_nli(size=CNLI_SIZE)\n",
    "cnli_df  = process_contract_nli(cnli_raw)\n",
    "print(f\"  → {len(cnli_df)} pairs\")\n",
    "print(cnli_df[\"label\"].value_counts().to_string())\n",
    "\n",
    "print(f\"\\n[2/3] MultiNLI government (pool={MNLI_SIZE}) ...\")\n",
    "mnli_raw = load_mnli_government(size=MNLI_SIZE)\n",
    "mnli_df  = process_mnli_government(mnli_raw)\n",
    "print(f\"  → {len(mnli_df)} pairs\")\n",
    "print(mnli_df[\"label\"].value_counts().to_string())\n",
    "\n",
    "print(f\"\\n[3/3] Synthetic contradictions (size={SYNTH_SIZE}) ...\")\n",
    "synth_df = build_synthetic_pairs(cnli_df[\"clause1\"].tolist(), sample_size=SYNTH_SIZE)\n",
    "print(f\"  → {len(synth_df)} pairs\")\n",
    "\n",
    "valid_labels = {\"entailment\", \"contradiction\", \"neutral\"}\n",
    "full_df = (\n",
    "    pd.concat([cnli_df, mnli_df, synth_df], ignore_index=True)\n",
    "    .sample(frac=1, random_state=42)\n",
    ")\n",
    "full_df = full_df[full_df[\"label\"].isin(valid_labels)].dropna(\n",
    "    subset=[\"clause1\", \"clause2\", \"label\"])\n",
    "\n",
    "print(f\"\\n{'='*55}\")\n",
    "print(f\"✅ Total training pairs: {len(full_df)}\")\n",
    "print(full_df[\"label\"].value_counts().to_string())\n",
    "print(\"=\"*55)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "cell-06-tokenize",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads.\n",
      "WARNING:huggingface_hub.utils._http:Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads.\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c4180a00e39f4b20a183ac5e50fcc535",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6acbaa79f2bc445a8dba7cbac5e0db63",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer_config.json:   0%|          | 0.00/258 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cf1904a867804acd804c1269a4cd0b36",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "vocab.txt: 0.00B [00:00, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c51bd902071d4089a6e35bce0008d300",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "736813af7da3439e801d423e723617c9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map:   0%|          | 0/14958 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "58c1e30f69f84c45a120c54a3d64865e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map:   0%|          | 0/2640 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train: 14958  |  Eval: 2640\n"
     ]
    }
   ],
   "source": [
    "# ╔══════════════════════════════════════════════════════════════╗\n",
    "# ║  CELL 6 — Tokenize & split                                   ║\n",
    "# ╚══════════════════════════════════════════════════════════════╝\n",
    "from datasets import Dataset\n",
    "from transformers import AutoTokenizer\n",
    "\n",
    "tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)\n",
    "\n",
    "df_train = full_df.copy()\n",
    "df_train[\"label\"] = df_train[\"label\"].map(LABEL2ID)\n",
    "\n",
    "hf_ds  = Dataset.from_pandas(df_train[[\"clause1\", \"clause2\", \"label\"]])\n",
    "splits = hf_ds.train_test_split(test_size=0.15, seed=42)\n",
    "\n",
    "def tokenize_fn(batch):\n",
    "    return tokenizer(\n",
    "        batch[\"clause1\"], batch[\"clause2\"],\n",
    "        truncation=True, padding=\"max_length\", max_length=MAX_LEN\n",
    "    )\n",
    "\n",
    "tokenized = splits.map(tokenize_fn, batched=True, batch_size=256)\n",
    "# Drop raw text columns — keep only model inputs + label\n",
    "tokenized = tokenized.remove_columns([\"clause1\", \"clause2\"])\n",
    "tokenized.set_format(\"torch\")\n",
    "\n",
    "print(f\"Train: {len(tokenized['train'])}  |  Eval: {len(tokenized['test'])}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "cell-07-model",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4ae908a15d7f40d1acfb2dc7bb152bdf",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "882b3a80c47e48d2bd370b812ba97b85",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading weights:   0%|          | 0/104 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "✅ Model & Trainer ready.\n"
     ]
    }
   ],
   "source": [
    "# ╔══════════════════════════════════════════════════════════════╗\n",
    "# ║  CELL 7 — Model & TrainingArguments                          ║\n",
    "# ╚══════════════════════════════════════════════════════════════╝\n",
    "from transformers import (\n",
    "    AutoModelForSequenceClassification,\n",
    "    TrainingArguments,\n",
    "    Trainer,\n",
    "    EarlyStoppingCallback,\n",
    ")\n",
    "import numpy as np\n",
    "from sklearn.metrics import f1_score, accuracy_score\n",
    "\n",
    "model = AutoModelForSequenceClassification.from_pretrained(\n",
    "    BASE_MODEL,\n",
    "    num_labels=3,\n",
    "    id2label=ID2LABEL,\n",
    "    label2id=LABEL2ID,\n",
    "    ignore_mismatched_sizes=True,\n",
    ")\n",
    "\n",
    "def compute_metrics(eval_pred):\n",
    "    logits, labels = eval_pred\n",
    "    preds = np.argmax(logits, axis=-1)\n",
    "    return {\n",
    "        \"accuracy\": accuracy_score(labels, preds),\n",
    "        \"f1\": f1_score(labels, preds, average=\"weighted\", zero_division=0),\n",
    "    }\n",
    "\n",
    "training_args = TrainingArguments(\n",
    "    output_dir=OUTPUT_DIR,\n",
    "    num_train_epochs=EPOCHS,\n",
    "    per_device_train_batch_size=BATCH_SIZE,\n",
    "    per_device_eval_batch_size=BATCH_SIZE,\n",
    "    learning_rate=LR,\n",
    "    weight_decay=0.01,\n",
    "    warmup_ratio=0.1,\n",
    "    lr_scheduler_type=\"cosine\",\n",
    "    eval_strategy=\"epoch\",       # ← replaces deprecated evaluation_strategy\n",
    "    save_strategy=\"epoch\",\n",
    "    load_best_model_at_end=True,\n",
    "    metric_for_best_model=\"f1\",\n",
    "    greater_is_better=True,\n",
    "    logging_steps=50,\n",
    "    fp16=True,                   # T4 supports FP16 — ~2x speed boost\n",
    "    dataloader_num_workers=2,\n",
    "    report_to=\"none\",\n",
    ")\n",
    "\n",
    "trainer = Trainer(\n",
    "    model=model,\n",
    "    args=training_args,\n",
    "    train_dataset=tokenized[\"train\"],\n",
    "    eval_dataset=tokenized[\"test\"],\n",
    "    compute_metrics=compute_metrics,\n",
    "    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)],\n",
    ")\n",
    "\n",
    "print(\"✅ Model & Trainer ready.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "cell-08-train",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "🚀 Starting training ...\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='2340' max='2340' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [2340/2340 04:25, Epoch 5/5]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Epoch</th>\n",
       "      <th>Training Loss</th>\n",
       "      <th>Validation Loss</th>\n",
       "      <th>Accuracy</th>\n",
       "      <th>F1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.331108</td>\n",
       "      <td>0.299753</td>\n",
       "      <td>0.882197</td>\n",
       "      <td>0.883166</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.287276</td>\n",
       "      <td>0.249932</td>\n",
       "      <td>0.909470</td>\n",
       "      <td>0.909536</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.193082</td>\n",
       "      <td>0.232398</td>\n",
       "      <td>0.920076</td>\n",
       "      <td>0.919938</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.181951</td>\n",
       "      <td>0.229638</td>\n",
       "      <td>0.923485</td>\n",
       "      <td>0.923234</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>0.148084</td>\n",
       "      <td>0.244799</td>\n",
       "      <td>0.921591</td>\n",
       "      <td>0.921433</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "89c8b242441e433995dac74b6d960d3f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fb0fc6618ea240e3988d0db2228426e0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9607498cd8b64e098e9dd6a28d54e673",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "73f882613de74cb387b7c2ac3a4e4d16",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3f1655b6bc364954bbbe3482ca15a110",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "There were missing keys in the checkpoint model loaded: ['distilbert.embeddings.LayerNorm.weight', 'distilbert.embeddings.LayerNorm.bias'].\n",
      "There were unexpected keys in the checkpoint model loaded: ['distilbert.embeddings.LayerNorm.beta', 'distilbert.embeddings.LayerNorm.gamma'].\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "✅ Training complete!\n",
      "{'train_runtime': 267.0102, 'train_samples_per_second': 280.102, 'train_steps_per_second': 8.764, 'total_flos': 2476853356746240.0, 'train_loss': 0.3612339029964219, 'epoch': 5.0}\n"
     ]
    }
   ],
   "source": [
    "# ╔══════════════════════════════════════════════════════════════╗\n",
    "# ║  CELL 8 — Train (~20-35 min on T4)                           ║\n",
    "# ╚══════════════════════════════════════════════════════════════╝\n",
    "print(\"\\n🚀 Starting training ...\")\n",
    "train_result = trainer.train()\n",
    "print(\"\\n✅ Training complete!\")\n",
    "print(train_result.metrics)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "cell-09-save",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "08ae3f5fd0ea4ad790684abeda797454",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "✅ Model saved → /content/drive/MyDrive/Athernex/nli_contract_model_final\n"
     ]
    }
   ],
   "source": [
    "# ╔══════════════════════════════════════════════════════════════╗\n",
    "# ║  CELL 9 — Save model to Drive                                ║\n",
    "# ╚══════════════════════════════════════════════════════════════╝\n",
    "model.save_pretrained(OUTPUT_DIR)\n",
    "tokenizer.save_pretrained(OUTPUT_DIR)\n",
    "print(f\"\\n✅ Model saved → {OUTPUT_DIR}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "cell-10-eval",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "📊 Final evaluation on validation split ...\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='83' max='83' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [83/83 00:02]\n",
       "    </div>\n",
       "    "
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'eval_loss': 0.22964176535606384, 'eval_accuracy': 0.9234848484848485, 'eval_f1': 0.9232344709378449, 'eval_runtime': 2.2543, 'eval_samples_per_second': 1171.072, 'eval_steps_per_second': 36.818, 'epoch': 5.0}\n"
     ]
    }
   ],
   "source": [
    "# ╔══════════════════════════════════════════════════════════════╗\n",
    "# ║  CELL 10 — Final eval on validation split                    ║\n",
    "# ╚══════════════════════════════════════════════════════════════╝\n",
    "print(\"\\n📊 Final evaluation on validation split ...\")\n",
    "eval_results = trainer.evaluate()\n",
    "print(eval_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "cell-11-heldout",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "[Held-out] Loading MultiNLI validation_matched (government) ...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2ee65214435a4afebff9dd1c35036c8e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  → 996 held-out samples\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3c9b2f86a19f4b04a4a19a196080489b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading weights:   0%|          | 0/104 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "============================================================\n",
      "HELD-OUT CLASSIFICATION REPORT\n",
      "============================================================\n",
      "               precision    recall  f1-score   support\n",
      "\n",
      "contradiction       0.85      0.86      0.86       335\n",
      "   entailment       0.90      0.84      0.87       354\n",
      "      neutral       0.78      0.83      0.80       307\n",
      "\n",
      "     accuracy                           0.84       996\n",
      "    macro avg       0.84      0.84      0.84       996\n",
      " weighted avg       0.85      0.84      0.85       996\n",
      "\n",
      "Confusion Matrix:\n",
      "                     Pred: entailment  Pred: contradiction  Pred: neutral\n",
      "True: entailment                  299                   17             38\n",
      "True: contradiction                12                  288             35\n",
      "True: neutral                      21                   32            254\n"
     ]
    }
   ],
   "source": [
    "# ╔══════════════════════════════════════════════════════════════╗\n",
    "# ║  CELL 11 — Held-out test (MultiNLI validation_matched)       ║\n",
    "# ╚══════════════════════════════════════════════════════════════╝\n",
    "from sklearn.metrics import classification_report, confusion_matrix\n",
    "from transformers import pipeline as hf_pipeline\n",
    "\n",
    "print(\"\\n[Held-out] Loading MultiNLI validation_matched (government) ...\")\n",
    "val_raw = load_mnli_government(split=\"validation_matched\", size=5000)\n",
    "val_df  = process_mnli_government(val_raw)\n",
    "val_df  = val_df[val_df[\"label\"].isin(valid_labels)].dropna().reset_index(drop=True)\n",
    "print(f\"  → {len(val_df)} held-out samples\")\n",
    "\n",
    "# Batch pipeline — no top_k so batch_size works cleanly\n",
    "batch_pipe = hf_pipeline(\n",
    "    \"text-classification\",\n",
    "    model=OUTPUT_DIR,\n",
    "    tokenizer=tokenizer,\n",
    "    device=0,\n",
    "    batch_size=64,\n",
    "    truncation=True,\n",
    "    max_length=MAX_LEN,\n",
    ")\n",
    "\n",
    "texts       = [f\"{r.clause1} [SEP] {r.clause2}\" for r in val_df.itertuples()]\n",
    "raw_preds   = batch_pipe(texts)\n",
    "pred_labels = [p[\"label\"] for p in raw_preds]  # top-1 label per sample\n",
    "\n",
    "print(\"\\n\" + \"=\"*60)\n",
    "print(\"HELD-OUT CLASSIFICATION REPORT\")\n",
    "print(\"=\"*60)\n",
    "print(classification_report(val_df[\"label\"].tolist(), pred_labels))\n",
    "\n",
    "cm = confusion_matrix(\n",
    "    val_df[\"label\"].tolist(), pred_labels,\n",
    "    labels=[\"entailment\", \"contradiction\", \"neutral\"]\n",
    ")\n",
    "print(\"Confusion Matrix:\")\n",
    "print(pd.DataFrame(\n",
    "    cm,\n",
    "    index=[\"True: entailment\", \"True: contradiction\", \"True: neutral\"],\n",
    "    columns=[\"Pred: entailment\", \"Pred: contradiction\", \"Pred: neutral\"],\n",
    "))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "b407b6d7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b7abaf3296d3431daa1cf5efd7fa31d7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading weights:   0%|          | 0/104 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "============================================================\n",
      "CONTRACT CLAUSE SMOKE TEST (CHALLENGING)\n",
      "============================================================\n",
      "\n",
      "✓  Expected: contradiction   | Got: contradiction   (99.81%)\n",
      "   C1: The agreement shall automatically renew for successive one-year terms unless ter\n",
      "   C2: This agreement expires on the end date and shall not renew automatically under a\n",
      "\n",
      "✗  Expected: contradiction   | Got: neutral         (41.68%)\n",
      "   C1: Seller warrants that all deliverables shall be free from defects for a period of\n",
      "   C2: Seller disclaims all warranties, express or implied, including any warranty of m\n",
      "\n",
      "✓  Expected: contradiction   | Got: contradiction   (99.94%)\n",
      "   C1: The Licensee is granted an exclusive, worldwide, perpetual license to use the So\n",
      "   C2: The license granted herein is non-exclusive, limited to the United States, and v\n",
      "\n",
      "✗  Expected: contradiction   | Got: neutral         (99.44%)\n",
      "   C1: All disputes arising under this agreement shall be resolved through binding arbi\n",
      "   C2: Either party may bring suit in any court of competent jurisdiction to resolve di\n",
      "\n",
      "✓  Expected: contradiction   | Got: contradiction   (99.10%)\n",
      "   C1: The contractor shall maintain professional liability insurance with coverage of \n",
      "   C2: The contractor is not required to carry any form of professional liability insur\n",
      "\n",
      "✓  Expected: entailment      | Got: entailment      (90.29%)\n",
      "   C1: Neither party shall disclose Confidential Information to any third party without\n",
      "   C2: Confidential Information must not be shared with outside parties unless the disc\n",
      "\n",
      "✓  Expected: entailment      | Got: entailment      (94.83%)\n",
      "   C1: The Company shall indemnify and hold harmless the Consultant against all claims \n",
      "   C2: If claims arise due to the Company's negligent acts, the Company is responsible \n",
      "\n",
      "✗  Expected: entailment      | Got: neutral         (63.14%)\n",
      "   C1: Force majeure events including natural disasters, war, and pandemic shall excuse\n",
      "   C2: Obligations under this contract are suspended during events such as pandemics, w\n",
      "\n",
      "✓  Expected: neutral         | Got: neutral         (99.97%)\n",
      "   C1: The governing law of this agreement shall be the laws of the State of Delaware.\n",
      "   C2: All invoices must be submitted within 30 days of service completion.\n",
      "\n",
      "✗  Expected: neutral         | Got: contradiction   (99.26%)\n",
      "   C1: Employee agrees to a 12-month non-compete restriction within a 50-mile radius.\n",
      "   C2: The company reserves the right to modify employee benefits at its sole discretio\n",
      "\n",
      "✓  Expected: neutral         | Got: neutral         (99.98%)\n",
      "   C1: The data processor shall implement AES-256 encryption for all data at rest.\n",
      "   C2: The data controller shall conduct annual privacy impact assessments.\n",
      "\n",
      "✗  Expected: contradiction   | Got: neutral         (99.91%)\n",
      "   C1: Payment of the full contract price is due upon delivery of the final deliverable\n",
      "   C2: Payment shall be made in four equal quarterly installments over the term of the \n",
      "\n",
      "✗  Expected: contradiction   | Got: neutral         (95.06%)\n",
      "   C1: Either party may terminate this agreement for convenience upon 30 days written n\n",
      "   C2: This agreement may only be terminated for cause, specifically material breach th\n",
      "\n",
      "✗  Expected: contradiction   | Got: entailment      (94.42%)\n",
      "   C1: The supplier shall deliver all goods FOB destination, with risk of loss transfer\n",
      "   C2: All shipments are FOB origin; risk of loss passes to the buyer when goods are te\n",
      "\n",
      "✓  Expected: contradiction   | Got: contradiction   (95.48%)\n",
      "   C1: Consultant retains all intellectual property rights in pre-existing materials in\n",
      "   C2: All work product, including any pre-existing materials used therein, shall be co\n",
      "\n",
      "============================================================\n",
      "Smoke test accuracy: 8/15\n",
      "============================================================\n",
      "\n",
      "🎉 All done! Model saved to Google Drive.\n"
     ]
    }
   ],
   "source": [
    "# ╔══════════════════════════════════════════════════════════════╗\n",
    "# ║  CELL 12 — Contract clause smoke test (challenging)          ║\n",
    "# ╚══════════════════════════════════════════════════════════════╝\n",
    "from transformers import pipeline as hf_pipeline\n",
    "\n",
    "EXAMPLE_PAIRS = [\n",
    "    # --- CONTRADICTIONS: subtle conflicts ---\n",
    "    (\"The agreement shall automatically renew for successive one-year terms unless terminated with 90 days prior written notice.\",\n",
    "     \"This agreement expires on the end date and shall not renew automatically under any circumstances.\",\n",
    "     \"contradiction\"),\n",
    "\n",
    "    (\"Seller warrants that all deliverables shall be free from defects for a period of 24 months from acceptance.\",\n",
    "     \"Seller disclaims all warranties, express or implied, including any warranty of merchantability or fitness.\",\n",
    "     \"contradiction\"),\n",
    "\n",
    "    (\"The Licensee is granted an exclusive, worldwide, perpetual license to use the Software.\",\n",
    "     \"The license granted herein is non-exclusive, limited to the United States, and valid for 12 months only.\",\n",
    "     \"contradiction\"),\n",
    "\n",
    "    (\"All disputes arising under this agreement shall be resolved through binding arbitration in New York.\",\n",
    "     \"Either party may bring suit in any court of competent jurisdiction to resolve disputes under this agreement.\",\n",
    "     \"contradiction\"),\n",
    "\n",
    "    (\"The contractor shall maintain professional liability insurance with coverage of no less than $5,000,000.\",\n",
    "     \"The contractor is not required to carry any form of professional liability insurance.\",\n",
    "     \"contradiction\"),\n",
    "\n",
    "    # --- ENTAILMENTS: same meaning, different wording ---\n",
    "    (\"Neither party shall disclose Confidential Information to any third party without prior written consent.\",\n",
    "     \"Confidential Information must not be shared with outside parties unless the disclosing party agrees in writing.\",\n",
    "     \"entailment\"),\n",
    "\n",
    "    (\"The Company shall indemnify and hold harmless the Consultant against all claims arising from the Company's negligence.\",\n",
    "     \"If claims arise due to the Company's negligent acts, the Company is responsible for indemnifying the Consultant.\",\n",
    "     \"entailment\"),\n",
    "\n",
    "    (\"Force majeure events including natural disasters, war, and pandemic shall excuse performance obligations.\",\n",
    "     \"Obligations under this contract are suspended during events such as pandemics, wars, or natural disasters beyond the parties' control.\",\n",
    "     \"entailment\"),\n",
    "\n",
    "    # --- NEUTRALS: related but independent clauses ---\n",
    "    (\"The governing law of this agreement shall be the laws of the State of Delaware.\",\n",
    "     \"All invoices must be submitted within 30 days of service completion.\",\n",
    "     \"neutral\"),\n",
    "\n",
    "    (\"Employee agrees to a 12-month non-compete restriction within a 50-mile radius.\",\n",
    "     \"The company reserves the right to modify employee benefits at its sole discretion.\",\n",
    "     \"neutral\"),\n",
    "\n",
    "    (\"The data processor shall implement AES-256 encryption for all data at rest.\",\n",
    "     \"The data controller shall conduct annual privacy impact assessments.\",\n",
    "     \"neutral\"),\n",
    "\n",
    "    # --- HARD CASES: tricky edge cases ---\n",
    "    (\"Payment of the full contract price is due upon delivery of the final deliverable.\",\n",
    "     \"Payment shall be made in four equal quarterly installments over the term of the agreement.\",\n",
    "     \"contradiction\"),\n",
    "\n",
    "    (\"Either party may terminate this agreement for convenience upon 30 days written notice.\",\n",
    "     \"This agreement may only be terminated for cause, specifically material breach that remains uncured for 60 days.\",\n",
    "     \"contradiction\"),\n",
    "\n",
    "    (\"The supplier shall deliver all goods FOB destination, with risk of loss transferring upon delivery.\",\n",
    "     \"All shipments are FOB origin; risk of loss passes to the buyer when goods are tendered to the carrier.\",\n",
    "     \"contradiction\"),\n",
    "\n",
    "    (\"Consultant retains all intellectual property rights in pre-existing materials incorporated into the deliverables.\",\n",
    "     \"All work product, including any pre-existing materials used therein, shall be considered work-for-hire owned exclusively by the Client.\",\n",
    "     \"contradiction\"),\n",
    "]\n",
    "\n",
    "smoke_pipe = hf_pipeline(\n",
    "    \"text-classification\",\n",
    "    model=OUTPUT_DIR,\n",
    "    tokenizer=tokenizer,\n",
    "    device=0,\n",
    "    top_k=None,\n",
    "    truncation=True,\n",
    "    max_length=MAX_LEN,\n",
    ")\n",
    "\n",
    "print(\"\\n\" + \"=\"*60)\n",
    "print(\"CONTRACT CLAUSE SMOKE TEST (CHALLENGING)\")\n",
    "print(\"=\"*60)\n",
    "correct = 0\n",
    "for clause1, clause2, expected in EXAMPLE_PAIRS:\n",
    "    result = smoke_pipe(f\"{clause1} [SEP] {clause2}\")\n",
    "    if result and isinstance(result[0], list):\n",
    "        result = result[0]\n",
    "    scores = {r[\"label\"]: r[\"score\"] for r in result}\n",
    "    got    = max(scores, key=scores.get)\n",
    "    mark   = \"✓\" if got == expected else \"✗\"\n",
    "    correct += (got == expected)\n",
    "    print(f\"\\n{mark}  Expected: {expected:15s} | Got: {got:15s} ({scores[got]:.2%})\")\n",
    "    print(f\"   C1: {clause1[:80]}\")\n",
    "    print(f\"   C2: {clause2[:80]}\")\n",
    "\n",
    "print(f\"\\n{'='*60}\")\n",
    "print(f\"Smoke test accuracy: {correct}/{len(EXAMPLE_PAIRS)}\")\n",
    "print(\"=\"*60)\n",
    "print(\"\\n🎉 All done! Model saved to Google Drive.\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "44317c51",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}