Spaces:

retroam
/

vendsim-vb2

Sleeping

File size: 15,140 Bytes

ba531fa

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "cell-0",
   "metadata": {},
   "source": [
    "# Vending-Bench 2 — Lambda GPU Setup Verification\n",
    "\n",
    "This notebook verifies that a **Lambda GH200 instance** is correctly configured to run\n",
    "the VB2 GRPO training notebook (`01_vb2_training_grpo.ipynb`).\n",
    "\n",
    "It checks:\n",
    "1. Dependencies install correctly\n",
    "2. CUDA/GPU is available (GH200 — note that CUDA reports only the GPU's HBM, roughly 96 GB; the 480 GB figure refers to the Grace CPU's memory)\n",
    "3. TRL imports without vllm ABI errors\n",
    "4. Model loads with 4-bit quantization + LoRA\n",
    "5. Gradient flow works (forward + backward pass)\n",
    "6. VB2 environment works — all tool calls verified via direct Python API"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cell-1",
   "metadata": {},
   "source": [
    "## 1 — Install Dependencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "cell-2",
   "metadata": {},
   "outputs": [],
   "source": [
    "%%capture\n",
    "# Installs the Python packages used by the rest of this notebook.\n",
    "# %%capture suppresses this cell's output, so pip/uv errors are NOT shown here.\n",
    "import os\n",
    "# NOTE(review): presumably these two variables keep transformers from loading TensorFlow -- confirm.\n",
    "os.environ[\"TRANSFORMERS_NO_TF\"] = \"1\"\n",
    "os.environ[\"USE_TF\"] = \"0\"\n",
    "\n",
    "# uv performs the uninstall/install steps below.\n",
    "!pip install -qqq uv\n",
    "\n",
    "# Remove any existing torch / unsloth / vllm packages from the system interpreter; stderr is discarded.\n",
    "!uv pip uninstall --system torch torchvision torchaudio torchao torch_c_dlpack_ext unsloth unsloth_zoo vllm 2>/dev/null\n",
    "\n",
    "# Install the stack with numpy, transformers and trl constrained to the listed version ranges;\n",
    "# the remaining packages are unpinned.\n",
    "!uv pip install --system \"numpy<2\" \"transformers>=4.49,<4.52\" \"trl>=0.15,<0.17\" \\\n",
    "    peft datasets accelerate bitsandbytes \\\n",
    "    openenv-core fastmcp matplotlib\n",
    "\n",
    "# Remove vllm again in case trl re-pulled it\n",
    "# (`|| true` ignores a failing uninstall).\n",
    "!uv pip uninstall --system vllm 2>/dev/null || true"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cell-3",
   "metadata": {},
   "source": [
    "## 2 — Verify CUDA / GPU"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Confirm that PyTorch can see a CUDA device and report its name and memory.\n",
    "# Defines `gpu_name` and `vram_gb`, which the summary cell at the end prints.\n",
    "import torch\n",
    "\n",
    "assert torch.cuda.is_available(), \"CUDA not available!\"\n",
    "\n",
    "gpu_name = torch.cuda.get_device_name(0)\n",
    "# The device-properties object exposes `total_memory` (in bytes); the earlier\n",
    "# `total_mem` spelling raised AttributeError and stopped the notebook here.\n",
    "vram_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)\n",
    "\n",
    "print(f\"torch:    {torch.__version__}\")\n",
    "print(f\"CUDA:     {torch.version.cuda}\")\n",
    "print(f\"GPU:      {gpu_name}\")\n",
    "print(f\"VRAM:     {vram_gb:.0f} GB\")\n",
    "\n",
    "# Passes when the device name contains \"GH200\" OR the reported memory exceeds 400 (GiB).\n",
    "# NOTE(review): CUDA presumably reports only the GPU's HBM (~96 GB on a GH200), with the\n",
    "# 480 GB figure being Grace CPU memory -- confirm the intended threshold and message.\n",
    "assert \"GH200\" in gpu_name or vram_gb > 400, f\"Expected GH200 with 480GB VRAM, got {gpu_name} with {vram_gb:.0f}GB\"\n",
    "print(\"\\nGPU verification passed.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cell-5",
   "metadata": {},
   "source": [
    "## 3 — Block vllm & Verify TRL Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "\n",
    "# Evict `vllm` and every `vllm.*` submodule from the import cache.\n",
    "cached_vllm = [name for name in list(sys.modules) if name == \"vllm\" or name.startswith(\"vllm.\")]\n",
    "for name in cached_vllm:\n",
    "    sys.modules.pop(name)\n",
    "\n",
    "# Override TRL's vllm availability flag before the GRPO classes are imported.\n",
    "import trl.import_utils\n",
    "trl.import_utils._vllm_available = False\n",
    "\n",
    "from trl import GRPOConfig, GRPOTrainer\n",
    "\n",
    "# Report the TRL version and confirm both GRPO names resolved.\n",
    "print(f\"trl:      {trl.__version__}\")\n",
    "print(f\"GRPOConfig imported: {GRPOConfig is not None}\")\n",
    "print(f\"GRPOTrainer imported: {GRPOTrainer is not None}\")\n",
    "print(\"\\nTRL import verification passed.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cell-7",
   "metadata": {},
   "source": [
    "## 4 — Load Model with 4-bit Quantization + LoRA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-8",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig\n",
    "from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training\n",
    "\n",
    "# One checkpoint id shared by the model and tokenizer loaders.\n",
    "base_model_id = \"Qwen/Qwen2.5-1.5B-Instruct\"\n",
    "\n",
    "# 4-bit NF4 quantization with bfloat16 as the compute dtype.\n",
    "bnb_config = BitsAndBytesConfig(\n",
    "    bnb_4bit_compute_dtype=torch.bfloat16,\n",
    "    bnb_4bit_quant_type=\"nf4\",\n",
    "    load_in_4bit=True,\n",
    ")\n",
    "\n",
    "model = AutoModelForCausalLM.from_pretrained(\n",
    "    base_model_id,\n",
    "    torch_dtype=torch.bfloat16,\n",
    "    device_map=\"auto\",\n",
    "    quantization_config=bnb_config,\n",
    ")\n",
    "tokenizer = AutoTokenizer.from_pretrained(base_model_id)\n",
    "\n",
    "model = prepare_model_for_kbit_training(model)\n",
    "\n",
    "# Rank-8 LoRA adapters on the listed projection modules.\n",
    "attention_projections = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\"]\n",
    "mlp_projections = [\"gate_proj\", \"up_proj\", \"down_proj\"]\n",
    "lora_config = LoraConfig(\n",
    "    task_type=\"CAUSAL_LM\",\n",
    "    r=8,\n",
    "    lora_alpha=16,\n",
    "    lora_dropout=0.0,\n",
    "    target_modules=attention_projections + mlp_projections,\n",
    ")\n",
    "model = get_peft_model(model, lora_config)\n",
    "\n",
    "# Enable non-reentrant gradient checkpointing, then show the trainable-parameter summary.\n",
    "checkpointing_kwargs = {\"use_reentrant\": False}\n",
    "model.gradient_checkpointing_enable(gradient_checkpointing_kwargs=checkpointing_kwargs)\n",
    "model.print_trainable_parameters()\n",
    "print(\"\\nModel loading verification passed.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cell-9",
   "metadata": {},
   "source": [
    "## 5 — Verify Gradient Flow"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-10",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One tiny forward + backward pass so gradient problems surface before training.\n",
    "test_input = tokenizer(\"Hello, world!\", return_tensors=\"pt\").to(\"cuda\")\n",
    "# Use the input ids themselves as labels so the model returns a loss.\n",
    "test_input[\"labels\"] = test_input[\"input_ids\"].clone()\n",
    "\n",
    "output = model(**test_input)\n",
    "loss = output.loss\n",
    "loss.backward()\n",
    "\n",
    "# Count the parameters that ended up with a gradient that is not all zeros.\n",
    "grad_params = 0\n",
    "for param in model.parameters():\n",
    "    if param.grad is None:\n",
    "        continue\n",
    "    if param.grad.abs().sum() > 0:\n",
    "        grad_params += 1\n",
    "\n",
    "print(f\"Loss:                {loss.item():.4f}\")\n",
    "print(f\"Params with grads:   {grad_params}\")\n",
    "assert grad_params > 0, \"No parameters received gradients!\"\n",
    "\n",
    "# Clear the accumulated gradients again.\n",
    "model.zero_grad()\n",
    "print(\"\\nGradient flow verification passed.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cell-11",
   "metadata": {},
   "source": [
    "## 6 — VB2 Environment: Import & Reset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-12",
   "metadata": {},
   "outputs": [],
   "source": [
    "%%capture\n",
    "# Fetch the VB2 environment source into /tmp/vendsim-vb2 and make it importable.\n",
    "# stderr is discarded and `|| true` ignores any clone failure, so a failed clone is silent.\n",
    "!git clone https://github.com/retroam/vendsim-vb2.git /tmp/vendsim-vb2 2>/dev/null || true\n",
    "import sys\n",
    "# Guard the insert so re-running this cell does not stack duplicate sys.path entries.\n",
    "if '/tmp/vendsim-vb2' not in sys.path:\n",
    "    sys.path.insert(0, '/tmp/vendsim-vb2')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-13",
   "metadata": {},
   "outputs": [],
   "source": [
    "from vendsim_vb2.environment import VendingBench2Environment\n",
    "from vendsim_vb2.demand import PRODUCTS\n",
    "import json\n",
    "\n",
    "# Build a seeded environment and reset it.\n",
    "env = VendingBench2Environment(seed=42)\n",
    "env.reset()\n",
    "\n",
    "# Show the initial balance, the product catalogue and the current day index.\n",
    "starting_balance = env.state.cash_balance\n",
    "print(f\"Starting balance: ${starting_balance}\")\n",
    "product_names = list(PRODUCTS.keys())\n",
    "print(f\"Products: {product_names}\")\n",
    "print(f\"Day: {env.state.day_index}\")\n",
    "\n",
    "# Expected post-reset state: 500.0 in cash and five products.\n",
    "assert env.state.cash_balance == 500.0\n",
    "assert len(PRODUCTS) == 5\n",
    "print(\"\\nEnvironment reset verification passed.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cell-14",
   "metadata": {},
   "source": [
    "## 7 — VB2 Tool Calls: Explore Balance & Inventory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-15",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read all three values from the environment state first, then report and type-check each.\n",
    "balance = env.state.cash_balance\n",
    "storage_inv = dict(env.state.storage_inventory)\n",
    "machine_inv = dict(env.state.machine_inventory)\n",
    "\n",
    "# Cash balance must be numeric.\n",
    "print(f\"Balance: ${balance}\")\n",
    "numeric_types = (int, float)\n",
    "assert isinstance(balance, numeric_types)\n",
    "\n",
    "# Copy of the storage inventory.\n",
    "print(f\"Storage inventory: {storage_inv}\")\n",
    "assert isinstance(storage_inv, dict)\n",
    "\n",
    "# Copy of the machine inventory.\n",
    "print(f\"Machine inventory: {machine_inv}\")\n",
    "assert isinstance(machine_inv, dict)\n",
    "\n",
    "print(\"\\nBalance & inventory verification passed.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cell-16",
   "metadata": {},
   "source": [
    "## 8 — VB2 Tool Calls: Set Prices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-17",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One target price per product.\n",
    "test_prices = dict(soda=1.75, water=1.25, candy=1.00, chips=2.50, sandwich=4.50)\n",
    "\n",
    "# Apply each price, then read it back from the environment state and compare.\n",
    "for product in test_prices:\n",
    "    price = test_prices[product]\n",
    "    env.set_price(product, price)\n",
    "    current = env.state.prices.get(product)\n",
    "    print(f\"  {product}: set to ${price} -> current: ${current}\")\n",
    "    assert current == price, f\"Price mismatch for {product}: expected {price}, got {current}\"\n",
    "\n",
    "print(\"\\nPrice setting verification passed.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cell-18",
   "metadata": {},
   "source": [
    "## 9 — VB2 Tool Calls: Supplier Quote & Inventory Restock"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-19",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Exercises the supplier-quote call and a machine restock against the shared `env`.\n",
    "\n",
    "# Request supplier quote\n",
    "# NOTE(review): arguments are presumably (product, quantity) -- confirm against the environment API.\n",
    "quote = env.request_supplier_quote(\"chips\", 20)\n",
    "print(f\"Supplier quote: {quote}\")\n",
    "# Only checks that something was returned, not its contents.\n",
    "assert quote is not None\n",
    "\n",
    "# Stock storage and restock machine\n",
    "# Storage is filled by writing 20 into the state's storage_inventory for every product.\n",
    "for product in PRODUCTS:\n",
    "    env.state.storage_inventory[product] = 20\n",
    "\n",
    "# Run the \"restock_machine\" sub-agent action for soda with qty=3.\n",
    "restock_result = env.run_sub_agent(\"restock_machine\", product=\"soda\", qty=3)\n",
    "print(f\"Restock result: {restock_result}\")\n",
    "# As above, only a non-None result is required.\n",
    "assert restock_result is not None\n",
    "\n",
    "print(\"\\nSupplier & restock verification passed.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cell-20",
   "metadata": {},
   "source": [
    "## 10 — VB2 Tool Calls: Scratchpad"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-21",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Store a note in the scratchpad.\n",
    "scratchpad_note = \"Setup verification: all systems nominal.\"\n",
    "env.write_scratchpad(scratchpad_note)\n",
    "print(\"Scratchpad written.\")\n",
    "\n",
    "# Read it back and confirm the note text appears in the result.\n",
    "notes = env.read_scratchpad()\n",
    "print(f\"Scratchpad contents: {notes}\")\n",
    "notes_text = str(notes)\n",
    "assert \"Setup verification\" in notes_text\n",
    "\n",
    "print(\"\\nScratchpad verification passed.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cell-22",
   "metadata": {},
   "source": [
    "## 11 — VB2 Tool Calls: Advance Days & Observe Sales"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-23",
   "metadata": {},
   "outputs": [],
   "source": [
    "NUM_DAYS = 5\n",
    "\n",
    "print(f\"Advancing {NUM_DAYS} days...\\n\")\n",
    "completed_days = 0\n",
    "while completed_days < NUM_DAYS:\n",
    "    # Step the simulation one day and pull sales / revenue out of the result payload,\n",
    "    # falling back to an empty dict and 0.0 when a key is absent.\n",
    "    result = env.wait_for_next_day()\n",
    "    sales = result.payload.get('sales', {})\n",
    "    revenue = result.payload.get('revenue', 0.0)\n",
    "    print(f\"  Day {env.state.day_index - 1}: sales={sales}, revenue=${revenue:.2f}, balance=${env.state.cash_balance:.2f}\")\n",
    "    completed_days += 1\n",
    "\n",
    "# The closing balance only has to be numeric.\n",
    "final_balance = env.state.cash_balance\n",
    "print(f\"\\nFinal balance after {NUM_DAYS} days: ${final_balance:.2f}\")\n",
    "assert isinstance(final_balance, (int, float))\n",
    "\n",
    "print(\"\\nDay advancement verification passed.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cell-24",
   "metadata": {},
   "source": [
    "## 12 — Summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-25",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Final checklist. Every line is printed unconditionally; it relies on the earlier\n",
    "# cells' assertions having stopped execution if a check had failed.\n",
    "banner = \"=\" * 50\n",
    "passed_checks = [\n",
    "    \"Dependencies installed\",\n",
    "    f\"CUDA/GPU: {gpu_name}, {vram_gb:.0f}GB VRAM\",\n",
    "    \"TRL imports (vllm blocked)\",\n",
    "    \"Model: 4-bit quant + LoRA\",\n",
    "    \"Gradient flow\",\n",
    "    \"VB2 environment reset\",\n",
    "    \"Balance & inventory checks\",\n",
    "    \"Price setting\",\n",
    "    \"Supplier quote & restock\",\n",
    "    \"Scratchpad read/write\",\n",
    "    \"Day advancement & sales\",\n",
    "]\n",
    "\n",
    "print(banner)\n",
    "print(\" Lambda GH200 Setup Verification\")\n",
    "print(banner)\n",
    "for check in passed_checks:\n",
    "    print(f\"  [PASS] {check}\")\n",
    "print(banner)\n",
    "print(\"\\nAll checks passed. Ready to run 01_vb2_training_grpo.ipynb\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}