{ "cells": [ { "cell_type": "markdown", "id": "cell-0", "metadata": {}, "source": [ "# Vending-Bench 2 — Lambda GPU Setup Verification\n", "\n", "This notebook verifies that a **Lambda GH200 instance** is correctly configured to run\n", "the VB2 GRPO training notebook (`01_vb2_training_grpo.ipynb`).\n", "\n", "It checks:\n", "1. Dependencies install correctly\n", "2. CUDA/GPU is available (GH200, 480GB VRAM)\n", "3. TRL imports without vllm ABI errors\n", "4. Model loads with 4-bit quantization + LoRA\n", "5. Gradient flow works (forward + backward pass)\n", "6. VB2 environment works — all tool calls verified via direct Python API" ] }, { "cell_type": "markdown", "id": "cell-1", "metadata": {}, "source": [ "## 1 — Install Dependencies" ] }, { "cell_type": "code", "execution_count": 3, "id": "cell-2", "metadata": {}, "outputs": [], "source": [ "%%capture\n", "import os\n", "os.environ[\"TRANSFORMERS_NO_TF\"] = \"1\"\n", "os.environ[\"USE_TF\"] = \"0\"\n", "\n", "!pip install -qqq uv\n", "\n", "!uv pip uninstall --system torch torchvision torchaudio torchao torch_c_dlpack_ext unsloth unsloth_zoo vllm 2>/dev/null\n", "\n", "!uv pip install --system \"numpy<2\" \"transformers>=4.49,<4.52\" \"trl>=0.15,<0.17\" \\\n", " peft datasets accelerate bitsandbytes \\\n", " openenv-core fastmcp matplotlib\n", "\n", "# Remove vllm again in case trl re-pulled it\n", "!uv pip uninstall --system vllm 2>/dev/null || true" ] }, { "cell_type": "markdown", "id": "cell-3", "metadata": {}, "source": [ "## 2 — Verify CUDA / GPU" ] }, { "cell_type": "code", "execution_count": 4, "id": "cell-4", "metadata": {}, "outputs": [ { "ename": "AttributeError", "evalue": "'torch._C._CudaDeviceProperties' object has no attribute 'total_mem'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[4], line 
6\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mcuda\u001b[38;5;241m.\u001b[39mis_available(), \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCUDA not available!\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5\u001b[0m gpu_name \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mcuda\u001b[38;5;241m.\u001b[39mget_device_name(\u001b[38;5;241m0\u001b[39m)\n\u001b[0;32m----> 6\u001b[0m vram_gb \u001b[38;5;241m=\u001b[39m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcuda\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_device_properties\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtotal_mem\u001b[49m \u001b[38;5;241m/\u001b[39m (\u001b[38;5;241m1024\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m3\u001b[39m)\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtorch: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtorch\u001b[38;5;241m.\u001b[39m__version__\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCUDA: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtorch\u001b[38;5;241m.\u001b[39mversion\u001b[38;5;241m.\u001b[39mcuda\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", "\u001b[0;31mAttributeError\u001b[0m: 'torch._C._CudaDeviceProperties' object has no attribute 'total_mem'" ] } ], "source": [ "import torch\n", "\n", "assert torch.cuda.is_available(), \"CUDA not available!\"\n", "\n", "gpu_name = torch.cuda.get_device_name(0)\n", "vram_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)\n", "\n", "print(f\"torch: {torch.__version__}\")\n", "print(f\"CUDA: {torch.version.cuda}\")\n", "print(f\"GPU: {gpu_name}\")\n",
"print(f\"VRAM: {vram_gb:.0f} GB\")\n", "\n", "assert \"GH200\" in gpu_name or vram_gb > 400, f\"Expected GH200 with 480GB VRAM, got {gpu_name} with {vram_gb:.0f}GB\"\n", "print(\"\\nGPU verification passed.\")" ] }, { "cell_type": "markdown", "id": "cell-5", "metadata": {}, "source": [ "## 3 — Block vllm & Verify TRL Imports" ] }, { "cell_type": "code", "execution_count": null, "id": "cell-6", "metadata": {}, "outputs": [], "source": [ "import sys\n", "\n", "# Remove any cached vllm modules\n", "for key in list(sys.modules.keys()):\n", " if key == \"vllm\" or key.startswith(\"vllm.\"):\n", " del sys.modules[key]\n", "\n", "# Patch TRL's availability check\n", "import trl.import_utils\n", "trl.import_utils._vllm_available = False\n", "\n", "from trl import GRPOConfig, GRPOTrainer\n", "print(f\"trl: {trl.__version__}\")\n", "print(f\"GRPOConfig imported: {GRPOConfig is not None}\")\n", "print(f\"GRPOTrainer imported: {GRPOTrainer is not None}\")\n", "print(\"\\nTRL import verification passed.\")" ] }, { "cell_type": "markdown", "id": "cell-7", "metadata": {}, "source": [ "## 4 — Load Model with 4-bit Quantization + LoRA" ] }, { "cell_type": "code", "execution_count": null, "id": "cell-8", "metadata": {}, "outputs": [], "source": [ "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig\n", "from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training\n", "\n", "bnb_config = BitsAndBytesConfig(\n", " load_in_4bit=True,\n", " bnb_4bit_quant_type=\"nf4\",\n", " bnb_4bit_compute_dtype=torch.bfloat16,\n", ")\n", "\n", "model = AutoModelForCausalLM.from_pretrained(\n", " \"Qwen/Qwen2.5-1.5B-Instruct\",\n", " quantization_config=bnb_config,\n", " device_map=\"auto\",\n", " torch_dtype=torch.bfloat16,\n", ")\n", "tokenizer = AutoTokenizer.from_pretrained(\"Qwen/Qwen2.5-1.5B-Instruct\")\n", "\n", "model = prepare_model_for_kbit_training(model)\n", "\n", "lora_config = LoraConfig(\n", " r=8,\n", " lora_alpha=16,\n", " 
target_modules=[\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", " \"gate_proj\", \"up_proj\", \"down_proj\"],\n", " lora_dropout=0.0,\n", " task_type=\"CAUSAL_LM\",\n", ")\n", "model = get_peft_model(model, lora_config)\n", "model.gradient_checkpointing_enable(gradient_checkpointing_kwargs={\"use_reentrant\": False})\n", "model.print_trainable_parameters()\n", "print(\"\\nModel loading verification passed.\")" ] }, { "cell_type": "markdown", "id": "cell-9", "metadata": {}, "source": [ "## 5 — Verify Gradient Flow" ] }, { "cell_type": "code", "execution_count": null, "id": "cell-10", "metadata": {}, "outputs": [], "source": [ "# Run a small forward + backward pass to catch gradient issues early\n", "test_input = tokenizer(\"Hello, world!\", return_tensors=\"pt\").to(\"cuda\")\n", "test_input[\"labels\"] = test_input[\"input_ids\"].clone()\n", "\n", "output = model(**test_input)\n", "loss = output.loss\n", "loss.backward()\n", "\n", "# Check that at least some LoRA parameters got gradients\n", "grad_params = sum(1 for p in model.parameters() if p.grad is not None and p.grad.abs().sum() > 0)\n", "print(f\"Loss: {loss.item():.4f}\")\n", "print(f\"Params with grads: {grad_params}\")\n", "assert grad_params > 0, \"No parameters received gradients!\"\n", "\n", "model.zero_grad()\n", "print(\"\\nGradient flow verification passed.\")" ] }, { "cell_type": "markdown", "id": "cell-11", "metadata": {}, "source": [ "## 6 — VB2 Environment: Import & Reset" ] }, { "cell_type": "code", "execution_count": null, "id": "cell-12", "metadata": {}, "outputs": [], "source": [ "%%capture\n", "!git clone https://github.com/retroam/vendsim-vb2.git /tmp/vendsim-vb2 2>/dev/null || true\n", "import sys\n", "sys.path.insert(0, '/tmp/vendsim-vb2')" ] }, { "cell_type": "code", "execution_count": null, "id": "cell-13", "metadata": {}, "outputs": [], "source": [ "from vendsim_vb2.environment import VendingBench2Environment\n", "from vendsim_vb2.demand import PRODUCTS\n", "import json\n", "\n", 
"env = VendingBench2Environment(seed=42)\n", "env.reset()\n", "\n", "print(f\"Starting balance: ${env.state.cash_balance}\")\n", "print(f\"Products: {list(PRODUCTS.keys())}\")\n", "print(f\"Day: {env.state.day_index}\")\n", "assert env.state.cash_balance == 500.0\n", "assert len(PRODUCTS) == 5\n", "print(\"\\nEnvironment reset verification passed.\")" ] }, { "cell_type": "markdown", "id": "cell-14", "metadata": {}, "source": [ "## 7 — VB2 Tool Calls: Explore Balance & Inventory" ] }, { "cell_type": "code", "execution_count": null, "id": "cell-15", "metadata": {}, "outputs": [], "source": [ "# Check balance\n", "balance = env.state.cash_balance\n", "print(f\"Balance: ${balance}\")\n", "assert isinstance(balance, (int, float))\n", "\n", "# Check storage inventory\n", "storage_inv = dict(env.state.storage_inventory)\n", "print(f\"Storage inventory: {storage_inv}\")\n", "assert isinstance(storage_inv, dict)\n", "\n", "# Check machine inventory\n", "machine_inv = dict(env.state.machine_inventory)\n", "print(f\"Machine inventory: {machine_inv}\")\n", "assert isinstance(machine_inv, dict)\n", "\n", "print(\"\\nBalance & inventory verification passed.\")" ] }, { "cell_type": "markdown", "id": "cell-16", "metadata": {}, "source": [ "## 8 — VB2 Tool Calls: Set Prices" ] }, { "cell_type": "code", "execution_count": null, "id": "cell-17", "metadata": {}, "outputs": [], "source": [ "# Set prices for all products\n", "test_prices = {\"soda\": 1.75, \"water\": 1.25, \"candy\": 1.00, \"chips\": 2.50, \"sandwich\": 4.50}\n", "\n", "for product, price in test_prices.items():\n", " env.set_price(product, price)\n", " current = env.state.prices.get(product)\n", " print(f\" {product}: set to ${price} -> current: ${current}\")\n", " assert current == price, f\"Price mismatch for {product}: expected {price}, got {current}\"\n", "\n", "print(\"\\nPrice setting verification passed.\")" ] }, { "cell_type": "markdown", "id": "cell-18", "metadata": {}, "source": [ "## 9 — VB2 Tool Calls: 
Supplier Quote & Inventory Restock" ] }, { "cell_type": "code", "execution_count": null, "id": "cell-19", "metadata": {}, "outputs": [], "source": [ "# Request supplier quote\n", "quote = env.request_supplier_quote(\"chips\", 20)\n", "print(f\"Supplier quote: {quote}\")\n", "assert quote is not None\n", "\n", "# Stock storage and restock machine\n", "for product in PRODUCTS:\n", " env.state.storage_inventory[product] = 20\n", "\n", "restock_result = env.run_sub_agent(\"restock_machine\", product=\"soda\", qty=3)\n", "print(f\"Restock result: {restock_result}\")\n", "assert restock_result is not None\n", "\n", "print(\"\\nSupplier & restock verification passed.\")" ] }, { "cell_type": "markdown", "id": "cell-20", "metadata": {}, "source": [ "## 10 — VB2 Tool Calls: Scratchpad" ] }, { "cell_type": "code", "execution_count": null, "id": "cell-21", "metadata": {}, "outputs": [], "source": [ "# Write to scratchpad\n", "env.write_scratchpad(\"Setup verification: all systems nominal.\")\n", "print(\"Scratchpad written.\")\n", "\n", "# Read scratchpad\n", "notes = env.read_scratchpad()\n", "print(f\"Scratchpad contents: {notes}\")\n", "assert \"Setup verification\" in str(notes)\n", "\n", "print(\"\\nScratchpad verification passed.\")" ] }, { "cell_type": "markdown", "id": "cell-22", "metadata": {}, "source": [ "## 11 — VB2 Tool Calls: Advance Days & Observe Sales" ] }, { "cell_type": "code", "execution_count": null, "id": "cell-23", "metadata": {}, "outputs": [], "source": [ "NUM_DAYS = 5\n", "\n", "print(f\"Advancing {NUM_DAYS} days...\\n\")\n", "for i in range(NUM_DAYS):\n", " result = env.wait_for_next_day()\n", " sales = result.payload.get('sales', {})\n", " revenue = result.payload.get('revenue', 0.0)\n", " print(f\" Day {env.state.day_index - 1}: sales={sales}, revenue=${revenue:.2f}, balance=${env.state.cash_balance:.2f}\")\n", "\n", "final_balance = env.state.cash_balance\n", "print(f\"\\nFinal balance after {NUM_DAYS} days: ${final_balance:.2f}\")\n", "assert 
isinstance(final_balance, (int, float))\n", "\n", "print(\"\\nDay advancement verification passed.\")" ] }, { "cell_type": "markdown", "id": "cell-24", "metadata": {}, "source": [ "## 12 — Summary" ] }, { "cell_type": "code", "execution_count": null, "id": "cell-25", "metadata": {}, "outputs": [], "source": [ "print(\"=\"*50)\n", "print(\" Lambda GH200 Setup Verification\")\n", "print(\"=\"*50)\n", "print(f\" [PASS] Dependencies installed\")\n", "print(f\" [PASS] CUDA/GPU: {gpu_name}, {vram_gb:.0f}GB VRAM\")\n", "print(f\" [PASS] TRL imports (vllm blocked)\")\n", "print(f\" [PASS] Model: 4-bit quant + LoRA\")\n", "print(f\" [PASS] Gradient flow\")\n", "print(f\" [PASS] VB2 environment reset\")\n", "print(f\" [PASS] Balance & inventory checks\")\n", "print(f\" [PASS] Price setting\")\n", "print(f\" [PASS] Supplier quote & restock\")\n", "print(f\" [PASS] Scratchpad read/write\")\n", "print(f\" [PASS] Day advancement & sales\")\n", "print(\"=\"*50)\n", "print(\"\\nAll checks passed. Ready to run 01_vb2_training_grpo.ipynb\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.3" } }, "nbformat": 4, "nbformat_minor": 5 }