Upload main.ipynb with huggingface_hub
Browse files — main.ipynb (+2 −2)
main.ipynb
CHANGED
|
@@ -643,7 +643,7 @@
|
|
| 643 |
"# Quick memory test\n",
|
| 644 |
"with torch.no_grad():\n",
|
| 645 |
" test_input = torch.randint(0, 50257, (config.batch_size, config.seq_len), device=device)\n",
|
| 646 |
-
" _ = [removed line truncated by the diff renderer; full original call not shown]
|
| 647 |
" print(f\"Memory after forward: {torch.cuda.memory_allocated() / 1e9:.2f} GB / {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB\")\n",
|
| 648 |
" del test_input, _\n",
|
| 649 |
" torch.cuda.empty_cache()\n",
|
|
@@ -1874,7 +1874,7 @@
|
|
| 1874 |
" tokens_processed += batch.numel()\n",
|
| 1875 |
"\n",
|
| 1876 |
" with autocast('cuda', dtype=torch.float16):\n",
|
| 1877 |
-
" result = [removed line truncated by the diff renderer; full original call not shown]
|
| 1878 |
" loss = result['loss'] / config.grad_accum_steps\n",
|
| 1879 |
"\n",
|
| 1880 |
" scaler.scale(loss).backward()\n",
|
|
|
|
| 643 |
"# Quick memory test\n",
|
| 644 |
"with torch.no_grad():\n",
|
| 645 |
" test_input = torch.randint(0, 50257, (config.batch_size, config.seq_len), device=device)\n",
|
| 646 |
+
" _ = model_unwrapped.compute_loss(test_input)\n",
|
| 647 |
" print(f\"Memory after forward: {torch.cuda.memory_allocated() / 1e9:.2f} GB / {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB\")\n",
|
| 648 |
" del test_input, _\n",
|
| 649 |
" torch.cuda.empty_cache()\n",
|
|
|
|
| 1874 |
" tokens_processed += batch.numel()\n",
|
| 1875 |
"\n",
|
| 1876 |
" with autocast('cuda', dtype=torch.float16):\n",
|
| 1877 |
+
" result = model_unwrapped.compute_loss(batch)\n",
|
| 1878 |
" loss = result['loss'] / config.grad_accum_steps\n",
|
| 1879 |
"\n",
|
| 1880 |
" scaler.scale(loss).backward()\n",
|