chipling committed on
Commit
942c98a
·
verified ·
1 Parent(s): d5df505

Upload main.ipynb with huggingface_hub

Browse files
Files changed (1) hide show
  1. main.ipynb +3 -13
main.ipynb CHANGED
@@ -148,7 +148,7 @@
148
  "\n",
149
  " # Training\n",
150
  " seq_len: int = 256\n",
151
- " batch_size: int = 16 # T4 16GB \u2014 small batch, more accum\n",
152
  " grad_accum_steps: int = 2 # Effective batch = 128\n",
153
  " learning_rate: float = 3e-4\n",
154
  " weight_decay: float = 0.01\n",
@@ -642,7 +642,7 @@
642
  "model_unwrapped = model\n",
643
  "if torch.cuda.device_count() > 1:\n",
644
  " print(f\"\\nUsing {torch.cuda.device_count()} GPUs with DataParallel!\")\n",
645
- " model_dp = nn.DataParallel(model)\n",
646
  "else:\n",
647
  " model_dp = model\n",
648
  "\n",
@@ -856,7 +856,6 @@
856
  "cell_type": "markdown",
857
  "id": "resume_md",
858
  "metadata": {},
859
- "outputs": [],
860
  "source": [
861
  "## Resume from HuggingFace Checkpoint\n",
862
  "\n",
@@ -866,6 +865,7 @@
866
  },
867
  {
868
  "cell_type": "code",
 
869
  "id": "resume_code",
870
  "metadata": {},
871
  "outputs": [],
@@ -1957,16 +1957,6 @@
1957
  "print(f'Total tokens processed: {tokens_processed:,}')\n"
1958
  ]
1959
  },
1960
- {
1961
- "cell_type": "code",
1962
- "execution_count": null,
1963
- "id": "efed12b1",
1964
- "metadata": {},
1965
- "outputs": [],
1966
- "source": [
1967
- "from google.colab import files; files.download('checkpoint_small.pt')\n"
1968
- ]
1969
- },
1970
  {
1971
  "cell_type": "code",
1972
  "execution_count": null,
 
148
  "\n",
149
  " # Training\n",
150
  " seq_len: int = 256\n",
151
+ " batch_size: int = 32 # T4 16GB \u2014 small batch, more accum\n",
152
  " grad_accum_steps: int = 2 # Effective batch = 128\n",
153
  " learning_rate: float = 3e-4\n",
154
  " weight_decay: float = 0.01\n",
 
642
  "model_unwrapped = model\n",
643
  "if torch.cuda.device_count() > 1:\n",
644
  " print(f\"\\nUsing {torch.cuda.device_count()} GPUs with DataParallel!\")\n",
645
+ " model_dp = nn.DataParallel(model, device_ids=[0, 1], output_device=0)\n",
646
  "else:\n",
647
  " model_dp = model\n",
648
  "\n",
 
856
  "cell_type": "markdown",
857
  "id": "resume_md",
858
  "metadata": {},
 
859
  "source": [
860
  "## Resume from HuggingFace Checkpoint\n",
861
  "\n",
 
865
  },
866
  {
867
  "cell_type": "code",
868
+ "execution_count": null,
869
  "id": "resume_code",
870
  "metadata": {},
871
  "outputs": [],
 
1957
  "print(f'Total tokens processed: {tokens_processed:,}')\n"
1958
  ]
1959
  },
 
 
 
 
 
 
 
 
 
 
1960
  {
1961
  "cell_type": "code",
1962
  "execution_count": null,