walidsobhie-code Claude Opus 4.6 committed on
Commit
a075b90
·
1 Parent(s): 2064035

fix: improve Colab notebook for training

Browse files

- Fix subprocess to use direct shell command for better output
- Add environment variables for GPU optimization
- Add variables for easier token/username configuration
- Improve dependency versions

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. notebooks/colab_128k_training.ipynb +11 -49
notebooks/colab_128k_training.ipynb CHANGED
@@ -22,17 +22,7 @@
22
  "execution_count": null,
23
  "metadata": {},
24
  "outputs": [],
25
- "source": [
26
- "import os\n",
27
- "os.chdir(\"/content\")\n",
28
- "\n",
29
- "# Clone repo\n",
30
- "!git clone https://github.com/my-ai-stack/stack-2.9.git\n",
31
- "\n",
32
- "# Install dependencies\n",
33
- "!pip install -q transformers peft datasets bitsandbytes accelerate huggingface_hub\n",
34
- "!pip install -q scipy torch --upgrade"
35
- ]
36
  },
37
  {
38
  "cell_type": "markdown",
@@ -46,12 +36,7 @@
46
  "execution_count": null,
47
  "metadata": {},
48
  "outputs": [],
49
- "source": [
50
- "from huggingface_hub import login\n",
51
- "\n",
52
- "# Get your token at: https://huggingface.co/settings/tokens\n",
53
- "login(token=\"YOUR_HF_TOKEN\") # ← Replace with your token"
54
- ]
55
  },
56
  {
57
  "cell_type": "markdown",
@@ -65,47 +50,24 @@
65
  "execution_count": null,
66
  "metadata": {},
67
  "outputs": [],
68
- "source": [
69
- "# Uncomment to mount Drive\n",
70
- "# from google.colab import drive\n",
71
- "# drive.mount('/content/drive')\n",
72
- "# OUTPUT_DIR = \"/content/drive/MyDrive/stack-2.9-128k-output\"\n",
73
- "\n",
74
- "# Otherwise saves to /content/stack-2.9/output/\n",
75
- "OUTPUT_DIR = \"/content/stack-2.9/output/stack-2.9-128k\""
76
- ]
77
  },
78
  {
79
  "cell_type": "markdown",
80
  "metadata": {},
81
- "source": [
82
- "## Step 4: Run 128K Context Fine-tuning"
83
- ]
84
  },
85
  {
86
  "cell_type": "code",
87
  "execution_count": null,
88
  "metadata": {},
89
  "outputs": [],
90
- "source": [
91
- "import subprocess\n",
92
- "os.chdir(\"/content/stack-2.9\")\n",
93
- "\n",
94
- "result = subprocess.run([\n",
95
- " \"python3\", \"training/train_extended_context.py\",\n",
96
- " \"--model-path\", \"Qwen/Qwen2.5-Coder-1.5B\",\n",
97
- " \"--data-path\", \"training/training-data/tool_examples_combined.jsonl\",\n",
98
- " \"--output-dir\", OUTPUT_DIR,\n",
99
- " \"--context-length\", \"131072\",\n",
100
- " \"--lora-rank\", \"64\",\n",
101
- " \"--epochs\", \"3\",\n",
102
- " \"--push-to-hub\",\n",
103
- " \"--hub-model-id\", \"YOUR_USERNAME/stack-2.9-128k\" # ← Replace with your username\n",
104
- "], capture_output=True, text=True)\n",
105
- "\n",
106
- "print(\"STDOUT:\", result.stdout[-5000:] if result.stdout else \"None\")\n",
107
- "print(\"STDERR:\", result.stderr[-2000:] if result.stderr else \"None\")"
108
- ]
109
  },
110
  {
111
  "cell_type": "markdown",
@@ -143,4 +105,4 @@
143
  },
144
  "nbformat": 4,
145
  "nbformat_minor": 4
146
- }
 
22
  "execution_count": null,
23
  "metadata": {},
24
  "outputs": [],
25
+ "source": "import os\nos.chdir(\"/content\")\n\n# Clone repo\n!git clone https://github.com/my-ai-stack/stack-2.9.git\n\n# Install dependencies\n# NOTE: the version specifier must be quoted; unquoted, the shell treats '>' as an\n# output redirect (writing to a file named '=4.40.0') and the constraint is dropped.\n!pip install -q \"transformers>=4.40.0\" peft datasets bitsandbytes accelerate huggingface_hub\n!pip install -q scipy torch --upgrade\n!pip install -q flash-attn --no-build-isolation # Optional: for faster attention"
 
 
 
 
 
 
 
 
 
 
26
  },
27
  {
28
  "cell_type": "markdown",
 
36
  "execution_count": null,
37
  "metadata": {},
38
  "outputs": [],
39
+ "source": "from getpass import getpass\n\nfrom huggingface_hub import login\n\n# Get your token at: https://huggingface.co/settings/tokens\n# Either paste your token below, or leave the placeholder as-is to be prompted\n# (prompting keeps the token out of the saved notebook).\nHF_TOKEN = \"YOUR_HF_TOKEN\" # ← Replace with your HF token\nif HF_TOKEN == \"YOUR_HF_TOKEN\":\n    # Placeholder was not edited: ask interactively instead of sending it to the Hub.\n    HF_TOKEN = getpass(\"HuggingFace token: \")\nlogin(token=HF_TOKEN)\nprint(\"✓ Logged in to HuggingFace\")"
 
 
 
 
 
40
  },
41
  {
42
  "cell_type": "markdown",
 
50
  "execution_count": null,
51
  "metadata": {},
52
  "outputs": [],
53
+ "source": "# Check GPU and environment\nimport torch\nprint(f\"✓ PyTorch version: {torch.__version__}\")\nprint(f\"✓ CUDA available: {torch.cuda.is_available()}\")\nif torch.cuda.is_available():\n print(f\"✓ GPU: {torch.cuda.get_device_name(0)}\")\n print(f\"✓ VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB\")\n\n# Mount Google Drive (optional)\n# from google.colab import drive\n# drive.mount('/content/drive')\n# OUTPUT_DIR = \"/content/drive/MyDrive/stack-2.9-128k-output\"\n\nOUTPUT_DIR = \"/content/stack-2.9/output/stack-2.9-128k\""
54
+ },
55
+ {
56
+ "cell_type": "markdown",
57
+ "source": "## Step 3.5: Verify GPU Setup",
58
+ "metadata": {}
 
 
 
59
  },
60
  {
61
  "cell_type": "markdown",
62
  "metadata": {},
63
+ "source": "## Step 4: Run 128K Context Fine-tuning"
 
 
64
  },
65
  {
66
  "cell_type": "code",
67
  "execution_count": null,
68
  "metadata": {},
69
  "outputs": [],
70
+ "source": "import os\nimport sys\nos.chdir(\"/content/stack-2.9\")\n\n# Set environment for better GPU usage\nos.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\nos.environ[\"TRANSFORMERS_NO_ADVISORY_WARNINGS\"] = \"true\"\n\n# Training parameters - UPDATE THESE\nYOUR_HF_TOKEN = \"YOUR_HF_TOKEN\" # Replace with your token\nYOUR_USERNAME = \"your-username\" # Replace with your HF username\n\n# Run training directly for better output visibility.\n# OUTPUT_DIR is defined in the earlier setup cell, so the optional Google Drive\n# location is honoured instead of a hard-coded path.\n!python training/train_extended_context.py \\\n --model-path Qwen/Qwen2.5-Coder-1.5B \\\n --data-path training/training-data/tool_examples_combined.jsonl \\\n --output-dir \"{OUTPUT_DIR}\" \\\n --context-length 131072 \\\n --lora-rank 64 \\\n --epochs 3 \\\n --batch-size 1 \\\n --grad-accum 16 \\\n --push-to-hub \\\n --hub-model-id {YOUR_USERNAME}/stack-2.9-128k"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  },
72
  {
73
  "cell_type": "markdown",
 
105
  },
106
  "nbformat": 4,
107
  "nbformat_minor": 4
108
+ }