dp1812 committed on
Commit
4d8db9e
·
verified ·
1 Parent(s): dea3286

fix: pin transformers stack and force slow tokenizer by default to avoid fast-tokenizer errors

Browse files
Files changed (1) hide show
  1. CELESTIAL_Training_Notebook.ipynb +54 -0
CELESTIAL_Training_Notebook.ipynb CHANGED
@@ -1,5 +1,59 @@
1
  {
2
  "cells": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  {
4
  "cell_type": "code",
5
  "metadata": {},
 
1
  {
2
  "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "metadata": {},
6
+ "execution_count": null,
7
+ "outputs": [],
8
+ "source": [
9
+ "# 🔐 Hugging Face Authentication for Google Colab\n",
10
+ "try:\n",
11
+ " from google.colab import userdata\n",
12
+ " import os\n",
13
+ " hf_token = userdata.get('HF_TOKEN')\n",
14
+ " os.environ['HUGGINGFACE_HUB_TOKEN'] = hf_token\n",
15
+ " print('✅ HF token loaded from Colab secrets')\n",
16
+ "except ImportError:\n",
17
+ " print('⚠️ Not running in Colab, skipping token setup')\n",
18
+ "except Exception as e:\n",
19
+ " print(f'⚠️ Could not load HF_TOKEN from Colab secrets: {e}')\n",
20
+ " print('💡 Add HF_TOKEN to Colab secrets: Secrets tab → Add new secret → Name: HF_TOKEN')\n"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "metadata": {},
26
+ "execution_count": null,
27
+ "outputs": [],
28
+ "source": [
29
+ "# 🔧 Install compatible versions for stable training\n",
30
+ "!pip install -q 'transformers>=4.36.0' 'tokenizers>=0.15.0'\n",
31
+ "!pip install -q 'peft>=0.8.0' 'datasets>=2.16.0' 'bitsandbytes>=0.42.0' 'accelerate>=0.26.0' huggingface_hub trl\n",
32
+ "import os; os.environ['TOKENIZERS_PARALLELISM'] = 'false'\n",
33
+ "print('✅ Compatible HF stack installed')\n"
34
+ ]
35
+ },
36
+ {
37
+ "cell_type": "code",
38
+ "metadata": {},
39
+ "execution_count": null,
40
+ "outputs": [],
41
+ "source": [
42
+ "# 🔄 Clear any previous patches and restart imports\n",
43
+ "import importlib\n",
44
+ "import sys\n",
45
+ "\n",
46
+ "# Clear transformers from cache if it exists\n",
47
+ "if 'transformers' in sys.modules:\n",
48
+ " del sys.modules['transformers']\n",
49
+ " print('🧹 Cleared transformers from module cache')\n",
50
+ "\n",
51
+ "# Fresh import\n",
52
+ "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
53
+ "print('✅ Fresh transformers import - no patches applied')\n",
54
+ "print('💡 Use explicit parameters: AutoTokenizer.from_pretrained(model, use_fast=False, trust_remote_code=False)')\n"
55
+ ]
56
+ },
57
  {
58
  "cell_type": "code",
59
  "metadata": {},