stmasson committed on
Commit
6438e23
·
verified ·
1 Parent(s): ebb6ddd

Upload scripts/train_alizee_v2_stage1_sft.py with huggingface_hub

Browse files
scripts/train_alizee_v2_stage1_sft.py CHANGED
@@ -9,6 +9,8 @@
9
  # "bitsandbytes>=0.45.0",
10
  # "trackio",
11
  # "datasets>=3.0.0",
 
 
12
  # ]
13
  # ///
14
 
@@ -66,7 +68,11 @@ print("=" * 60)
66
 
67
  # Load tokenizer
68
  print("\n📝 Loading tokenizer...")
69
- tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
 
 
 
 
70
  if tokenizer.pad_token is None:
71
  tokenizer.pad_token = tokenizer.eos_token
72
  tokenizer.padding_side = "right"
 
9
  # "bitsandbytes>=0.45.0",
10
  # "trackio",
11
  # "datasets>=3.0.0",
12
+ # "protobuf>=3.20.0",
13
+ # "sentencepiece>=0.2.0",
14
  # ]
15
  # ///
16
 
 
68
 
69
  # Load tokenizer
70
  print("\n📝 Loading tokenizer...")
71
+ tokenizer = AutoTokenizer.from_pretrained(
72
+ BASE_MODEL,
73
+ trust_remote_code=True,
74
+ use_fast=False, # Use slow tokenizer to avoid conversion issues
75
+ )
76
  if tokenizer.pad_token is None:
77
  tokenizer.pad_token = tokenizer.eos_token
78
  tokenizer.padding_side = "right"