KevinKeller committed on
Commit
239cbc2
·
verified ·
1 Parent(s): 7c9b6bf

Upload train_pattern_selector.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. train_pattern_selector.py +11 -3
train_pattern_selector.py CHANGED
@@ -1,5 +1,5 @@
1
  # /// script
2
- # dependencies = ["trl>=0.17.0", "peft>=0.15.0", "datasets", "transformers", "accelerate", "bitsandbytes"]
3
  # ///
4
 
5
  import os
@@ -9,6 +9,13 @@ from trl import SFTTrainer, SFTConfig
9
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
10
  import torch
11
 
 
 
 
 
 
 
 
12
  print("Loading dataset...")
13
  dataset = load_dataset("KevinKeller/cognitive-pattern-selector-v1")
14
  train_dataset = dataset["train"]
@@ -29,7 +36,8 @@ bnb_config = BitsAndBytesConfig(
29
  )
30
 
31
  tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
32
- tokenizer.pad_token = tokenizer.eos_token
 
33
 
34
  model = AutoModelForCausalLM.from_pretrained(
35
  model_id,
@@ -71,7 +79,7 @@ trainer = SFTTrainer(
71
  train_dataset=train_dataset,
72
  eval_dataset=eval_dataset,
73
  peft_config=peft_config,
74
- processing_class=tokenizer,
75
  args=training_args,
76
  )
77
 
 
1
  # /// script
2
+ # dependencies = ["trl>=0.12.0", "peft>=0.13.0", "datasets", "transformers>=4.45.0", "accelerate", "bitsandbytes", "huggingface_hub"]
3
  # ///
4
 
5
  import os
 
9
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
10
  import torch
11
 
12
+ # Authenticate
13
+ from huggingface_hub import login
14
+ hf_token = os.environ.get("HF_TOKEN")
15
+ if hf_token:
16
+ login(token=hf_token)
17
+ print("Authenticated with HuggingFace")
18
+
19
  print("Loading dataset...")
20
  dataset = load_dataset("KevinKeller/cognitive-pattern-selector-v1")
21
  train_dataset = dataset["train"]
 
36
  )
37
 
38
  tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
39
+ if tokenizer.pad_token is None:
40
+ tokenizer.pad_token = tokenizer.eos_token
41
 
42
  model = AutoModelForCausalLM.from_pretrained(
43
  model_id,
 
79
  train_dataset=train_dataset,
80
  eval_dataset=eval_dataset,
81
  peft_config=peft_config,
82
+ tokenizer=tokenizer,
83
  args=training_args,
84
  )
85