OliverSlivka committed on
Commit
6b88641
·
verified ·
1 Parent(s): 6f23561

Upload run_sft_job.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. run_sft_job.py +15 -2
run_sft_job.py CHANGED
@@ -39,12 +39,26 @@ from trl import SFTTrainer, SFTConfig
39
 
40
 
41
 
 
 
 
 
 
 
 
 
 
 
42
  # 1. Load Dataset
43
  print("📦 Loading dataset OliverSlivka/itemsety-real-training...")
44
  dataset = load_dataset("OliverSlivka/itemsety-real-training")
 
 
 
 
45
  train_dataset = dataset["train"]
46
  eval_dataset = dataset["validation"]
47
- print(f"✅ Dataset loaded. Train: {len(train_dataset)}, Eval: {len(eval_dataset)}")
48
 
49
  # 2. Training Configuration
50
  config = SFTConfig(
@@ -99,7 +113,6 @@ trainer = SFTTrainer(
99
  eval_dataset=eval_dataset, # CRITICAL: Must provide eval_dataset when eval_strategy is enabled
100
  args=config,
101
  peft_config=peft_config,
102
- dataset_text_field="messages",
103
  )
104
 
105
  # 5. Start Training
 
39
 
40
 
41
 
42
def format_chat_template(example):
    """Flatten one example's chat messages into a single training string.

    Each message is rendered as ``**Role:** content`` followed by a blank
    line, with the role name capitalized, and the rendered messages are
    concatenated in their original order.

    NOTE(review): this is a hand-written Markdown-style layout; it does not
    invoke a tokenizer chat template. Confirm this is the prompt format the
    model is meant to be trained on.

    Args:
        example: A dataset row holding a ``"messages"`` list of dicts, each
            with ``"role"`` and ``"content"`` keys.

    Returns:
        A dict ``{"text": <formatted string>}``. An empty message list
        produces an empty string.

    Raises:
        KeyError: If ``"messages"``, ``"role"`` or ``"content"`` is missing.
    """
    # Join the rendered messages in one pass instead of growing a string
    # with repeated ``+=`` inside the loop.
    text = "".join(
        f"**{message['role'].capitalize()}:** {message['content']}\n\n"
        for message in example["messages"]
    )
    return {"text": text}
51
+
52
  # 1. Load Dataset
53
  print("📦 Loading dataset OliverSlivka/itemsety-real-training...")
54
  dataset = load_dataset("OliverSlivka/itemsety-real-training")
55
+
56
+ # Apply the formatting function to create the 'text' column
57
+ dataset = dataset.map(format_chat_template)
58
+
59
  train_dataset = dataset["train"]
60
  eval_dataset = dataset["validation"]
61
+ print(f"✅ Dataset loaded and formatted. Train: {len(train_dataset)}, Eval: {len(eval_dataset)}")
62
 
63
  # 2. Training Configuration
64
  config = SFTConfig(
 
113
  eval_dataset=eval_dataset, # CRITICAL: Must provide eval_dataset when eval_strategy is enabled
114
  args=config,
115
  peft_config=peft_config,
 
116
  )
117
 
118
  # 5. Start Training