Charlie81 committed on
Commit
6d21fca
·
1 Parent(s): 7ab89f2
Files changed (1) hide show
  1. scripts/train.py +4 -1
scripts/train.py CHANGED
@@ -35,7 +35,10 @@ def main():
35
 
36
  # Load dataset
37
  dataset = load_dataset("allenai/tulu-v2-sft-mixture", split="train")
38
-
 
 
 
39
  def tokenize_function(examples):
40
  text_key = "content" if "content" in examples else "text"
41
  return tokenizer(
 
35
 
36
  # Load dataset
37
  dataset = load_dataset("allenai/tulu-v2-sft-mixture", split="train")
38
+ for i in range(10):
39
+ print("looking")
40
+ print(dataset.column_names)
41
+
42
  def tokenize_function(examples):
43
  text_key = "content" if "content" in examples else "text"
44
  return tokenizer(