debugs
Browse files
- scripts/train.py +4 -1
scripts/train.py
CHANGED
|
@@ -35,7 +35,10 @@ def main():
|
|
| 35 |
|
| 36 |
# Load dataset
|
| 37 |
dataset = load_dataset("allenai/tulu-v2-sft-mixture", split="train")
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
| 39 |
def tokenize_function(examples):
|
| 40 |
text_key = "content" if "content" in examples else "text"
|
| 41 |
return tokenizer(
|
|
|
|
| 35 |
|
| 36 |
# Load dataset
|
| 37 |
dataset = load_dataset("allenai/tulu-v2-sft-mixture", split="train")
|
| 38 |
+
for i in range(10):
|
| 39 |
+
print("looking")
|
| 40 |
+
print(dataset.column_names)
|
| 41 |
+
|
| 42 |
def tokenize_function(examples):
|
| 43 |
text_key = "content" if "content" in examples else "text"
|
| 44 |
return tokenizer(
|