Make sure to use the train split when loading a dataset from the Hugging Face Hub
Browse files
src/axolotl/utils/data.py
CHANGED
|
@@ -58,6 +58,7 @@ def load_tokenized_prepared_datasets(tokenizer, cfg, default_dataset_prepared_pa
|
|
| 58 |
try:
|
| 59 |
if cfg.push_dataset_to_hub:
|
| 60 |
dataset = load_dataset(f"{cfg.push_dataset_to_hub}/{ds_hash}", use_auth_token=True)
|
|
|
|
| 61 |
except:
|
| 62 |
pass
|
| 63 |
|
|
@@ -232,6 +233,7 @@ def load_prepare_datasets(tokenizer: PreTrainedTokenizerBase, cfg, default_datas
|
|
| 232 |
f"checkking for packed prepared dataset from hub... {cfg.push_dataset_to_hub}/{ds_hash}"
|
| 233 |
)
|
| 234 |
dataset = load_dataset(f"{cfg.push_dataset_to_hub}/{ds_hash}", use_auth_token=True)
|
|
|
|
| 235 |
except:
|
| 236 |
pass
|
| 237 |
|
|
|
|
| 58 |
try:
|
| 59 |
if cfg.push_dataset_to_hub:
|
| 60 |
dataset = load_dataset(f"{cfg.push_dataset_to_hub}/{ds_hash}", use_auth_token=True)
|
| 61 |
+
dataset = dataset["train"]
|
| 62 |
except:
|
| 63 |
pass
|
| 64 |
|
|
|
|
| 233 |
f"checkking for packed prepared dataset from hub... {cfg.push_dataset_to_hub}/{ds_hash}"
|
| 234 |
)
|
| 235 |
dataset = load_dataset(f"{cfg.push_dataset_to_hub}/{ds_hash}", use_auth_token=True)
|
| 236 |
+
dataset = dataset["train"]
|
| 237 |
except:
|
| 238 |
pass
|
| 239 |
|