Spaces:

Dovakiins
/

qwerrwe

Build error

winglian committed on May 22, 2023

Commit

607a4d3

1 Parent(s): 99383f1

make sure to use train split if loading from hf

Files changed (1) hide show

src/axolotl/utils/data.py CHANGED Viewed

@@ -58,6 +58,7 @@ def load_tokenized_prepared_datasets(tokenizer, cfg, default_dataset_prepared_pa
     try:
         if cfg.push_dataset_to_hub:
             dataset = load_dataset(f"{cfg.push_dataset_to_hub}/{ds_hash}", use_auth_token=True)
     except:
         pass
@@ -232,6 +233,7 @@ def load_prepare_datasets(tokenizer: PreTrainedTokenizerBase, cfg, default_datas
                     f"checkking for packed prepared dataset from hub... {cfg.push_dataset_to_hub}/{ds_hash}"
                 )
                 dataset = load_dataset(f"{cfg.push_dataset_to_hub}/{ds_hash}", use_auth_token=True)
         except:
             pass

     try:
         if cfg.push_dataset_to_hub:
             dataset = load_dataset(f"{cfg.push_dataset_to_hub}/{ds_hash}", use_auth_token=True)
+            dataset = dataset["train"]
     except:
         pass
                     f"checkking for packed prepared dataset from hub... {cfg.push_dataset_to_hub}/{ds_hash}"
                 )
                 dataset = load_dataset(f"{cfg.push_dataset_to_hub}/{ds_hash}", use_auth_token=True)
+                dataset = dataset["train"]
         except:
             pass