LorenzoBioinfo committed on
Commit
97e2e51
·
1 Parent(s): f42bc7f
Files changed (1) hide show
  1. src/data_preparation.py +1 -1
src/data_preparation.py CHANGED
@@ -79,7 +79,7 @@ def prepare_tweet_eval(tokenizer, output_path):
79
  reduced_splits[split] = ds[split].select(range(min(1000, len(ds[split]))))
80
  reduced_splits[split] = reduced_splits[split].map(lambda x: {"text": clean_text(x["text"])})
81
  reduced_splits[split] = reduced_splits[split].map(tokenize_function, batched=True)
82
- ds = datasets.DatasetDict(reduced_splits)
83
  else:
84
  ds = ds.select(range(min(1000, len(ds))))
85
  ds = ds.map(lambda x: {"text": clean_text(x["text"])})
 
79
  reduced_splits[split] = ds[split].select(range(min(1000, len(ds[split]))))
80
  reduced_splits[split] = reduced_splits[split].map(lambda x: {"text": clean_text(x["text"])})
81
  reduced_splits[split] = reduced_splits[split].map(tokenize_function, batched=True)
82
+ ds = DatasetDict(reduced_splits)
83
  else:
84
  ds = ds.select(range(min(1000, len(ds))))
85
  ds = ds.map(lambda x: {"text": clean_text(x["text"])})