LorenzoBioinfo committed on
Commit
57062dc
·
1 Parent(s): 11c7c73

Data preparation

Browse files
Files changed (1) hide show
  1. src/data_preparation.py +2 -2
src/data_preparation.py CHANGED
@@ -73,7 +73,7 @@ def prepare_tweet_eval(tokenizer, output_path):
73
  "label": [2, 0, 1, 2, 0],
74
  }
75
  ds = safe_load_dataset("tweet_eval", "sentiment", fallback_data=fallback_data)
76
- ds = ds.select(range(1000))
77
  ds = ds.map(lambda x: {"text": clean_text(x["text"])})
78
  ds = ds.map(tokenize_function, batched=True)
79
  ds.save_to_disk(output_path)
@@ -87,7 +87,7 @@ def prepare_youtube(tokenizer, output_path):
87
  "Sentiment": ["positive", "negative", "neutral", "positive", "negative"],
88
  }
89
  ds = safe_load_dataset("AmaanP314/youtube-comment-sentiment", fallback_data=fallback_data)
90
- ds = ds.select(range(1000))
91
  ds = ds.map(lambda x: {"text": clean_text(x["CommentText"])})
92
  ds = ds.map(lambda x: {"label": map_label(x["Sentiment"])})
93
  ds = ds.map(tokenize_function, batched=True)
 
73
  "label": [2, 0, 1, 2, 0],
74
  }
75
  ds = safe_load_dataset("tweet_eval", "sentiment", fallback_data=fallback_data)
76
+ ds =ds["train"].select(range(1000))
77
  ds = ds.map(lambda x: {"text": clean_text(x["text"])})
78
  ds = ds.map(tokenize_function, batched=True)
79
  ds.save_to_disk(output_path)
 
87
  "Sentiment": ["positive", "negative", "neutral", "positive", "negative"],
88
  }
89
  ds = safe_load_dataset("AmaanP314/youtube-comment-sentiment", fallback_data=fallback_data)
90
+ ds = ds["train"].select(range(1000))
91
  ds = ds.map(lambda x: {"text": clean_text(x["CommentText"])})
92
  ds = ds.map(lambda x: {"label": map_label(x["Sentiment"])})
93
  ds = ds.map(tokenize_function, batched=True)