Spaces:
Sleeping
Sleeping
LorenzoBioinfo
committed on
Commit
·
57062dc
1
Parent(s):
11c7c73
Data preparation
Browse files
- src/data_preparation.py +2 -2
src/data_preparation.py
CHANGED
|
@@ -73,7 +73,7 @@ def prepare_tweet_eval(tokenizer, output_path):
|
|
| 73 |
"label": [2, 0, 1, 2, 0],
|
| 74 |
}
|
| 75 |
ds = safe_load_dataset("tweet_eval", "sentiment", fallback_data=fallback_data)
|
| 76 |
-
ds =
|
| 77 |
ds = ds.map(lambda x: {"text": clean_text(x["text"])})
|
| 78 |
ds = ds.map(tokenize_function, batched=True)
|
| 79 |
ds.save_to_disk(output_path)
|
|
@@ -87,7 +87,7 @@ def prepare_youtube(tokenizer, output_path):
|
|
| 87 |
"Sentiment": ["positive", "negative", "neutral", "positive", "negative"],
|
| 88 |
}
|
| 89 |
ds = safe_load_dataset("AmaanP314/youtube-comment-sentiment", fallback_data=fallback_data)
|
| 90 |
-
ds = ds.select(range(1000))
|
| 91 |
ds = ds.map(lambda x: {"text": clean_text(x["CommentText"])})
|
| 92 |
ds = ds.map(lambda x: {"label": map_label(x["Sentiment"])})
|
| 93 |
ds = ds.map(tokenize_function, batched=True)
|
|
|
|
| 73 |
"label": [2, 0, 1, 2, 0],
|
| 74 |
}
|
| 75 |
ds = safe_load_dataset("tweet_eval", "sentiment", fallback_data=fallback_data)
|
| 76 |
+
ds =ds["train"].select(range(1000))
|
| 77 |
ds = ds.map(lambda x: {"text": clean_text(x["text"])})
|
| 78 |
ds = ds.map(tokenize_function, batched=True)
|
| 79 |
ds.save_to_disk(output_path)
|
|
|
|
| 87 |
"Sentiment": ["positive", "negative", "neutral", "positive", "negative"],
|
| 88 |
}
|
| 89 |
ds = safe_load_dataset("AmaanP314/youtube-comment-sentiment", fallback_data=fallback_data)
|
| 90 |
+
ds = ds["train"].select(range(1000))
|
| 91 |
ds = ds.map(lambda x: {"text": clean_text(x["CommentText"])})
|
| 92 |
ds = ds.map(lambda x: {"label": map_label(x["Sentiment"])})
|
| 93 |
ds = ds.map(tokenize_function, batched=True)
|