Spaces:
Running
Running
LorenzoBioinfo
committed on
Commit
·
51a665e
1
Parent(s):
e7ccbda
Download data
Browse files- requirements.txt +2 -1
- src/data_preparation.py +2 -0
requirements.txt
CHANGED
|
@@ -13,4 +13,5 @@ huggingface_hub==0.24.6
|
|
| 13 |
black
|
| 14 |
httpx
|
| 15 |
python-multipart
|
| 16 |
-
evaluate
|
|
|
|
|
|
| 13 |
black
|
| 14 |
httpx
|
| 15 |
python-multipart
|
| 16 |
+
evaluate
|
| 17 |
+
huggingface-hub
|
src/data_preparation.py
CHANGED
|
@@ -3,6 +3,7 @@ from transformers import AutoTokenizer
|
|
| 3 |
import argparse
|
| 4 |
import re
|
| 5 |
import os
|
|
|
|
| 6 |
|
| 7 |
MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment-latest"
|
| 8 |
PROCESSED_DIR = "data/processed/"
|
|
@@ -60,6 +61,7 @@ def prepare_tweet_eval(tokenizer, output_path):
|
|
| 60 |
|
| 61 |
def prepare_youtube(tokenizer, output_path):
|
| 62 |
print("Scarico e preparo il dataset YouTube Comments...")
|
|
|
|
| 63 |
ds = load_dataset("AmaanP314/youtube-comment-sentiment")
|
| 64 |
ds = ds.map(lambda x: {"text": clean_text(x["CommentText"])})
|
| 65 |
ds = ds.map(lambda x: {"label": map_label(x["Sentiment"])})
|
|
|
|
| 3 |
import argparse
|
| 4 |
import re
|
| 5 |
import os
|
| 6 |
+
from huggingface_hub import configure_http_backend
|
| 7 |
|
| 8 |
MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment-latest"
|
| 9 |
PROCESSED_DIR = "data/processed/"
|
|
|
|
| 61 |
|
| 62 |
def prepare_youtube(tokenizer, output_path):
|
| 63 |
print("Scarico e preparo il dataset YouTube Comments...")
|
| 64 |
+
configure_http_backend(timeout=60)
|
| 65 |
ds = load_dataset("AmaanP314/youtube-comment-sentiment")
|
| 66 |
ds = ds.map(lambda x: {"text": clean_text(x["CommentText"])})
|
| 67 |
ds = ds.map(lambda x: {"label": map_label(x["Sentiment"])})
|