Spaces:
Sleeping
Sleeping
LorenzoBioinfo committed on
Commit ·
0d848b5
1
Parent(s): 51a665e
Download http
Browse files- src/data_preparation.py +5 -2
src/data_preparation.py
CHANGED
|
@@ -3,7 +3,10 @@ from transformers import AutoTokenizer
|
|
| 3 |
import argparse
|
| 4 |
import re
|
| 5 |
import os
|
| 6 |
-
from huggingface_hub import configure_http_backend
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment-latest"
|
| 9 |
PROCESSED_DIR = "data/processed/"
|
|
@@ -61,7 +64,7 @@ def prepare_tweet_eval(tokenizer, output_path):
|
|
| 61 |
|
| 62 |
def prepare_youtube(tokenizer, output_path):
|
| 63 |
print("Scarico e preparo il dataset YouTube Comments...")
|
| 64 |
-
configure_http_backend(timeout=60)
|
| 65 |
ds = load_dataset("AmaanP314/youtube-comment-sentiment")
|
| 66 |
ds = ds.map(lambda x: {"text": clean_text(x["CommentText"])})
|
| 67 |
ds = ds.map(lambda x: {"label": map_label(x["Sentiment"])})
|
|
|
|
| 3 |
import argparse
|
| 4 |
import re
|
| 5 |
import os
|
| 6 |
+
from huggingface_hub import configure_http_backend, HTTPBackend
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
|
| 10 |
|
| 11 |
MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment-latest"
|
| 12 |
PROCESSED_DIR = "data/processed/"
|
|
|
|
| 64 |
|
| 65 |
def prepare_youtube(tokenizer, output_path):
|
| 66 |
print("Scarico e preparo il dataset YouTube Comments...")
|
| 67 |
+
configure_http_backend(HTTPBackend(timeout=60))
|
| 68 |
ds = load_dataset("AmaanP314/youtube-comment-sentiment")
|
| 69 |
ds = ds.map(lambda x: {"text": clean_text(x["CommentText"])})
|
| 70 |
ds = ds.map(lambda x: {"label": map_label(x["Sentiment"])})
|