LorenzoBioinfo committed on
Commit
51a665e
·
1 Parent(s): e7ccbda

Download data

Browse files
Files changed (2) hide show
  1. requirements.txt +2 -1
  2. src/data_preparation.py +2 -0
requirements.txt CHANGED
@@ -13,4 +13,5 @@ huggingface_hub==0.24.6
13
  black
14
  httpx
15
  python-multipart
16
- evaluate
 
 
13
  black
14
  httpx
15
  python-multipart
16
+ evaluate
17
+ huggingface-hub
src/data_preparation.py CHANGED
@@ -3,6 +3,7 @@ from transformers import AutoTokenizer
3
  import argparse
4
  import re
5
  import os
 
6
 
7
  MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment-latest"
8
  PROCESSED_DIR = "data/processed/"
@@ -60,6 +61,7 @@ def prepare_tweet_eval(tokenizer, output_path):
60
 
61
  def prepare_youtube(tokenizer, output_path):
62
  print("Scarico e preparo il dataset YouTube Comments...")
 
63
  ds = load_dataset("AmaanP314/youtube-comment-sentiment")
64
  ds = ds.map(lambda x: {"text": clean_text(x["CommentText"])})
65
  ds = ds.map(lambda x: {"label": map_label(x["Sentiment"])})
 
3
  import argparse
4
  import re
5
  import os
6
+ from huggingface_hub import configure_http_backend
7
 
8
  MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment-latest"
9
  PROCESSED_DIR = "data/processed/"
 
61
 
62
  def prepare_youtube(tokenizer, output_path):
63
  print("Scarico e preparo il dataset YouTube Comments...")
64
+ configure_http_backend(timeout=60)
65
  ds = load_dataset("AmaanP314/youtube-comment-sentiment")
66
  ds = ds.map(lambda x: {"text": clean_text(x["CommentText"])})
67
  ds = ds.map(lambda x: {"label": map_label(x["Sentiment"])})