Spaces:
Sleeping
Sleeping
| from data_processing import load_data, spotify_data, path | |
| import pandas | |
| import nltk | |
| from nltk.corpus import stopwords | |
| from nltk.tokenize import word_tokenize | |
| import string | |
| #---------------------------Download the required NLTK resources (uncomment on first run)-------------------------------- | |
| #nltk.download('punkt') | |
| #nltk.download('stopwords') | |
# Translation table that deletes every ASCII punctuation character. It is the
# same for every call, so it is built once at import time.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)

# Lazily filled cache for the English stop-word set (see _english_stop_words).
_ENGLISH_STOP_WORDS = None


def _english_stop_words():
    """Return the NLTK English stop words as a frozenset, loading them once."""
    global _ENGLISH_STOP_WORDS
    if _ENGLISH_STOP_WORDS is None:
        # Loaded on first use rather than at import time so that importing
        # this module does not itself require the NLTK corpus to be present.
        _ENGLISH_STOP_WORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOP_WORDS


def clean_lyrics(lyrics):
    """Tokenize lyrics into lowercase words without punctuation or stop words.

    Steps: tokenize with ``word_tokenize``, lowercase each token, delete every
    character in ``string.punctuation``, then drop English stop words and any
    token that became empty after punctuation removal.

    Args:
        lyrics: The raw lyrics text. Any non-string value (for example ``None``
            or a float NaN standing in for missing text) is treated as having
            no lyrics.

    Returns:
        A list of cleaned tokens in their original order; an empty list when
        ``lyrics`` is not a string or nothing survives the cleaning.

    Note:
        Requires the NLTK tokenizer and stop-word resources to be installed
        (see the ``nltk.download`` calls referenced near the imports).
    """
    # Missing values would otherwise crash the tokenizer.
    if not isinstance(lyrics, str):
        return []

    stop_words = _english_stop_words()

    # Lowercase first, then strip punctuation, matching the stop-word list's
    # lowercase entries.
    stripped_tokens = (
        token.lower().translate(_PUNCTUATION_TABLE)
        for token in word_tokenize(lyrics)
    )

    # `word` is falsy when the token consisted solely of punctuation; such
    # empty strings carry no information and are dropped.
    return [word for word in stripped_tokens if word and word not in stop_words]
# Run the cleaner over every lyric and keep the resulting token lists in a
# new 'cleaned_text' column of the shared DataFrame (modified in place).
spotify_data['cleaned_text'] = spotify_data['text'].apply(
    clean_lyrics,
)

# Save the augmented table next to the script, leaving out the row index.
spotify_data.to_csv(
    'spotify_data_processed.csv',
    index=False,
)

# Debug aid: uncomment to preview the first cleaned rows.
# print(spotify_data['cleaned_text'].head())