import re def text_preprocessing(text): text = text.lower() text = re.sub(r'https?://\S+|www\.\S+', '', text) text = re.sub(r'[-+]?[0-9]+', '', text) text = re.sub(r'[^\w\s]','', text) text = text.strip() return text %time data['Text'] = data['Text'].apply(text_preprocessing) raw_data = data.copy() raw_data.head() from sklearn.model_selection import train_test_split df_train, df_test = train_test_split(data, test_size=0.2) df_val, df_test = train_test_split(df_test, test_size=0.6) df_train.shape, df_test.shape, df_val.shape