| import re | |
def text_preprocessing(text):
    """Return a lowercased copy of *text* with URLs, numbers and punctuation removed.

    The steps run in this order:

    1. Lowercase the whole string.
    2. Delete ``http://`` / ``https://`` URLs and tokens starting with ``www.``.
    3. Delete runs of ASCII digits, including one leading ``+`` or ``-`` sign.
    4. Delete every character that is neither a word character nor whitespace
       (underscores survive because ``\\w`` matches them).
    5. Strip leading and trailing whitespace.

    Interior whitespace is not collapsed, so a removed token can leave two
    adjacent spaces behind.
    """
    # Order matters: URLs must go before punctuation is stripped, otherwise
    # the "://" and "." that the URL pattern relies on would already be gone.
    removal_patterns = (
        r'https?://\S+|www\.\S+',
        r'[-+]?[0-9]+',
        r'[^\w\s]',
    )

    cleaned = text.lower()
    for pattern in removal_patterns:
        cleaned = re.sub(pattern, '', cleaned)
    return cleaned.strip()
| %time data['Text'] = data['Text'].apply(text_preprocessing) | |
| raw_data = data.copy() | |
| raw_data.head() | |
| from sklearn.model_selection import train_test_split | |
# Fixed seed so the train/validation/test partition is identical on every
# run; without it each execution reshuffles the rows and metrics from
# different runs are not comparable.
SPLIT_SEED = 42

# Hold out 20% of the rows first, then divide that holdout 40/60 into
# validation and test -- roughly 80% / 8% / 12% of `data` overall.
df_train, df_holdout = train_test_split(data, test_size=0.2, random_state=SPLIT_SEED)
df_val, df_test = train_test_split(df_holdout, test_size=0.6, random_state=SPLIT_SEED)

# Display the resulting sizes (train, test, validation) as the cell output.
df_train.shape, df_test.shape, df_val.shape