# File size: 549 Bytes | e2b99db
import re
# Compiled once at import time: text_preprocessing runs once per row.
_URL_RE = re.compile(r'https?://\S+|www\.\S+')
_NUMBER_RE = re.compile(r'[-+]?[0-9]+')
_PUNCT_RE = re.compile(r'[^\w\s]')


def text_preprocessing(text):
    """Return a lower-cased copy of *text* with URLs, numbers and punctuation removed.

    The steps run in this order:

    1. lower-case the text;
    2. drop ``http://`` / ``https://`` URLs and ``www.`` addresses (up to the
       next whitespace);
    3. drop runs of ASCII digits, together with a directly preceding sign;
    4. drop every character that is neither a word character nor whitespace
       (underscores survive because ``\\w`` matches them);
    5. strip leading and trailing whitespace.

    Whitespace inside the text is not collapsed, so a removed token can leave
    two adjacent spaces behind.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string.
    """
    text = text.lower()
    # URLs go first, while their punctuation is still there to match on.
    text = _URL_RE.sub('', text)
    text = _NUMBER_RE.sub('', text)
    text = _PUNCT_RE.sub('', text)
    return text.strip()
# Replace each entry of data['Text'] with its text_preprocessing() result;
# the IPython %time magic reports how long the statement takes.
# (data is presumably a pandas DataFrame defined earlier — TODO confirm.)
%time data['Text'] = data['Text'].apply(text_preprocessing)
# NOTE(review): this copy is taken after the preprocessing above, so despite
# its name raw_data holds the cleaned text — confirm that is intended.
raw_data = data.copy()
# Preview the copy (shown as notebook output when last in a cell).
raw_data.head()
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows, then split that holdout again: with test_size=0.6
# df_val receives about 40% of it (~8% of all rows) and df_test the remaining
# 60% (~12% of all rows).
# NOTE(review): no random_state is passed, so the partitions differ between runs.
df_train, df_holdout = train_test_split(data, test_size=0.2)
df_val, df_test = train_test_split(df_holdout, test_size=0.6)
# Show the size of each partition (displayed when this is the cell's last expression).
df_train.shape, df_test.shape, df_val.shape