# Non-code header text captured along with the source; kept as comments.
# Emotion-Bert / text-preprocessing
# Rizqi's picture
# Create text-preprocessing
# e2b99db
# raw
# history blame contribute delete
# 549 Bytes
import re
# Compiled once at import time instead of being looked up on every call.
_URL_PATTERN = re.compile(r'https?://\S+|www\.\S+')
_NUMBER_PATTERN = re.compile(r'[-+]?[0-9]+')
_PUNCTUATION_PATTERN = re.compile(r'[^\w\s]')


def text_preprocessing(text: str) -> str:
    """Return a lowercased copy of *text* with URLs, numbers and punctuation removed.

    The steps run in this order:

    1. Lowercase the text.
    2. Delete URLs (``http://...``, ``https://...`` and ``www. ...`` up to the
       next whitespace).
    3. Delete runs of ASCII digits, including one optional leading sign.
    4. Delete every character that is neither a word character nor whitespace.
       Underscores are word characters, so they are kept.
    5. Strip leading and trailing whitespace.

    Removed tokens are replaced by nothing, so whitespace that surrounded them
    is left as is; interior runs of spaces are not collapsed.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string; may be empty.

    >>> text_preprocessing("Hello, World!")
    'hello world'
    """
    # Lowercasing first lets the lowercase-only URL pattern also catch
    # "HTTP://..." and "WWW....".
    text = text.lower()
    text = _URL_PATTERN.sub('', text)
    text = _NUMBER_PATTERN.sub('', text)
    text = _PUNCTUATION_PATTERN.sub('', text)
    return text.strip()
%time data['Text'] = data['Text'].apply(text_preprocessing)
raw_data = data.copy()
raw_data.head()
from sklearn.model_selection import train_test_split
df_train, df_test = train_test_split(data, test_size=0.2)
df_val, df_test = train_test_split(df_test, test_size=0.6)
df_train.shape, df_test.shape, df_val.shape