import pickle
from pathlib import Path

import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from src.pipeline.build_pipeline import build_pipeline


def train_model(
    data_path="data/training.csv",
    model_path="artifacts/model.pkl",
    sample_size=1000,
    test_size=0.2,
):
    """Train the text pipeline on a sample of the CSV and pickle the result.

    The CSV is read without a header row using the column names
    ``target, id, date, flag, user, text``. Target value ``4`` is remapped to
    ``1``, a random sample of rows is drawn, and the ``text`` column is used
    to predict ``target``. Train and test accuracy are printed, then the
    fitted pipeline is written to ``model_path`` with ``pickle``.

    Args:
        data_path: Location of the training CSV (read as ISO-8859-1).
        model_path: Where the fitted pipeline is pickled. Missing parent
            directories are created.
        sample_size: Maximum number of rows to sample before splitting.
            Clamped to the number of rows available; ``None`` keeps every row.
        test_size: Fraction of the sampled rows held out for evaluation.

    Raises:
        ValueError: If the CSV contains no rows.
    """
    columns = ["target", "id", "date", "flag", "user", "text"]
    data = pd.read_csv(data_path, names=columns, encoding="ISO-8859-1")
    if data.empty:
        raise ValueError(f"No rows found in {data_path!r}; nothing to train on.")

    # Collapse label 4 into 1 so the target takes the values the rest of the
    # code evaluates against (presumably a binary 0/1 label -- confirm with
    # the dataset description).
    data["target"] = data["target"].replace({4: 1})

    if sample_size is not None:
        # Clamp so a CSV smaller than ``sample_size`` does not make
        # ``DataFrame.sample`` raise.
        n_rows = min(sample_size, len(data))
        data = data.sample(n_rows, random_state=42)

    X = data["text"]
    y = data["target"]

    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, stratify=y, random_state=2
    )

    pipeline = build_pipeline()
    pipeline.fit(x_train, y_train)

    print("Train Accuracy:", accuracy_score(y_train, pipeline.predict(x_train)))
    print("Test Accuracy:", accuracy_score(y_test, pipeline.predict(x_test)))

    # Make sure the output directory exists; otherwise ``open`` fails only
    # after all the training work has been done.
    model_file = Path(model_path)
    model_file.parent.mkdir(parents=True, exist_ok=True)
    with model_file.open("wb") as f:
        pickle.dump(pipeline, f)


if __name__ == "__main__":
    train_model()