Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import pickle | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.metrics import accuracy_score | |
| from src.pipeline.build_pipeline import build_pipeline | |
def train_model(
    data_path='data/training.csv',
    model_path='artifacts/model.pkl',
    sample_size=1000,
):
    """Train the text pipeline on a sample of the CSV and pickle the result.

    Reads the CSV at ``data_path`` with fixed column names, draws a
    reproducible random sample, remaps the ``target`` label 4 to 1, fits the
    object returned by ``build_pipeline()`` on an 80/20 stratified split of
    the ``text`` column, prints train and test accuracy, and writes the
    fitted pipeline to ``model_path``.

    Args:
        data_path: CSV file to read. Column names are supplied here, so the
            file is expected to carry no header row of its own.
        model_path: Destination of the pickled pipeline. Missing parent
            directories are created.
        sample_size: Maximum number of rows used for training plus
            evaluation. Capped at the number of rows actually present so a
            small file does not make ``DataFrame.sample`` raise.

    Returns:
        None. Accuracy figures go to stdout and the model goes to disk.
    """
    import os

    # Create the output directory up front: failing on a missing folder only
    # after the fit has finished would throw the training work away.
    output_dir = os.path.dirname(model_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    columns = ['target', 'id', 'date', 'flag', 'user', 'text']
    data = pd.read_csv(
        data_path,
        names=columns,
        encoding='ISO-8859-1',
    )

    # Sample first, remap second. The remap is a per-value substitution, so
    # the sampled rows end up identical while far fewer values are touched.
    data = data.sample(min(sample_size, len(data)), random_state=42)
    data = data.replace({'target': {4: 1}})

    X = data['text']
    y = data['target']

    # Stratify on the label so both splits keep the same class proportions.
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, stratify=y, random_state=2
    )

    pipeline = build_pipeline()
    pipeline.fit(x_train, y_train)

    print("Train Accuracy:", accuracy_score(y_train, pipeline.predict(x_train)))
    print("Test Accuracy:", accuracy_score(y_test, pipeline.predict(x_test)))

    with open(model_path, 'wb') as f:
        pickle.dump(pipeline, f)
# Script entry point: train and save the model only when this file is run
# directly, never as a side effect of importing it.
if __name__ == '__main__':
    train_model()