Spaces:
Sleeping
Sleeping
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
import joblib

# Training script: reads "dataset.csv", builds bag-of-words count features
# from its 'text' column, fits a multinomial Naive Bayes classifier against
# its 'label' column, prints held-out accuracy, and writes the fitted
# classifier and vectorizer to "spam_model.pkl" and "vectorizer.pkl".

# Load dataset
print("Loading dataset...")
data = pd.read_csv("dataset.csv")

# Preprocessing: raw documents and their target labels.
X = data['text']
y = data['label']

# Split data BEFORE vectorizing. Fitting the vectorizer on the full dataset
# would build its vocabulary from the held-out rows as well, leaking test
# information into the features and biasing the reported accuracy.
# random_state is fixed so the split is reproducible between runs.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Vectorization: learn the vocabulary from the training documents only, then
# reuse that fitted vocabulary to encode the test documents. Test-only words
# are ignored by transform(), as they would be for unseen text at inference.
print("Vectorizing text...")
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)

# Train model
print("Training Naive Bayes model...")
clf = MultinomialNB()
clf.fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred = clf.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")

# Save model and vectorizer. Both artifacts are needed together at inference
# time: new text must be encoded with this same fitted vectorizer before it
# is passed to the classifier.
print("Saving model and vectorizer...")
joblib.dump(clf, "spam_model.pkl")
joblib.dump(vectorizer, "vectorizer.pkl")
print("Done!")