Spaces:
Sleeping
Sleeping
from pathlib import Path

import joblib
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

from preprocess import clean_text
# Train a TF-IDF + Multinomial Naive Bayes intent classifier from a CSV of
# (patterns, tag, responses) rows, report hold-out accuracy, and persist both
# the fitted pipeline and a tag -> responses lookup with joblib.

# Input and output locations, named once so they are easy to find and change.
DATA_PATH = "data/intents.csv"
MODEL_DIR = Path("models")

# Load dataset
df = pd.read_csv(DATA_PATH)

# Clean text. `clean_text` comes from the project-local `preprocess` module;
# presumably it normalises each raw pattern string -- confirm against that
# module. The cleaned text overwrites the original "patterns" column.
df["patterns"] = df["patterns"].apply(clean_text)

# Features (patterns) and labels (tags)
X = df["patterns"]
y = df["tag"]

# Train/test split: 20% held out, fixed seed so runs are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build pipeline: the vectoriser is fitted inside the pipeline, so the saved
# artefact accepts text directly at prediction time.
model = Pipeline([
    ("tfidf", TfidfVectorizer()),
    ("clf", MultinomialNB()),
])

# Train
model.fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

# Make sure the output directory exists before writing. Without this, a fresh
# checkout trains successfully and then fails with FileNotFoundError on save.
MODEL_DIR.mkdir(parents=True, exist_ok=True)

# Save model
joblib.dump(model, MODEL_DIR / "lms_chatbot.joblib")

# Save responses by tag: one list per tag holding the "responses" value of
# every row carrying that tag (duplicates are kept as they appear in the CSV).
responses = df.groupby("tag")["responses"].apply(list).to_dict()
joblib.dump(responses, MODEL_DIR / "responses.joblib")

print("✅ Training complete. Model and responses saved.")