chatbot / src /train.py
Vivekkrishu's picture
update
f3287af
from pathlib import Path

import joblib
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

from preprocess import clean_text
# Training script for the intent classifier.
#
# Reads the intents dataset, fits a TF-IDF + Multinomial Naive Bayes pipeline
# on the cleaned patterns, prints the accuracy on a held-out split, and saves
# both the fitted pipeline and a tag -> responses lookup with joblib.

# Input dataset and output artifact locations (relative paths, so they are
# resolved against the current working directory).
DATA_PATH = "data/intents.csv"
MODEL_PATH = "models/lms_chatbot.joblib"
RESPONSES_PATH = "models/responses.joblib"

# Load dataset; the columns used below are "patterns", "tag" and "responses".
df = pd.read_csv(DATA_PATH)

# Clean text.
# NOTE(review): clean_text comes from the project's preprocess module and is
# not visible here -- presumably the same function must be applied to user
# input at prediction time; confirm against the inference code.
df["patterns"] = df["patterns"].apply(clean_text)

# Features (patterns) and Labels (tags)
X = df["patterns"]
y = df["tag"]

# Train/Test Split: 20% held out, fixed seed so runs are reproducible.
# NOTE(review): the split is not stratified, so a rare tag can end up entirely
# in the test split; consider stratify=y once every tag has enough examples.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build pipeline: raw text -> TF-IDF features -> Multinomial Naive Bayes.
model = Pipeline([
    ("tfidf", TfidfVectorizer()),
    ("clf", MultinomialNB()),
])

# Train
model.fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

# Make sure the output directories exist before writing: joblib.dump does not
# create missing parent directories, and failing here would throw away the
# training work done above.
for artifact_path in (MODEL_PATH, RESPONSES_PATH):
    Path(artifact_path).parent.mkdir(parents=True, exist_ok=True)

# Save model & responses
joblib.dump(model, MODEL_PATH)

# Save responses by tag: {tag: [response, ...]}, built from the full dataset
# (not just the training split).
responses = df.groupby("tag")["responses"].apply(list).to_dict()
joblib.dump(responses, RESPONSES_PATH)

print("✅ Training complete. Model and responses saved.")