from pathlib import Path

import joblib
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

from preprocess import clean_text

# Train the intent classifier from data/intents.csv and persist both the
# fitted pipeline and the per-tag response lists under models/.
#
# The CSV is read for three columns: "patterns" (example utterances),
# "tag" (the label to predict) and "responses" (reply text for that tag).
df = pd.read_csv("data/intents.csv")

# Normalise every utterance with the project's clean_text helper before
# vectorising. NOTE(review): presumably the same cleaning has to be applied
# to user input at inference time -- confirm against the serving code.
df["patterns"] = df["patterns"].apply(clean_text)

X = df["patterns"]
y = df["tag"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Bundling the vectoriser and the classifier in one Pipeline means a single
# artifact carries both the fitted vocabulary and the model.
model = Pipeline([
    ("tfidf", TfidfVectorizer()),
    ("clf", MultinomialNB()),
])

model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

# joblib.dump does not create missing parent directories, so make sure the
# output directory exists; otherwise a fresh checkout fails with
# FileNotFoundError only after training has already completed.
Path("models").mkdir(parents=True, exist_ok=True)

joblib.dump(model, "models/lms_chatbot.joblib")

# Map each tag to the list of its "responses" values, one entry per CSV row
# (duplicates are kept as-is).
responses = df.groupby("tag")["responses"].apply(list).to_dict()
joblib.dump(responses, "models/responses.joblib")

print("✅ Training complete. Model and responses saved.")