# model.pkl / lead_scoring_model / train_model.py
# gopichandra's picture
# Upload 3 files
# 8fd86d7 verified
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import os
# Train a logistic-regression lead-scoring model from lead_data.csv, print a
# classification report on a held-out 20% split, and save the fitted model to
# model/model.pkl. Paths are relative to the current working directory.

# Load dataset
data = pd.read_csv("lead_data.csv")

# Encode categorical variables as integer category codes.
# NOTE(review): the code <-> category mapping is derived from this dataset and
# is not saved alongside the model, so whatever scores new leads must
# reproduce the same mapping (missing values become -1) -- confirm against
# the consumer of model.pkl.
for column in ("lead_source", "region"):
    data[column] = data[column].astype("category").cat.codes

# Define features and label
X = data[["lead_source", "response_time", "activity_level", "region"]]
y = data["converted"]

# Split into training and test sets. The fixed seed keeps the split -- and
# therefore the metrics printed below -- reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train logistic regression model
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)

# Evaluate on the held-out split
preds = model.predict(X_test)
print("\nModel Performance:\n")
print(classification_report(y_test, preds))

# Save the model. makedirs(exist_ok=True) creates the output directory only
# when needed, without a separate (race-prone) existence check.
os.makedirs("model", exist_ok=True)
joblib.dump(model, "model/model.pkl")
print("✅ Model saved as model/model.pkl")