"""Train a logistic-regression model on lead data and save it to disk.

Reads ``lead_data.csv`` from the current working directory, integer-encodes
the ``lead_source`` and ``region`` columns, fits a ``LogisticRegression`` on
an 80/20 train/test split, prints a classification report for the held-out
rows, and writes the fitted model to ``model/model.pkl``.
"""

import os

import joblib
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Fixed seed so the train/test split -- and therefore the printed report and
# the saved model -- is the same on every run over the same CSV.
RANDOM_STATE = 42

# Load dataset
data = pd.read_csv("lead_data.csv")

# Encode categorical variables as integer codes.
# NOTE(review): the value -> code mapping is derived from this CSV alone and
# is not saved with the model, so inference code cannot reproduce it from the
# pickle; missing values are encoded as -1. Consider persisting the
# categories (or using an encoder inside a Pipeline) -- TODO confirm with the
# consumers of model/model.pkl.
data['lead_source'] = data['lead_source'].astype('category').cat.codes
data['region'] = data['region'].astype('category').cat.codes

# Define features and label
X = data[['lead_source', 'response_time', 'activity_level', 'region']]
y = data['converted']

# Split into training and test sets (20% held out for evaluation)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Train logistic regression model
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)

# Evaluate on the held-out rows
preds = model.predict(X_test)
print("\nModel Performance:\n")
print(classification_report(y_test, preds))

# Save the model. makedirs(exist_ok=True) replaces the exists()/mkdir() pair,
# which could fail if the directory appeared between the check and the call.
os.makedirs("model", exist_ok=True)
joblib.dump(model, "model/model.pkl")
print("✅ Model saved as model/model.pkl")