# Lead-score/utils/helpers.py
# prologlover91's picture
# Upload 39 files
# f382db8 verified
"""
Shared helper utilities for the AI Marketing Automation app.
"""
import os
import pandas as pd
import numpy as np
import streamlit as st
# ── Paths ──────────────────────────────────────────────────────────────────────
# Directory that contains this module (absolute path).
_MODULE_DIR = os.path.dirname(os.path.abspath(__file__))
# Project root: the parent of the directory holding this module.
BASE_DIR = os.path.dirname(_MODULE_DIR)
# Locations of the two raw CSV datasets, resolved relative to the project root.
LEADS_PATH = os.path.join(
    BASE_DIR, "Leads", "Leads.csv"
)
BANK_PATH = os.path.join(
    BASE_DIR, "bank+marketing", "bank-full.csv"
)
# ── Loaders ────────────────────────────────────────────────────────────────────
@st.cache_data(show_spinner=False)
def load_leads() -> pd.DataFrame:
    """Read the leads CSV at ``LEADS_PATH`` and return it as a DataFrame.

    Leading and trailing whitespace is stripped from every column name.
    The function is wrapped in ``st.cache_data`` with the spinner disabled.

    Returns:
        The leads data with cleaned column names.
    """
    leads = pd.read_csv(LEADS_PATH)
    # Normalise header names so later lookups do not trip on stray spaces.
    cleaned_names = leads.columns.str.strip()
    leads.columns = cleaned_names
    return leads
@st.cache_data(show_spinner=False)
def load_bank() -> pd.DataFrame:
    """Read the semicolon-delimited bank CSV at ``BANK_PATH`` as a DataFrame.

    Leading and trailing whitespace is stripped from every column name.
    The function is wrapped in ``st.cache_data`` with the spinner disabled.

    Returns:
        The bank data with cleaned column names.
    """
    bank = pd.read_csv(BANK_PATH, sep=";")
    # Normalise header names so later lookups do not trip on stray spaces.
    cleaned_names = bank.columns.str.strip()
    bank.columns = cleaned_names
    return bank
# ── Common helpers ─────────────────────────────────────────────────────────────
def missing_summary(df: pd.DataFrame) -> pd.DataFrame:
    """Summarise missing values for the columns of *df* that have any.

    Args:
        df: Frame to inspect.

    Returns:
        A frame indexed by column name with two columns: ``"Missing Count"``
        (number of null cells) and ``"Missing %"`` (that count as a
        percentage of the row count, rounded to two decimals). Columns
        without nulls are omitted; rows are ordered by ``"Missing %"``,
        highest first.
    """
    null_counts = df.isnull().sum()
    null_share = (null_counts / len(df) * 100).round(2)
    summary = pd.DataFrame(
        {"Missing Count": null_counts, "Missing %": null_share}
    )
    # Keep only the columns that actually contain missing values.
    has_missing = summary["Missing Count"] > 0
    return summary[has_missing].sort_values("Missing %", ascending=False)
def dataset_info(df: pd.DataFrame) -> pd.DataFrame:
    """Build a per-column profile of *df*.

    Args:
        df: Frame to profile.

    Returns:
        A frame indexed by column name with the columns ``"Dtype"`` (dtype
        rendered as a string), ``"Non-Null"`` and ``"Nulls"`` (cell counts),
        and ``"Unique"`` (result of ``DataFrame.nunique``).
    """
    dtype_names = df.dtypes.astype(str)
    present_counts = df.notnull().sum()
    absent_counts = df.isnull().sum()
    distinct_counts = df.nunique()
    return pd.DataFrame(
        {
            "Dtype": dtype_names,
            "Non-Null": present_counts,
            "Nulls": absent_counts,
            "Unique": distinct_counts,
        }
    )
# Maps each phase number (1-8) to a (title, one-line description) pair.
PHASE_DESCRIPTIONS = {
    1: (
        "Data Loading & Ingestion",
        "Load, inspect, and profile both raw datasets.",
    ),
    2: (
        "Exploratory Data Analysis",
        "Visualise distributions, correlations, and target balance.",
    ),
    3: (
        "Preprocessing & Feature Eng.",
        "Clean, encode, engineer features, and handle imbalance.",
    ),
    4: (
        "Lead Scoring Models",
        "Train LR, RF, XGBoost, LightGBM, SVM, and MLP models.",
    ),
    5: (
        "Model Evaluation & XAI",
        "Compare models with ROC/PR curves, SHAP explainability.",
    ),
    6: (
        "NLP Analysis",
        "Sentiment, intent, and keyword analysis on text features.",
    ),
    7: (
        "AI Chatbot Lead Qualifier",
        "Conversational chatbot for real-time lead qualification.",
    ),
    8: (
        "ROI Dashboard",
        "KPI metrics, lead funnel, and ROI calculator.",
    ),
}