""" Shared helper utilities for the AI Marketing Automation app. """ import os import pandas as pd import numpy as np import streamlit as st # ── Paths ────────────────────────────────────────────────────────────────────── BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) LEADS_PATH = os.path.join(BASE_DIR, "Leads", "Leads.csv") BANK_PATH = os.path.join(BASE_DIR, "bank+marketing", "bank-full.csv") # ── Loaders ──────────────────────────────────────────────────────────────────── @st.cache_data(show_spinner=False) def load_leads() -> pd.DataFrame: df = pd.read_csv(LEADS_PATH) df.columns = df.columns.str.strip() return df @st.cache_data(show_spinner=False) def load_bank() -> pd.DataFrame: df = pd.read_csv(BANK_PATH, sep=";") df.columns = df.columns.str.strip() return df # ── Common helpers ───────────────────────────────────────────────────────────── def missing_summary(df: pd.DataFrame) -> pd.DataFrame: total = df.isnull().sum() percent = (total / len(df) * 100).round(2) return pd.DataFrame({"Missing Count": total, "Missing %": percent})\ .query("`Missing Count` > 0").sort_values("Missing %", ascending=False) def dataset_info(df: pd.DataFrame) -> pd.DataFrame: info = pd.DataFrame({ "Dtype": df.dtypes.astype(str), "Non-Null": df.notnull().sum(), "Nulls": df.isnull().sum(), "Unique": df.nunique(), }) return info PHASE_DESCRIPTIONS = { 1: ("Data Loading & Ingestion", "Load, inspect, and profile both raw datasets."), 2: ("Exploratory Data Analysis", "Visualise distributions, correlations, and target balance."), 3: ("Preprocessing & Feature Eng.", "Clean, encode, engineer features, and handle imbalance."), 4: ("Lead Scoring Models", "Train LR, RF, XGBoost, LightGBM, SVM, and MLP models."), 5: ("Model Evaluation & XAI", "Compare models with ROC/PR curves, SHAP explainability."), 6: ("NLP Analysis", "Sentiment, intent, and keyword analysis on text features."), 7: ("AI Chatbot Lead Qualifier", "Conversational chatbot for real-time lead qualification."), 8: ("ROI Dashboard", "KPI metrics, lead funnel, and ROI calculator."), }