user-churn / version-history /project_context_v1.json
VasithaTilakumara
Version 2.0 - added LFS tracking for lsapp.tsv and updated features
53b92fc
{
"project_name": "Churn Prediction and Engagement Insights Dashboard (v1.0)",
"goal": "To build an interactive dashboard that visualises user churn, engagement, and activity patterns using machine learning and behavioral data extracted from app and fitness center datasets.",
"tech_stack": {
"frontend": "Gradio + Plotly (multi-tab interactive dashboard)",
"ml": "Scikit-learn (Random Forest, Logistic Regression)",
"data_processing": "Pandas + NumPy",
"language": "Python 3.12",
"data_sources": [
"App usage logs (userid, session_id, timestamp, event_type)",
"Fitness center data (Age, Gender, Dropout, Activity metrics)"
]
},
"architecture": {
"data_layer": "Loads raw app and fitness datasets, performs cleaning, feature engineering (session_count, recency, churn label, engagement metrics).",
"model_layer": "Trains Random Forest and Logistic Regression models to predict churn; evaluates accuracy, AUC, and feature importance.",
"dashboard_layer": "Gradio app with four tabs \u2014 Business Insights, Workflow, Model Performance, Fairness \u2014 visualized using Plotly.",
"engagement_layer": "Derived user engagement analytics (session duration, active vs idle time, hourly/weekly usage patterns)."
},
"folders": {
"app.py": "Main entry point to launch the Gradio dashboard.",
"tabs/business.py": "Shows business KPIs \u2014 churn rate, churn probability scatter, top users, engagement and time-of-day charts.",
"tabs/workflow.py": "Explains the model selection and experimentation workflow.",
"tabs/performance.py": "Displays model metrics, AUC, accuracy, feature importance.",
"tabs/fairness.py": "Shows model fairness across demographic groups.",
"utils/data_prep.py": "Handles dataset loading, cleaning, and preprocessing for app and fitness data.",
"utils/modeling.py": "Contains training, evaluation, and model saving/loading logic.",
"utils/insights.py": "Includes Plotly charts for churn rate, feature importance, and engagement patterns.",
"utils/fairness_utils.py": "Implements fairness metrics via fairlearn.",
"data/": "Holds lsapp.tsv (app logs) and DadosV3.csv (fitness dataset).",
"results/": "Stores model outputs and predictions (e.g., appdata_rf.csv)."
},
"key_features": [
"Gradio-based multi-tab dashboard with responsive Plotly charts.",
"End-to-end churn pipeline from data prep to model evaluation.",
"Interactive business visuals: churn donut, churn probability scatter, top users, and activity by hour.",
"User engagement metrics distinguishing total session time vs active interaction time.",
"Feature importance and model performance visualisation (AUC, accuracy).",
"Fairness analysis by gender for churn prediction models."
],
"dataframes_created": {
"event_df": "Event-level enriched dataset including hour and day of week for each app event.",
"session_df": "Session-level dataset with session duration, engagement time, engagement ratio, and idle time per session.",
"user_df": "User-level summary with total session time, engagement ratio, most active hour/day, and total interactions.",
"predictions_with_features_app": "Merged dataset containing features, true labels, churn predictions, and probabilities.",
"fitness_features": "Cleaned and encoded fitness data used for churn modeling."
},
"limitations": [
"LLM simulation not yet integrated (pure ML + analytics).",
"Charts static \u2014 no interactive filtering or user inputs yet.",
"No persistent model registry or retraining pipeline.",
"App currently runs locally; not yet deployed to Hugging Face Spaces.",
"Fairness analysis limited to gender groupings only."
],
"next_steps": [
"Integrate RAG-based 'What-if' LLM assistant for simulation (planned for v2.0).",
"Enable dynamic chart interactivity with filters and time selection.",
"Add weekly trend and cohort analysis for engagement and churn patterns.",
"Deploy on Hugging Face Spaces (Gradio + requirements.txt ready).",
"Introduce multi-model comparison (Random Forest, XGBoost, Logistic Regression).",
"Add scenario-based user segmentation using engagement metrics."
]
}