Spaces:
Sleeping
Sleeping
| { | |
| "project_name": "Churn Prediction and Engagement Insights Dashboard (v1.0)", | |
| "goal": "To build an interactive dashboard that visualizes user churn, engagement, and activity patterns using machine learning and behavioral data extracted from app and fitness center datasets.", | |
| "tech_stack": { | |
| "frontend": "Gradio + Plotly (multi-tab interactive dashboard)", | |
| "ml": "Scikit-learn (Random Forest, Logistic Regression)", | |
| "data_processing": "Pandas + NumPy", | |
| "language": "Python 3.12", | |
| "data_sources": [ | |
| "App usage logs (userid, session_id, timestamp, event_type)", | |
| "Fitness center data (Age, Gender, Dropout, Activity metrics)" | |
| ] | |
| }, | |
| "architecture": { | |
| "data_layer": "Loads raw app and fitness datasets, performs cleaning, feature engineering (session_count, recency, churn label, engagement metrics).", | |
| "model_layer": "Trains Random Forest and Logistic Regression models to predict churn; evaluates accuracy, AUC, and feature importance.", | |
| "dashboard_layer": "Gradio app with four tabs \u2014 Business Insights, Workflow, Model Performance, Fairness \u2014 visualized using Plotly.", | |
| "engagement_layer": "Derived user engagement analytics (session duration, active vs idle time, hourly/weekly usage patterns)." | |
| }, | |
| "folders": { | |
| "app.py": "Main entry point to launch the Gradio dashboard.", | |
| "tabs/business.py": "Shows business KPIs \u2014 churn rate, churn probability scatter, top users, engagement and time-of-day charts.", | |
| "tabs/workflow.py": "Explains the model selection and experimentation workflow.", | |
| "tabs/performance.py": "Displays model metrics, AUC, accuracy, feature importance.", | |
| "tabs/fairness.py": "Shows model fairness across demographic groups.", | |
| "utils/data_prep.py": "Handles dataset loading, cleaning, and preprocessing for app and fitness data.", | |
| "utils/modeling.py": "Contains training, evaluation, and model saving/loading logic.", | |
| "utils/insights.py": "Includes Plotly charts for churn rate, feature importance, and engagement patterns.", | |
| "utils/fairness_utils.py": "Implements fairness metrics via fairlearn.", | |
| "data/": "Holds lsapp.tsv (app logs) and DadosV3.csv (fitness dataset).", | |
| "results/": "Stores model outputs and predictions (e.g., appdata_rf.csv)." | |
| }, | |
| "key_features": [ | |
| "Gradio-based multi-tab dashboard with responsive Plotly charts.", | |
| "End-to-end churn pipeline from data prep to model evaluation.", | |
| "Interactive business visuals: churn donut, churn probability scatter, top users, and activity by hour.", | |
| "User engagement metrics distinguishing total session time vs active interaction time.", | |
| "Feature importance and model performance visualization (AUC, accuracy).", | |
| "Fairness analysis by gender for churn prediction models." | |
| ], | |
| "dataframes_created": { | |
| "event_df": "Event-level enriched dataset including hour and day of week for each app event.", | |
| "session_df": "Session-level dataset with session duration, engagement time, engagement ratio, and idle time per session.", | |
| "user_df": "User-level summary with total session time, engagement ratio, most active hour/day, and total interactions.", | |
| "predictions_with_features_app": "Merged dataset containing features, true labels, churn predictions, and probabilities.", | |
| "fitness_features": "Cleaned and encoded fitness data used for churn modeling." | |
| }, | |
| "limitations": [ | |
| "LLM simulation not yet integrated (pure ML + analytics).", | |
| "Charts are static \u2014 no interactive filtering or user inputs yet.", | |
| "No persistent model registry or retraining pipeline.", | |
| "App currently runs locally; not yet deployed to Hugging Face Spaces.", | |
| "Fairness analysis limited to gender groupings only." | |
| ], | |
| "next_steps": [ | |
| "Integrate RAG-based 'What-if' LLM assistant for simulation (planned for v2.0).", | |
| "Enable dynamic chart interactivity with filters and time selection.", | |
| "Add weekly trend and cohort analysis for engagement and churn patterns.", | |
| "Deploy on Hugging Face Spaces (Gradio + requirements.txt ready).", | |
| "Introduce multi-model comparison (Random Forest, XGBoost, Logistic Regression).", | |
| "Add scenario-based user segmentation using engagement metrics." | |
| ] | |
| } |