Spaces:
Sleeping
Sleeping
| { | |
| "project_name": "AI-Driven Churn Prediction and Simulation Dashboard", | |
| "goal": "To create an interactive dashboard that predicts user churn using ML models and simulates 'what-if' business scenarios using a local LLM (Ollama + LangChain).", | |
| "tech_stack": { | |
| "frontend": "Gradio + Plotly", | |
| "ml": "scikit-learn (Random Forest, Logistic Regression)", | |
| "ai_layer": "LangChain + Ollama (Mistral or Gemma3)", | |
| "language": "Python 3.12", | |
| "data": "App usage logs (session_count, recency, avg_session_duration)" | |
| }, | |
| "architecture": { | |
| "data_layer": "Loads and preprocesses raw app logs into feature-engineered dataset", | |
| "model_layer": "Trained ML model saved as random_forest_model.pkl", | |
| "dashboard_layer": "Gradio multi-tab app with Plotly charts", | |
| "ai_layer": "LangChain + Ollama chatbot for natural-language what-if simulation", | |
| "simulation_engine": "Generic 'plan'-based scenario engine (simulate_plan)" | |
| }, | |
| "folders": { | |
| "app.py": "Entry point that builds and runs the dashboard", | |
| "tabs/business.py": "Main business insights tab + AI chatbot integration (renamed to performance.py as of version 2.2)", | |
| "tabs/shared_ai.py": "LangChain LLM logic (parses user text into JSON plan)", | |
| "utils/insights.py": "Plots churn visuals and performance metrics", | |
| "utils/models.py": "Loads models, provides feature importances and metrics", | |
| "utils/modelling.py": "Model training and evaluation logic", | |
| "utils/data_prep.py": "Feature engineering from raw app usage logs", | |
| "utils/scenario_engine_ng.py": "Latest plan-based simulation engine", | |
| "data/data_randomforest.csv": "Feature-engineered dataset", | |
| "models/random_forest_model.pkl": "Saved ML model used for churn prediction" | |
| }, | |
| "key_features": [ | |
| "Interactive Gradio dashboard with churn insights", | |
| "LLM-powered chatbot for 'what-if' simulations", | |
| "Dynamic plan-based scenario engine (supports scale, shift, set, clip)", | |
| "Model-aware feature validation using feature_names_in_", | |
| "Modular architecture supporting future model or dataset updates" | |
| ], | |
| "limitations": [ | |
| "LLM may generate invalid JSON or non-numeric expressions", | |
| "Simulation only supports numeric features", | |
| "Single model, single dataset workflow (no multi-model comparison yet)", | |
| "No LLM memory of past scenarios (simulation history is logged to `data/sim_history.csv` but is not yet fed back into prompts)", | |
| "Charts remain static after a simulation (results are shown as text output only)" | |
| ], | |
| "next_steps": [ | |
| "Add interactive chart refresh after each simulation", | |
| "Enable multiple concurrent model versions (Random Forest, XGBoost, etc.)", | |
| "Integrate scenario history comparison", | |
| "Extend ops: normalize, bucket, optimize", | |
| "Add retraining loop for simulated data" | |
| ], | |
| "progress_summary": { | |
| "version_2_0": { | |
| "focus": "LLM Integration", | |
| "achievements": [ | |
| "Integrated LangChain + Ollama (Mistral) to interpret natural 'what-if' queries.", | |
| "Implemented plan-based scenario simulation via `simulate_plan`.", | |
| "Enabled AI to generate structured JSON outputs to modify dataset features.", | |
| "Created prompt design and Pydantic schema for reliable JSON validation.", | |
| "Built stable baseline churn prediction using Random Forest." | |
| ], | |
| "challenges_and_fixes": [ | |
| "Resolved 'invalid JSON' outputs via strict schema validation.", | |
| "Fixed cross-file function mismatch (simulate_scenario vs simulate_plan).", | |
| "Debugged PromptTemplate brace parsing errors by switching to plain string prompts.", | |
| "Standardized simulation plan parsing and metadata validation." | |
| ] | |
| }, | |
| "version_2_1": { | |
| "focus": "Accuracy, Validation, and Robustness", | |
| "achievements": [ | |
| "Created `feature_metadata.json` for feature-specific type, min, max validation.", | |
| "Implemented metadata-aware validation and clipping in `scenario_engine_ng.py`.", | |
| "Added Pydantic schema enforcement in `shared_ai.py`.", | |
| "Validated correct LLM \u2192 JSON \u2192 Simulation \u2192 Result flow.", | |
| "Developed full unit test suite (`test_simulate_plan_v2_1.py`) verifying all operation types.", | |
| "Refined Gradio front-end (headers, accordions, layout polish)." | |
| ], | |
| "key_learnings": [ | |
| "Discovered Gradio ChatInterface only supports one output \u2192 simplified design.", | |
| "Reinforced modular design separation between AI layer (Pydantic) and simulation layer (dicts).", | |
| "Ensured reproducibility with metadata validation and clipping.", | |
| "Verified average churn deltas using controlled plan testing." | |
| ], | |
| "validation_status": "Version 2.1 unit tests fully passed \u2705" | |
| }, | |
| "version_2_2": { | |
| "focus": "Context-Aware Reasoning & Memory", | |
| "completed": [ | |
| "Integrated simulation history logging to `data/sim_history.csv`.", | |
| "Added `utils/history.py` for logging, loading, and clearing simulation history.", | |
| "Displayed simulation history in `performance.py` (formerly business.py).", | |
| "Introduced manual refresh button for table updates (stable solution)." | |
| ], | |
| "attempted_but_not_pursued": [ | |
| "Attempted real-time event-driven refresh using gr.State (flag).", | |
| "Tried gr.Button-based triggers and .click() propagation (unsupported).", | |
| "Explored ChatInterface multi-output for auto-refresh (not supported).", | |
| "Experimented with LangChain reactivity, eventually replaced with manual refresh." | |
| ], | |
| "next_phase": "Implement Mini-RAG knowledge retrieval system." | |
| } | |
| }, | |
| "planned_versions": { | |
| "version_2_2_remaining": { | |
| "goals": [ | |
| "Implement Mini-RAG Context Layer \u2014 feature_docs + vector store retrieval.", | |
| "Integrate retriever context into LLM prompt before simulation.", | |
| "Add limited simulation history summarization (RAG-based insights).", | |
| "Enable multi-feature plan reasoning ('Increase sessions and reduce recency')." | |
| ], | |
| "implementation_outline": [ | |
| "1. Create `data/feature_docs/` with plain text files describing each feature.", | |
| "2. Implement `utils/retriever.py` using Chroma + OllamaEmbeddings.", | |
| "3. Modify `shared_ai.py` to retrieve and inject relevant context into prompts.", | |
| "4. Add function `build_history_docs()` to summarize last 50 simulations.", | |
| "5. Extend LLM prompt to include context from both feature_docs and history summaries." | |
| ] | |
| }, | |
| "version_2_3": { | |
| "focus": "Enhanced Simulation Intelligence", | |
| "planned_features": [ | |
| "Add explainability mode ('Why did churn drop?').", | |
| "Support categorical features and encoding simulation.", | |
| "Allow multiple concurrent models (RF, XGBoost, Logistic Regression).", | |
| "Enable dynamic model selection from dashboard." | |
| ] | |
| }, | |
| "version_2_4": { | |
| "focus": "Optimization & Learning Loop", | |
| "planned_features": [ | |
| "Introduce simulation optimizer \u2014 find feature combinations minimizing churn.", | |
| "Implement retraining loop using simulated data to fine-tune model.", | |
| "Add continuous evaluation of model drift." | |
| ] | |
| } | |
| }, | |
| "project_structure_notes": { | |
| "frontend": [ | |
| "Gradio multi-tab interface (Business Insights, Model Performance, Simulation History).", | |
| "Performance tab uses refresh button to load sim_history dynamically.", | |
| "AI assistant integrated via `shared_ai.py` uses local LLM (Ollama)." | |
| ], | |
| "backend": [ | |
| "Scenario engine supports 'scale', 'shift', 'set', 'clip' operations.", | |
| "Simulation validated by metadata and schema guards.", | |
| "Data and model persistence via CSV and Pickle respectively." | |
| ], | |
| "tests_and_validation": [ | |
| "Automated test script `test_simulate_plan_v2_1.py` verifies correctness.", | |
| "Console validation confirmed that baseline churn predictions are consistent." | |
| ] | |
| }, | |
| "future_roadmap": [ | |
| "Version 2.2: Complete Mini-RAG context retrieval (feature_docs + sim_history).", | |
| "Version 2.3: Add explain intent, multi-model, and categorical handling.", | |
| "Version 2.4: Add simulation optimizer and retraining feedback loop.", | |
| "Version 3.0: Deploy dashboard as lightweight local web app with persistent memory." | |
| ], | |
| "version_progression": [ | |
| { | |
| "version": "2.1", | |
| "focus": "Accuracy & Validation", | |
| "key_deliverables": [ | |
| "Strict JSON schema validation (Pydantic)", | |
| "Feature metadata + value clipping", | |
| "Validated simulation engine with automated test suite", | |
| "Ensured reproducibility through metadata-based clipping" | |
| ] | |
| }, | |
| { | |
| "version": "2.2", | |
| "focus": "Contextual Understanding", | |
| "key_deliverables": [ | |
| "Mini-RAG retriever for feature documentation and simulation history", | |
| "Contextual LLM prompting with retrieved feature insights", | |
| "Multi-feature simulation and reasoning capability", | |
| "Model Performance & Simulation History tab integration with refresh workflow" | |
| ] | |
| }, | |
| { | |
| "version": "2.3", | |
| "focus": "Rich Simulation UX & Explainability", | |
| "key_deliverables": [ | |
| "Natural explanations for churn outcomes ('why churn dropped')", | |
| "Scenario comparison and history analytics", | |
| "Categorical feature simulation (encoding awareness)", | |
| "Interactive visual comparison of churn deltas between runs" | |
| ] | |
| }, | |
| { | |
| "version": "2.4+", | |
| "focus": "Optimization & Continuous Learning", | |
| "key_deliverables": [ | |
| "Monte Carlo simulation for uncertainty modeling", | |
| "Automated scenario optimizer to minimize churn", | |
| "Retraining feedback loop using simulated outcomes", | |
| "Continuous evaluation for model drift and data shifts" | |
| ] | |
| }, | |
| { | |
| "version": "3.0", | |
| "focus": "Deployment & Persistence", | |
| "key_deliverables": [ | |
| "Convert dashboard into persistent local web app", | |
| "Enable embedded vector database for RAG memory", | |
| "Support user accounts and saved scenario sessions", | |
| "Integrate feedback from RAG retraining and live data ingestion" | |
| ] | |
| } | |
| ] | |
| } |