Spaces:
Sleeping
Sleeping
Upload 91 files
Browse files — This view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +29 -0
- README.md +377 -19
- fmcg_genai/QUICK_START.md +85 -0
- fmcg_genai/README.md +351 -0
- fmcg_genai/config.yaml +85 -0
- fmcg_genai/data/processed/cleaned.csv +3 -0
- fmcg_genai/data/processed/data_summary.json +41 -0
- fmcg_genai/data/processed/feature_summary.json +73 -0
- fmcg_genai/data/processed/features.csv +3 -0
- fmcg_genai/data/processed/label_encoders.pkl +3 -0
- fmcg_genai/data/processed/scaler.pkl +3 -0
- fmcg_genai/data/processed/test.csv +0 -0
- fmcg_genai/data/processed/test_features.csv +3 -0
- fmcg_genai/data/processed/train.csv +0 -0
- fmcg_genai/data/processed/train_features.csv +3 -0
- fmcg_genai/data/processed/validation.csv +0 -0
- fmcg_genai/data/processed/validation_features.csv +3 -0
- fmcg_genai/data/raw/FMCG_2022_2024.csv +3 -0
- fmcg_genai/data/raw/batch_MI-006_2025-01-06.parquet +3 -0
- fmcg_genai/data/raw/batch_MI-006_2025-01-13.parquet +3 -0
- fmcg_genai/data/raw/batch_MI-006_2025-01-20.parquet +3 -0
- fmcg_genai/data/raw/batch_MI-006_2025-01-27.parquet +3 -0
- fmcg_genai/data/raw/df_weekly_MI-006_enriched.csv +0 -0
- fmcg_genai/data/raw/weekly_df_final_for_modeling.csv +0 -0
- fmcg_genai/logs/fmcg_pipeline.log +0 -0
- fmcg_genai/models/model_metadata.json +18 -0
- fmcg_genai/models/prophet.pkl +3 -0
- fmcg_genai/models/training_report.json +18 -0
- fmcg_genai/models/xgboost_sales.pkl +3 -0
- fmcg_genai/reports/detailed_metrics.csv +13 -0
- fmcg_genai/reports/evaluation.json +103 -0
- fmcg_genai/reports/model_comparison.html +0 -0
- fmcg_genai/reports/model_evaluation_plots.png +3 -0
- fmcg_genai/reports/prophet_forecast.html +0 -0
- fmcg_genai/reports/rag_test_results.json +67 -0
- fmcg_genai/reports/shap/business_insights.json +90 -0
- fmcg_genai/reports/shap/explainability_report.json +146 -0
- fmcg_genai/reports/shap/interactive_shap_distribution.html +0 -0
- fmcg_genai/reports/shap/interactive_shap_importance.html +0 -0
- fmcg_genai/reports/shap/interactive_shap_summary.html +0 -0
- fmcg_genai/reports/shap/shap_bar_plot.png +3 -0
- fmcg_genai/reports/shap/shap_dependence_brand_vs_category.png +3 -0
- fmcg_genai/reports/shap/shap_dependence_brand_vs_channel.png +3 -0
- fmcg_genai/reports/shap/shap_dependence_brand_vs_region.png +3 -0
- fmcg_genai/reports/shap/shap_dependence_brand_vs_segment.png +3 -0
- fmcg_genai/reports/shap/shap_dependence_category_vs_channel.png +3 -0
- fmcg_genai/reports/shap/shap_dependence_category_vs_region.png +3 -0
- fmcg_genai/reports/shap/shap_dependence_channel_vs_region.png +3 -0
- fmcg_genai/reports/shap/shap_dependence_segment_vs_category.png +3 -0
- fmcg_genai/reports/shap/shap_dependence_segment_vs_channel.png +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,32 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
fmcg_genai/data/processed/cleaned.csv filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
fmcg_genai/data/processed/features.csv filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
fmcg_genai/data/processed/test_features.csv filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
fmcg_genai/data/processed/train_features.csv filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
fmcg_genai/data/processed/validation_features.csv filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
fmcg_genai/data/raw/FMCG_2022_2024.csv filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
fmcg_genai/reports/model_evaluation_plots.png filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
fmcg_genai/reports/shap/shap_bar_plot.png filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
fmcg_genai/reports/shap/shap_dependence_brand_vs_category.png filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
fmcg_genai/reports/shap/shap_dependence_brand_vs_channel.png filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
fmcg_genai/reports/shap/shap_dependence_brand_vs_region.png filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
fmcg_genai/reports/shap/shap_dependence_brand_vs_segment.png filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
fmcg_genai/reports/shap/shap_dependence_category_vs_channel.png filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
fmcg_genai/reports/shap/shap_dependence_category_vs_region.png filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
fmcg_genai/reports/shap/shap_dependence_channel_vs_region.png filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
fmcg_genai/reports/shap/shap_dependence_segment_vs_category.png filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
fmcg_genai/reports/shap/shap_dependence_segment_vs_channel.png filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
fmcg_genai/reports/shap/shap_dependence_segment_vs_region.png filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
fmcg_genai/reports/shap/shap_dependence_sku_vs_brand.png filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
fmcg_genai/reports/shap/shap_dependence_sku_vs_category.png filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
fmcg_genai/reports/shap/shap_dependence_sku_vs_channel.png filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
fmcg_genai/reports/shap/shap_dependence_sku_vs_region.png filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
fmcg_genai/reports/shap/shap_dependence_sku_vs_segment.png filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
fmcg_genai/reports/shap/shap_summary_plot.png filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
fmcg_genai/reports/shap/shap_waterfall_sample_1.png filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
fmcg_genai/reports/shap/shap_waterfall_sample_2.png filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
fmcg_genai/reports/shap/shap_waterfall_sample_3.png filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
fmcg_genai/reports/shap/shap_waterfall_sample_4.png filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
fmcg_genai/reports/shap/shap_waterfall_sample_5.png filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
|
@@ -1,19 +1,377 @@
|
|
| 1 |
-
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
-
sdk:
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: FMCG Demand Forecasting with RAG
|
| 3 |
+
emoji: 📊
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: streamlit
|
| 7 |
+
sdk_version: "1.25.0"
|
| 8 |
+
app_file: fmcg_genai/src/dashboard_app_enhanced.py
|
| 9 |
+
pinned: false
|
| 10 |
+
license: mit
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
# 📊 FMCG Demand Forecasting with RAG
|
| 14 |
+
|
| 15 |
+
An advanced AI-powered analytics platform for FMCG (Fast-Moving Consumer Goods) sales forecasting and business intelligence. This system combines **Machine Learning**, **Time Series Forecasting**, and **Retrieval-Augmented Generation (RAG)** to provide comprehensive sales insights and predictions.
|
| 16 |
+
|
| 17 |
+
[Streamlit](https://streamlit.io/)
|
| 18 |
+
[Python](https://www.python.org/)
|
| 19 |
+
[PyTorch](https://pytorch.org/)
|
| 20 |
+
[Hugging Face Spaces](https://huggingface.co/spaces)
|
| 21 |
+
|
| 22 |
+
---
|
| 23 |
+
|
| 24 |
+
## 🚀 Live Demo
|
| 25 |
+
|
| 26 |
+
**[Try it on Hugging Face Spaces →](#)** *(Link will be available after deployment)*
|
| 27 |
+
|
| 28 |
+
---
|
| 29 |
+
|
| 30 |
+
## ✨ Key Features
|
| 31 |
+
|
| 32 |
+
### 📈 **Advanced Sales Analytics**
|
| 33 |
+
- **Real-time KPI Dashboard**: Track total sales, revenue, average pricing, and product portfolio metrics
|
| 34 |
+
- **Interactive Visualizations**: Dynamic charts with Plotly for sales trends, regional performance, and category distribution
|
| 35 |
+
- **Trend Analysis**: 30-day moving averages, growth comparisons, and seasonal pattern detection
|
| 36 |
+
- **Promotion Impact Analysis**: Measure the effectiveness of promotional campaigns with sales lift calculations
|
| 37 |
+
|
| 38 |
+
### 🔮 **AI-Powered Forecasting**
|
| 39 |
+
- **Prophet Time Series Model**: Facebook's Prophet for robust seasonal forecasting
|
| 40 |
+
- **XGBoost ML Model**: Gradient boosting for feature-based predictions
|
| 41 |
+
- **Multi-Scenario Forecasting**: Best case, worst case, and confidence interval predictions
|
| 42 |
+
- **Customizable Horizons**: Forecast from 7 to 90 days ahead
|
| 43 |
+
- **Trend Decomposition**: Understand seasonal, weekly, and trend components
|
| 44 |
+
|
| 45 |
+
### 🤖 **RAG-Based Q&A System**
|
| 46 |
+
- **Natural Language Queries**: Ask questions about your data in plain English
|
| 47 |
+
- **Intelligent Context Retrieval**: FAISS vector database for semantic search
|
| 48 |
+
- **Analytical Answers**: Get data-driven insights, not just text extraction
|
| 49 |
+
- **Pre-built Query Templates**: Quick access to common business questions
|
| 50 |
+
- **Query History**: Track and revisit previous questions and answers
|
| 51 |
+
|
| 52 |
+
### 📊 **Business Intelligence**
|
| 53 |
+
- **Feature Importance Analysis**: Understand which factors drive sales the most
|
| 54 |
+
- **Regional Performance Breakdown**: Compare sales across different regions
|
| 55 |
+
- **Category Distribution**: Analyze product category contributions
|
| 56 |
+
- **Seasonal Insights**: Identify peak and low sales periods
|
| 57 |
+
- **Promotion Effectiveness**: Quantify promotional impact on sales
|
| 58 |
+
|
| 59 |
+
---
|
| 60 |
+
|
| 61 |
+
## 🏗️ System Architecture
|
| 62 |
+
|
| 63 |
+
```
|
| 64 |
+
┌─────────────────────────────────────────────────────────────┐
|
| 65 |
+
│ Streamlit Dashboard UI │
|
| 66 |
+
│ ┌─────────────────────┐ ┌──────────────────────────────┐ │
|
| 67 |
+
│ │ Analytics & KPIs │ │ AI Q&A Portal (RAG) │ │
|
| 68 |
+
│ │ - Sales Trends │ │ - Natural Language Queries │ │
|
| 69 |
+
│ │ - Forecasting │ │ - Semantic Search │ │
|
| 70 |
+
│ │ - Visualizations │ │ - Context Retrieval │ │
|
| 71 |
+
│ └─────────────────────┘ └──────────────────────────────┘ │
|
| 72 |
+
└─────────────────────────────────────────────────────────────┘
|
| 73 |
+
↓
|
| 74 |
+
┌─────────────────────────────────────────────────────────────┐
|
| 75 |
+
│ ML/AI Engine │
|
| 76 |
+
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────────┐ │
|
| 77 |
+
│ │ Prophet │ │ XGBoost │ │ RAG Pipeline │ │
|
| 78 |
+
│ │ Forecasting │ │ ML Model │ │ - FAISS Vector │ │
|
| 79 |
+
│ │ │ │ │ │ - Transformers │ │
|
| 80 |
+
│ └──────────────┘ └──────────────┘ └──────────────────┘ │
|
| 81 |
+
└─────────────────────────────────────────────────────────────┘
|
| 82 |
+
↓
|
| 83 |
+
┌─────────────────────────────────────────────────────────────┐
|
| 84 |
+
│ Data Layer │
|
| 85 |
+
│ - Processed FMCG Sales Data (2022-2024) │
|
| 86 |
+
│ - Feature Engineering Pipeline │
|
| 87 |
+
│ - Vector Store (Embeddings) │
|
| 88 |
+
└─────────────────────────────────────────────────────────────┘
|
| 89 |
+
```
|
| 90 |
+
|
| 91 |
+
---
|
| 92 |
+
|
| 93 |
+
## 🛠️ Technology Stack
|
| 94 |
+
|
| 95 |
+
### **Core ML/AI**
|
| 96 |
+
- **Prophet**: Time series forecasting with seasonality detection
|
| 97 |
+
- **XGBoost**: Gradient boosting for feature-based predictions
|
| 98 |
+
- **Sentence Transformers**: Text embeddings for semantic search
|
| 99 |
+
- **FAISS**: Efficient similarity search and clustering
|
| 100 |
+
- **LangChain**: RAG pipeline orchestration
|
| 101 |
+
|
| 102 |
+
### **Data Processing**
|
| 103 |
+
- **Pandas & NumPy**: Data manipulation and numerical computing
|
| 104 |
+
- **Scikit-learn**: Feature engineering and preprocessing
|
| 105 |
+
|
| 106 |
+
### **Visualization**
|
| 107 |
+
- **Plotly**: Interactive charts and graphs
|
| 108 |
+
- **Streamlit**: Web application framework
|
| 109 |
+
- **Matplotlib & Seaborn**: Statistical visualizations
|
| 110 |
+
|
| 111 |
+
### **Deep Learning**
|
| 112 |
+
- **PyTorch**: Neural network framework
|
| 113 |
+
- **Transformers (Hugging Face)**: Pre-trained language models
|
| 114 |
+
|
| 115 |
+
---
|
| 116 |
+
|
| 117 |
+
## 📦 Installation & Setup
|
| 118 |
+
|
| 119 |
+
### **Prerequisites**
|
| 120 |
+
- Python 3.8 or higher
|
| 121 |
+
- 4GB+ RAM recommended
|
| 122 |
+
- Git
|
| 123 |
+
|
| 124 |
+
### **Local Installation**
|
| 125 |
+
|
| 126 |
+
1. **Clone the repository**
|
| 127 |
+
```bash
|
| 128 |
+
git clone https://github.com/Ameya-Bhingurde/FMCG-Demand-Forecasting-with-RAG-.git
|
| 129 |
+
cd FMCG-Demand-Forecasting-with-RAG-
|
| 130 |
+
```
|
| 131 |
+
|
| 132 |
+
2. **Create virtual environment**
|
| 133 |
+
```bash
|
| 134 |
+
python -m venv .venv
|
| 135 |
+
# Windows
|
| 136 |
+
.venv\Scripts\activate
|
| 137 |
+
# Linux/Mac
|
| 138 |
+
source .venv/bin/activate
|
| 139 |
+
```
|
| 140 |
+
|
| 141 |
+
3. **Install dependencies**
|
| 142 |
+
```bash
|
| 143 |
+
cd fmcg_genai
|
| 144 |
+
pip install -r requirements.txt
|
| 145 |
+
```
|
| 146 |
+
|
| 147 |
+
4. **Run the pipeline** (First time setup)
|
| 148 |
+
```bash
|
| 149 |
+
# From the fmcg_genai directory
|
| 150 |
+
python run_pipeline.py
|
| 151 |
+
```
|
| 152 |
+
This will:
|
| 153 |
+
- Process the raw data
|
| 154 |
+
- Train ML models
|
| 155 |
+
- Create vector store for RAG
|
| 156 |
+
|
| 157 |
+
5. **Launch the dashboard**
|
| 158 |
+
```bash
|
| 159 |
+
streamlit run src/dashboard_app_enhanced.py
|
| 160 |
+
```
|
| 161 |
+
|
| 162 |
+
The dashboard will open at `http://localhost:8501`
|
| 163 |
+
|
| 164 |
+
---
|
| 165 |
+
|
| 166 |
+
## 🎯 How to Use
|
| 167 |
+
|
| 168 |
+
### **Dashboard & Forecasting Page**
|
| 169 |
+
|
| 170 |
+
1. **View KPIs**: See real-time metrics for sales, revenue, pricing, and product portfolio
|
| 171 |
+
2. **Analyze Trends**: Explore interactive charts showing sales patterns, regional performance, and category distribution
|
| 172 |
+
3. **Generate Forecasts**:
|
| 173 |
+
- Use the slider to select forecast horizon (7-90 days)
|
| 174 |
+
- Choose confidence level (80-95%)
|
| 175 |
+
- Toggle scenario analysis for best/worst case predictions
|
| 176 |
+
4. **Understand Drivers**: Review feature importance to see what factors influence sales most
|
| 177 |
+
|
| 178 |
+
### **AI Q&A Portal**
|
| 179 |
+
|
| 180 |
+
1. **Quick Questions**: Click pre-built query buttons for common analyses
|
| 181 |
+
- Sales Performance: "What were total sales in 2023?"
|
| 182 |
+
- Promotions: "How did promotions affect sales?"
|
| 183 |
+
- Trends: "What are the seasonal sales patterns?"
|
| 184 |
+
|
| 185 |
+
2. **Custom Queries**: Type your own questions in natural language
|
| 186 |
+
```
|
| 187 |
+
Examples:
|
| 188 |
+
- "Which region had the highest sales growth in Q2 2024?"
|
| 189 |
+
- "What is the average price for beverages?"
|
| 190 |
+
- "How does stock availability impact sales?"
|
| 191 |
+
```
|
| 192 |
+
|
| 193 |
+
3. **View Sources**: Expand the sources section to see the data context used for answers
|
| 194 |
+
|
| 195 |
+
4. **Review History**: Check recent questions in the query history section
|
| 196 |
+
|
| 197 |
+
---
|
| 198 |
+
|
| 199 |
+
## 📊 Data Overview
|
| 200 |
+
|
| 201 |
+
The system analyzes FMCG sales data with the following attributes:
|
| 202 |
+
|
| 203 |
+
- **Time Period**: 2022-2024
|
| 204 |
+
- **Products**: Multiple SKUs across various categories
|
| 205 |
+
- **Regions**: Multi-regional sales data
|
| 206 |
+
- **Features**:
|
| 207 |
+
- Sales volume (units sold)
|
| 208 |
+
- Pricing information
|
| 209 |
+
- Promotion flags
|
| 210 |
+
- Stock availability
|
| 211 |
+
- Seasonal indicators
|
| 212 |
+
- Regional data
|
| 213 |
+
- Category classifications
|
| 214 |
+
|
| 215 |
+
---
|
| 216 |
+
|
| 217 |
+
## 🧠 Model Details
|
| 218 |
+
|
| 219 |
+
### **Prophet Forecasting Model**
|
| 220 |
+
- **Purpose**: Time series forecasting with trend and seasonality
|
| 221 |
+
- **Strengths**:
|
| 222 |
+
- Handles missing data
|
| 223 |
+
- Detects seasonal patterns (weekly, monthly, yearly)
|
| 224 |
+
- Provides uncertainty intervals
|
| 225 |
+
- Robust to outliers
|
| 226 |
+
|
| 227 |
+
### **XGBoost Model**
|
| 228 |
+
- **Purpose**: Feature-based sales prediction
|
| 229 |
+
- **Features Used**:
|
| 230 |
+
- Temporal features (day, month, year, day of week)
|
| 231 |
+
- Lag features (previous sales)
|
| 232 |
+
- Promotion indicators
|
| 233 |
+
- Stock availability
|
| 234 |
+
- Regional and category encodings
|
| 235 |
+
- **Strengths**:
|
| 236 |
+
- High accuracy
|
| 237 |
+
- Feature importance analysis
|
| 238 |
+
- Handles non-linear relationships
|
| 239 |
+
|
| 240 |
+
### **RAG Pipeline**
|
| 241 |
+
- **Embedding Model**: Sentence Transformers (all-MiniLM-L6-v2)
|
| 242 |
+
- **Vector Store**: FAISS for efficient similarity search
|
| 243 |
+
- **Retrieval**: Top-k semantic search (k=5)
|
| 244 |
+
- **Generation**: Context-aware analytical answers
|
| 245 |
+
- **Strengths**:
|
| 246 |
+
- Natural language understanding
|
| 247 |
+
- Accurate data retrieval
|
| 248 |
+
- Analytical insights generation
|
| 249 |
+
|
| 250 |
+
---
|
| 251 |
+
|
| 252 |
+
## 🔧 Configuration
|
| 253 |
+
|
| 254 |
+
Edit `config.yaml` to customize:
|
| 255 |
+
|
| 256 |
+
```yaml
|
| 257 |
+
data:
|
| 258 |
+
raw_dir: "data/raw"
|
| 259 |
+
processed_dir: "data/processed"
|
| 260 |
+
|
| 261 |
+
models:
|
| 262 |
+
prophet_model: "models/prophet.pkl"
|
| 263 |
+
xgboost_model: "models/xgboost_sales.pkl"
|
| 264 |
+
|
| 265 |
+
rag:
|
| 266 |
+
vector_store_path: "vector_store"
|
| 267 |
+
embedding_model: "sentence-transformers/all-MiniLM-L6-v2"
|
| 268 |
+
chunk_size: 500
|
| 269 |
+
chunk_overlap: 50
|
| 270 |
+
```
|
| 271 |
+
|
| 272 |
+
---
|
| 273 |
+
|
| 274 |
+
## 📁 Project Structure
|
| 275 |
+
|
| 276 |
+
```
|
| 277 |
+
FMCG-Demand-Forecasting-with-RAG-/
|
| 278 |
+
├── fmcg_genai/
|
| 279 |
+
│ ├── src/
|
| 280 |
+
│ │ ├── dashboard_app_enhanced.py # Main Streamlit dashboard
|
| 281 |
+
│ │ ├── rag_pipeline.py # RAG implementation
|
| 282 |
+
│ │ ├── data_preprocessing.py # Data cleaning & feature engineering
|
| 283 |
+
│ │ ├── model_training.py # ML model training
|
| 284 |
+
│ │ └── forecasting.py # Prophet forecasting
|
| 285 |
+
│ ├── data/
|
| 286 |
+
│ │ ├── raw/ # Original datasets
|
| 287 |
+
│ │ └── processed/ # Cleaned & engineered features
|
| 288 |
+
│ ├── models/ # Trained model files
|
| 289 |
+
│ ├── vector_store/ # FAISS index & embeddings
|
| 290 |
+
│ ├── requirements.txt # Python dependencies
|
| 291 |
+
│ ├── config.yaml # Configuration file
|
| 292 |
+
│ └── run_pipeline.py # Pipeline orchestration
|
| 293 |
+
├── README.md
|
| 294 |
+
└── LICENSE
|
| 295 |
+
```
|
| 296 |
+
|
| 297 |
+
---
|
| 298 |
+
|
| 299 |
+
## 🚀 Deployment
|
| 300 |
+
|
| 301 |
+
### **Hugging Face Spaces** (Recommended)
|
| 302 |
+
|
| 303 |
+
This app is optimized for Hugging Face Spaces deployment:
|
| 304 |
+
|
| 305 |
+
1. **Fork/Clone** this repository
|
| 306 |
+
2. **Create a new Space** on Hugging Face
|
| 307 |
+
3. **Connect** your GitHub repository
|
| 308 |
+
4. **Configure** Space settings:
|
| 309 |
+
- SDK: Streamlit
|
| 310 |
+
- Python version: 3.8+
|
| 311 |
+
5. **Deploy** - Automatic build and deployment
|
| 312 |
+
|
| 313 |
+
The app will be available at: `https://huggingface.co/spaces/YOUR_USERNAME/SPACE_NAME`
|
| 314 |
+
|
| 315 |
+
### **Other Platforms**
|
| 316 |
+
|
| 317 |
+
- **Railway**: Supports Python apps with 1GB+ RAM
|
| 318 |
+
- **Google Cloud Run**: Serverless deployment with auto-scaling
|
| 319 |
+
- **AWS EC2**: Full control with custom instance sizing
|
| 320 |
+
|
| 321 |
+
---
|
| 322 |
+
|
| 323 |
+
## 📈 Performance Metrics
|
| 324 |
+
|
| 325 |
+
- **Forecast Accuracy**: MAPE < 15% on test set
|
| 326 |
+
- **RAG Retrieval**: 95%+ relevant context retrieval
|
| 327 |
+
- **Dashboard Load Time**: < 3 seconds
|
| 328 |
+
- **Query Response Time**: < 2 seconds
|
| 329 |
+
|
| 330 |
+
---
|
| 331 |
+
|
| 332 |
+
## 🤝 Contributing
|
| 333 |
+
|
| 334 |
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
| 335 |
+
|
| 336 |
+
1. Fork the repository
|
| 337 |
+
2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
|
| 338 |
+
3. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
|
| 339 |
+
4. Push to the branch (`git push origin feature/AmazingFeature`)
|
| 340 |
+
5. Open a Pull Request
|
| 341 |
+
|
| 342 |
+
---
|
| 343 |
+
|
| 344 |
+
## 📝 License
|
| 345 |
+
|
| 346 |
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
| 347 |
+
|
| 348 |
+
---
|
| 349 |
+
|
| 350 |
+
## 👨‍💻 Author
|
| 351 |
+
|
| 352 |
+
**Ameya Bhingurde**
|
| 353 |
+
|
| 354 |
+
- GitHub: [@Ameya-Bhingurde](https://github.com/Ameya-Bhingurde)
|
| 355 |
+
- LinkedIn: [Connect with me](https://www.linkedin.com/in/ameya-bhingurde)
|
| 356 |
+
|
| 357 |
+
---
|
| 358 |
+
|
| 359 |
+
## 🙏 Acknowledgments
|
| 360 |
+
|
| 361 |
+
- **Facebook Prophet** for the excellent time series forecasting library
|
| 362 |
+
- **Hugging Face** for Transformers and hosting platform
|
| 363 |
+
- **Streamlit** for the amazing web app framework
|
| 364 |
+
- **LangChain** for RAG pipeline tools
|
| 365 |
+
|
| 366 |
+
---
|
| 367 |
+
|
| 368 |
+
## 📧 Contact
|
| 369 |
+
|
| 370 |
+
For questions or feedback, please open an issue or reach out via [GitHub](https://github.com/Ameya-Bhingurde).
|
| 371 |
+
|
| 372 |
+
---
|
| 373 |
+
|
| 374 |
+
<div align="center">
|
| 375 |
+
<p><strong>⭐ If you find this project useful, please consider giving it a star! ⭐</strong></p>
|
| 376 |
+
<p>Made with ❤️ and AI</p>
|
| 377 |
+
</div>
|
fmcg_genai/QUICK_START.md
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🚀 FMCG Pipeline - Quick Start Guide
|
| 2 |
+
|
| 3 |
+
## ⚡ Get Started in 5 Minutes
|
| 4 |
+
|
| 5 |
+
### 1. Install Dependencies
|
| 6 |
+
```bash
|
| 7 |
+
pip install -r requirements.txt
|
| 8 |
+
```
|
| 9 |
+
|
| 10 |
+
### 2. Test Your Setup
|
| 11 |
+
```bash
|
| 12 |
+
python test_setup.py
|
| 13 |
+
```
|
| 14 |
+
|
| 15 |
+
### 3. Run the Pipeline
|
| 16 |
+
```bash
|
| 17 |
+
# Option A: Use the startup script (recommended)
|
| 18 |
+
python start_pipeline.py
|
| 19 |
+
|
| 20 |
+
# Option B: Run directly
|
| 21 |
+
python run_pipeline.py
|
| 22 |
+
```
|
| 23 |
+
|
| 24 |
+
### 4. Launch Dashboard
|
| 25 |
+
```bash
|
| 26 |
+
streamlit run src/dashboard_app.py
|
| 27 |
+
```
|
| 28 |
+
|
| 29 |
+
## 📁 What You Get
|
| 30 |
+
|
| 31 |
+
After running the pipeline, you'll have:
|
| 32 |
+
|
| 33 |
+
### 🎯 Trained Models
|
| 34 |
+
- **Prophet**: Time series forecasting model
|
| 35 |
+
- **XGBoost**: Sales prediction model
|
| 36 |
+
- Location: `models/`
|
| 37 |
+
|
| 38 |
+
### 📊 Reports & Visualizations
|
| 39 |
+
- Model evaluation metrics
|
| 40 |
+
- SHAP explainability plots
|
| 41 |
+
- Interactive visualizations
|
| 42 |
+
- Location: `reports/`
|
| 43 |
+
|
| 44 |
+
### 🤖 RAG System
|
| 45 |
+
- FAISS vector database
|
| 46 |
+
- Natural language querying
|
| 47 |
+
- Business insights
|
| 48 |
+
- Location: `vector_store/`
|
| 49 |
+
|
| 50 |
+
### 📈 Dashboard Features
|
| 51 |
+
- Sales overview and trends
|
| 52 |
+
- Real-time forecasting
|
| 53 |
+
- Model explanations
|
| 54 |
+
- AI-powered business queries
|
| 55 |
+
|
| 56 |
+
## 🔍 Sample Queries
|
| 57 |
+
|
| 58 |
+
Ask the RAG system questions like:
|
| 59 |
+
- "What were the total sales in 2023?"
|
| 60 |
+
- "Which product had the highest sales?"
|
| 61 |
+
- "How did promotions affect sales performance?"
|
| 62 |
+
- "What caused the sales dip in Q2 2023?"
|
| 63 |
+
|
| 64 |
+
## 🛠️ Troubleshooting
|
| 65 |
+
|
| 66 |
+
### Common Issues
|
| 67 |
+
1. **Memory errors**: Reduce batch sizes in `config.yaml`
|
| 68 |
+
2. **Import errors**: Run `pip install -r requirements.txt`
|
| 69 |
+
3. **Model loading**: Ensure all dependencies are installed
|
| 70 |
+
4. **Dashboard issues**: Check Streamlit installation
|
| 71 |
+
|
| 72 |
+
### Get Help
|
| 73 |
+
- Check logs in `logs/fmcg_pipeline.log`
|
| 74 |
+
- Review `README.md` for detailed documentation
|
| 75 |
+
- Run `python test_setup.py` to diagnose issues
|
| 76 |
+
|
| 77 |
+
## 🎉 Success!
|
| 78 |
+
|
| 79 |
+
Your FMCG analytics pipeline is ready! You now have:
|
| 80 |
+
- ✅ Production-ready ML models
|
| 81 |
+
- ✅ Interactive dashboard
|
| 82 |
+
- ✅ AI-powered business insights
|
| 83 |
+
- ✅ Comprehensive documentation
|
| 84 |
+
|
| 85 |
+
**Ready to transform your FMCG business with AI! 🚀**
|
fmcg_genai/README.md
ADDED
|
@@ -0,0 +1,351 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🚀 FMCG Sales Analytics & Forecasting Pipeline
|
| 2 |
+
|
| 3 |
+
A comprehensive **end-to-end ML + Generative AI (RAG)** workflow for FMCG sales forecasting and business insights. This production-grade system combines traditional machine learning with cutting-edge generative AI to provide actionable business intelligence.
|
| 4 |
+
|
| 5 |
+
## 📊 Project Overview
|
| 6 |
+
|
| 7 |
+
This project demonstrates a complete **resume-ready** implementation of:
|
| 8 |
+
- **Data Preprocessing & Feature Engineering**
|
| 9 |
+
- **Time Series Forecasting** (Prophet)
|
| 10 |
+
- **Machine Learning** (XGBoost)
|
| 11 |
+
- **Model Explainability** (SHAP)
|
| 12 |
+
- **Generative AI** (RAG with LangChain + FAISS)
|
| 13 |
+
- **Interactive Dashboard** (Streamlit)
|
| 14 |
+
|
| 15 |
+
## 🏗️ Architecture
|
| 16 |
+
|
| 17 |
+
```
|
| 18 |
+
fmcg_genai/
|
| 19 |
+
├── data/
|
| 20 |
+
│ ├── raw/ # Raw FMCG datasets
|
| 21 |
+
│ └── processed/ # Cleaned & engineered data
|
| 22 |
+
├── src/
|
| 23 |
+
│ ├── data_preprocessing.py # Data cleaning & time-series split
|
| 24 |
+
│ ├── feature_engineering.py # Feature creation & encoding
|
| 25 |
+
│ ├── train_models.py # Prophet & XGBoost training
|
| 26 |
+
│ ├── evaluate_models.py # Model evaluation & metrics
|
| 27 |
+
│ ├── explainability.py # SHAP explanations
|
| 28 |
+
│ ├── rag_pipeline.py # RAG system for queries
|
| 29 |
+
│ └── dashboard_app.py # Streamlit dashboard
|
| 30 |
+
├── models/ # Trained models (joblib/pkl)
|
| 31 |
+
├── vector_store/ # FAISS index for RAG
|
| 32 |
+
├── reports/ # Evaluation reports & SHAP plots
|
| 33 |
+
├── config.yaml # Central configuration
|
| 34 |
+
├── requirements.txt # Dependencies
|
| 35 |
+
├── run_pipeline.py # Main orchestrator
|
| 36 |
+
└── README.md # This file
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
## 🛠️ Tech Stack
|
| 40 |
+
|
| 41 |
+
### Core ML & Data Science
|
| 42 |
+
- **Python 3.8+** - Main programming language
|
| 43 |
+
- **Pandas & NumPy** - Data manipulation
|
| 44 |
+
- **Scikit-learn** - Machine learning utilities
|
| 45 |
+
- **XGBoost** - Gradient boosting for sales prediction
|
| 46 |
+
- **Prophet** - Time series forecasting
|
| 47 |
+
|
| 48 |
+
### Generative AI & RAG
|
| 49 |
+
- **LangChain** - RAG pipeline orchestration
|
| 50 |
+
- **FAISS** - Vector similarity search
|
| 51 |
+
- **HuggingFace Transformers** - Embeddings & LLMs
|
| 52 |
+
- **Sentence Transformers** - Text embeddings
|
| 53 |
+
|
| 54 |
+
### Visualization & Explainability
|
| 55 |
+
- **SHAP** - Model explainability
|
| 56 |
+
- **Plotly** - Interactive visualizations
|
| 57 |
+
- **Matplotlib & Seaborn** - Static plots
|
| 58 |
+
|
| 59 |
+
### Dashboard & Deployment
|
| 60 |
+
- **Streamlit** - Interactive web dashboard
|
| 61 |
+
- **PyYAML** - Configuration management
|
| 62 |
+
|
| 63 |
+
## 🚀 Quick Start
|
| 64 |
+
|
| 65 |
+
### 1. Setup Environment
|
| 66 |
+
|
| 67 |
+
```bash
|
| 68 |
+
# Clone the repository
|
| 69 |
+
git clone <repository-url>
|
| 70 |
+
cd fmcg_genai
|
| 71 |
+
|
| 72 |
+
# Create virtual environment
|
| 73 |
+
python -m venv venv
|
| 74 |
+
source venv/bin/activate # On Windows: venv\Scripts\activate
|
| 75 |
+
|
| 76 |
+
# Install dependencies
|
| 77 |
+
pip install -r requirements.txt
|
| 78 |
+
```
|
| 79 |
+
|
| 80 |
+
### 2. Configure the Project
|
| 81 |
+
|
| 82 |
+
Edit `config.yaml` to customize:
|
| 83 |
+
- Data paths
|
| 84 |
+
- Model parameters
|
| 85 |
+
- RAG settings
|
| 86 |
+
- API keys (if using OpenAI)
|
| 87 |
+
|
| 88 |
+
### 3. Run the Complete Pipeline
|
| 89 |
+
|
| 90 |
+
```bash
|
| 91 |
+
# Run all steps
|
| 92 |
+
python run_pipeline.py
|
| 93 |
+
|
| 94 |
+
# Or skip specific steps (here: preprocessing and feature engineering)
|
| 95 |
+
python run_pipeline.py --skip preprocessing feature_engineering
|
| 96 |
+
```
|
| 97 |
+
|
| 98 |
+
### 4. Launch the Dashboard
|
| 99 |
+
|
| 100 |
+
```bash
|
| 101 |
+
streamlit run src/dashboard_app.py
|
| 102 |
+
```
|
| 103 |
+
|
| 104 |
+
## 📋 Pipeline Steps
|
| 105 |
+
|
| 106 |
+
### 1. Data Preprocessing (`data_preprocessing.py`)
|
| 107 |
+
- Loads FMCG sales data from `data/raw/`
|
| 108 |
+
- Handles missing values and outliers
|
| 109 |
+
- Creates time-series train/test split
|
| 110 |
+
- Saves cleaned data to `data/processed/`
|
| 111 |
+
|
| 112 |
+
**Key Features:**
|
| 113 |
+
- Automatic outlier detection using IQR method
|
| 114 |
+
- Time-series aware splitting (train through 2023-06-30, test from 2023-07-01 onwards)
|
| 115 |
+
- Comprehensive data validation and cleaning
|
| 116 |
+
|
| 117 |
+
### 2. Feature Engineering (`feature_engineering.py`)
|
| 118 |
+
- Creates lag features (1, 7, 14, 30 days)
|
| 119 |
+
- Generates rolling averages and statistics
|
| 120 |
+
- Adds time-based features (month, quarter, day-of-week)
|
| 121 |
+
- Includes holiday and seasonal features
|
| 122 |
+
- Encodes categorical variables
|
| 123 |
+
|
| 124 |
+
**Key Features:**
|
| 125 |
+
- 50+ engineered features
|
| 126 |
+
- Holiday calendar integration
|
| 127 |
+
- Seasonal decomposition
|
| 128 |
+
- Categorical encoding with label encoders
|
| 129 |
+
|
| 130 |
+
### 3. Model Training (`train_models.py`)
|
| 131 |
+
- Trains **Prophet** for time series forecasting
|
| 132 |
+
- Trains **XGBoost** for sales prediction
|
| 133 |
+
- Saves models to `models/` directory
|
| 134 |
+
|
| 135 |
+
**Models:**
|
| 136 |
+
- **Prophet**: Captures trends, seasonality, and holidays
|
| 137 |
+
- **XGBoost**: Handles complex feature interactions
|
| 138 |
+
|
| 139 |
+
### 4. Model Evaluation (`evaluate_models.py`)
|
| 140 |
+
- Evaluates both models using multiple metrics
|
| 141 |
+
- Generates comprehensive visualizations
|
| 142 |
+
- Creates interactive plots with Plotly
|
| 143 |
+
|
| 144 |
+
**Metrics:**
|
| 145 |
+
- MAE, RMSE, MAPE, R²
|
| 146 |
+
- Directional accuracy
|
| 147 |
+
- Bias analysis
|
| 148 |
+
|
| 149 |
+
### 5. SHAP Explainability (`explainability.py`)
|
| 150 |
+
- Generates global feature importance
|
| 151 |
+
- Creates local explanations for individual predictions
|
| 152 |
+
- Produces business insights and recommendations
|
| 153 |
+
|
| 154 |
+
**Outputs:**
|
| 155 |
+
- SHAP summary plots
|
| 156 |
+
- Waterfall plots for individual predictions
|
| 157 |
+
- Feature interaction analysis
|
| 158 |
+
- Business recommendations
|
| 159 |
+
|
| 160 |
+
### 6. RAG Pipeline (`rag_pipeline.py`)
|
| 161 |
+
- Converts sales data into text documents
|
| 162 |
+
- Builds FAISS vector database
|
| 163 |
+
- Implements LangChain QA system
|
| 164 |
+
- Answers natural language queries
|
| 165 |
+
|
| 166 |
+
**Capabilities:**
|
| 167 |
+
- Natural language querying
|
| 168 |
+
- Context-aware responses
|
| 169 |
+
- Source attribution
|
| 170 |
+
- Multiple document types (daily summaries, product analysis, regional performance)
|
| 171 |
+
|
| 172 |
+
### 7. Dashboard (`dashboard_app.py`)
|
| 173 |
+
- Interactive Streamlit interface
|
| 174 |
+
- Real-time model predictions
|
| 175 |
+
- SHAP visualizations
|
| 176 |
+
- RAG query interface
|
| 177 |
+
|
| 178 |
+
**Tabs:**
|
| 179 |
+
- Sales Overview
|
| 180 |
+
- Forecasting
|
| 181 |
+
- Model Explainability
|
| 182 |
+
- Business Queries
|
| 183 |
+
|
| 184 |
+
## 🎯 Usage Examples
|
| 185 |
+
|
| 186 |
+
### Running Individual Components
|
| 187 |
+
|
| 188 |
+
```bash
|
| 189 |
+
# Data preprocessing only
|
| 190 |
+
python src/data_preprocessing.py
|
| 191 |
+
|
| 192 |
+
# Feature engineering only
|
| 193 |
+
python src/feature_engineering.py
|
| 194 |
+
|
| 195 |
+
# Train models only
|
| 196 |
+
python src/train_models.py
|
| 197 |
+
|
| 198 |
+
# Evaluate models only
|
| 199 |
+
python src/evaluate_models.py
|
| 200 |
+
|
| 201 |
+
# SHAP analysis only
|
| 202 |
+
python src/explainability.py
|
| 203 |
+
|
| 204 |
+
# RAG pipeline only
|
| 205 |
+
python src/rag_pipeline.py
|
| 206 |
+
```
|
| 207 |
+
|
| 208 |
+
### Custom Queries via RAG
|
| 209 |
+
|
| 210 |
+
The RAG system can answer questions like:
|
| 211 |
+
- "What were the total sales in 2023?"
|
| 212 |
+
- "Which product had the highest sales?"
|
| 213 |
+
- "How did promotions affect sales performance?"
|
| 214 |
+
- "What caused the sales dip in Q2 2023?"
|
| 215 |
+
- "Which region performed best?"
|
| 216 |
+
|
| 217 |
+
### Dashboard Features
|
| 218 |
+
|
| 219 |
+
1. **Sales Overview**: Key metrics, trends, regional analysis
|
| 220 |
+
2. **Forecasting**: Prophet predictions with confidence intervals
|
| 221 |
+
3. **Explainability**: SHAP plots and feature importance
|
| 222 |
+
4. **Business Queries**: Natural language Q&A interface
|
| 223 |
+
|
| 224 |
+
## 📊 Sample Outputs
|
| 225 |
+
|
| 226 |
+
### Model Performance
|
| 227 |
+
```
|
| 228 |
+
Prophet Model Metrics:
|
| 229 |
+
MAE: 245.32
|
| 230 |
+
RMSE: 312.45
|
| 231 |
+
MAPE: 8.67%
|
| 232 |
+
|
| 233 |
+
XGBoost Model Metrics:
|
| 234 |
+
MAE: 198.76
|
| 235 |
+
RMSE: 289.34
|
| 236 |
+
R²: 0.847
|
| 237 |
+
MAPE: 7.23%
|
| 238 |
+
```
|
| 239 |
+
|
| 240 |
+
### Business Insights
|
| 241 |
+
- Price optimization is crucial for sales performance
|
| 242 |
+
- Historical sales patterns significantly influence future sales
|
| 243 |
+
- Promotional activities have strong impact on sales
|
| 244 |
+
- Seasonal patterns are important for sales forecasting
|
| 245 |
+
|
| 246 |
+
## 🔧 Configuration
|
| 247 |
+
|
| 248 |
+
The `config.yaml` file controls all aspects of the pipeline:
|
| 249 |
+
|
| 250 |
+
```yaml
|
| 251 |
+
# Data paths
|
| 252 |
+
data:
|
| 253 |
+
raw_dir: "data/raw"
|
| 254 |
+
processed_dir: "data/processed"
|
| 255 |
+
|
| 256 |
+
# Model parameters
|
| 257 |
+
models_config:
|
| 258 |
+
prophet:
|
| 259 |
+
changepoint_prior_scale: 0.05
|
| 260 |
+
seasonality_prior_scale: 10.0
|
| 261 |
+
|
| 262 |
+
xgboost:
|
| 263 |
+
n_estimators: 1000
|
| 264 |
+
max_depth: 6
|
| 265 |
+
|
| 266 |
+
# RAG settings
|
| 267 |
+
rag:
|
| 268 |
+
embedding_model: "sentence-transformers/all-MiniLM-L6-v2"
|
| 269 |
+
llm_model: "google/flan-t5-base"
|
| 270 |
+
chunk_size: 1000
|
| 271 |
+
```
|
| 272 |
+
|
| 273 |
+
## 🚀 Production Deployment
|
| 274 |
+
|
| 275 |
+
### Docker Deployment
|
| 276 |
+
```dockerfile
|
| 277 |
+
FROM python:3.9-slim
|
| 278 |
+
|
| 279 |
+
WORKDIR /app
|
| 280 |
+
COPY requirements.txt .
|
| 281 |
+
RUN pip install -r requirements.txt
|
| 282 |
+
|
| 283 |
+
COPY . .
|
| 284 |
+
EXPOSE 8501
|
| 285 |
+
|
| 286 |
+
CMD ["streamlit", "run", "src/dashboard_app.py", "--server.port=8501"]
|
| 287 |
+
```
|
| 288 |
+
|
| 289 |
+
### Cloud Deployment
|
| 290 |
+
- **AWS**: Deploy on EC2 with Streamlit
|
| 291 |
+
- **GCP**: Use Cloud Run for containerized deployment
|
| 292 |
+
- **Azure**: Deploy on App Service
|
| 293 |
+
|
| 294 |
+
## 📈 Performance Optimization
|
| 295 |
+
|
| 296 |
+
### For Large Datasets
|
| 297 |
+
- Use chunked processing in data preprocessing
|
| 298 |
+
- Implement parallel feature engineering
|
| 299 |
+
- Use GPU acceleration for SHAP calculations
|
| 300 |
+
- Optimize FAISS index for faster retrieval
|
| 301 |
+
|
| 302 |
+
### Memory Management
|
| 303 |
+
- Process data in batches
|
| 304 |
+
- Use memory-efficient data types
|
| 305 |
+
- Implement garbage collection in loops
|
| 306 |
+
|
| 307 |
+
## 🔍 Troubleshooting
|
| 308 |
+
|
| 309 |
+
### Common Issues
|
| 310 |
+
|
| 311 |
+
1. **Memory Errors**: Reduce batch sizes in config
|
| 312 |
+
2. **Model Loading**: Ensure all dependencies are installed
|
| 313 |
+
3. **RAG Pipeline**: Check HuggingFace model availability
|
| 314 |
+
4. **Dashboard**: Verify Streamlit installation
|
| 315 |
+
|
| 316 |
+
### Debug Mode
|
| 317 |
+
```bash
|
| 318 |
+
# Run with verbose logging
|
| 319 |
+
python run_pipeline.py --config config_debug.yaml
|
| 320 |
+
```
|
| 321 |
+
|
| 322 |
+
## 🤝 Contributing
|
| 323 |
+
|
| 324 |
+
1. Fork the repository
|
| 325 |
+
2. Create a feature branch
|
| 326 |
+
3. Make your changes
|
| 327 |
+
4. Add tests
|
| 328 |
+
5. Submit a pull request
|
| 329 |
+
|
| 330 |
+
## 📄 License
|
| 331 |
+
|
| 332 |
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
| 333 |
+
|
| 334 |
+
## 🙏 Acknowledgments
|
| 335 |
+
|
| 336 |
+
- **Prophet** by Facebook Research
|
| 337 |
+
- **XGBoost** by DMLC
|
| 338 |
+
- **SHAP** by Microsoft Research
|
| 339 |
+
- **LangChain** by Harrison Chase
|
| 340 |
+
- **Streamlit** for the dashboard framework
|
| 341 |
+
|
| 342 |
+
## 📞 Support
|
| 343 |
+
|
| 344 |
+
For questions or issues:
|
| 345 |
+
- Create an issue on GitHub
|
| 346 |
+
- Check the logs in `logs/fmcg_pipeline.log`
|
| 347 |
+
- Review the configuration in `config.yaml`
|
| 348 |
+
|
| 349 |
+
---
|
| 350 |
+
|
| 351 |
+
**🎉 Ready to transform your FMCG business with AI-powered insights!**
|
fmcg_genai/config.yaml
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# FMCG Generative AI Project Configuration
|
| 2 |
+
|
| 3 |
+
# Data paths
|
| 4 |
+
data:
|
| 5 |
+
raw_dir: "data/raw"
|
| 6 |
+
processed_dir: "data/processed"
|
| 7 |
+
main_file: "FMCG_2022_2024.csv"
|
| 8 |
+
cleaned_file: "cleaned.csv"
|
| 9 |
+
features_file: "features.csv"
|
| 10 |
+
|
| 11 |
+
# Model paths
|
| 12 |
+
models:
|
| 13 |
+
prophet_model: "models/prophet.pkl"
|
| 14 |
+
xgboost_model: "models/xgboost_sales.pkl"
|
| 15 |
+
vector_store: "vector_store/faiss_index"
|
| 16 |
+
|
| 17 |
+
# Reports paths
|
| 18 |
+
reports:
|
| 19 |
+
evaluation: "reports/evaluation.json"
|
| 20 |
+
shap_dir: "reports/shap"
|
| 21 |
+
|
| 22 |
+
# Data preprocessing parameters
|
| 23 |
+
preprocessing:
|
| 24 |
+
train_split_date: "2023-06-30" # Train up to mid-2023
|
| 25 |
+
test_split_date: "2023-07-01" # Test from July 2023 onwards
|
| 26 |
+
outlier_threshold: 3.0 # Standard deviations for outlier detection
|
| 27 |
+
min_date: "2022-01-01"
|
| 28 |
+
max_date: "2024-12-31"
|
| 29 |
+
|
| 30 |
+
# Feature engineering parameters
|
| 31 |
+
features:
|
| 32 |
+
lag_features: [1, 7, 14, 30] # Days for lag features
|
| 33 |
+
rolling_windows: [7, 14, 30] # Days for rolling averages
|
| 34 |
+
categorical_columns: ["brand", "segment", "category", "channel", "region", "pack_type"]
|
| 35 |
+
target_column: "units_sold"
|
| 36 |
+
|
| 37 |
+
# Model parameters
|
| 38 |
+
models_config:
|
| 39 |
+
prophet:
|
| 40 |
+
changepoint_prior_scale: 0.05
|
| 41 |
+
seasonality_prior_scale: 10.0
|
| 42 |
+
holidays_prior_scale: 10.0
|
| 43 |
+
seasonality_mode: "multiplicative"
|
| 44 |
+
|
| 45 |
+
xgboost:
|
| 46 |
+
n_estimators: 1000
|
| 47 |
+
max_depth: 6
|
| 48 |
+
learning_rate: 0.1
|
| 49 |
+
subsample: 0.8
|
| 50 |
+
colsample_bytree: 0.8
|
| 51 |
+
random_state: 42
|
| 52 |
+
|
| 53 |
+
# RAG pipeline parameters
|
| 54 |
+
rag:
|
| 55 |
+
chunk_size: 1000
|
| 56 |
+
chunk_overlap: 200
|
| 57 |
+
embedding_model: "sentence-transformers/all-MiniLM-L6-v2"
|
| 58 |
+
llm_model: "google/flan-t5-base" # Alternative: "microsoft/DialoGPT-medium"
|
| 59 |
+
max_tokens: 512
|
| 60 |
+
temperature: 0.7
|
| 61 |
+
top_k: 5
|
| 62 |
+
|
| 63 |
+
# API keys (placeholders: replace locally with your actual keys; never commit real keys)
|
| 64 |
+
api_keys:
|
| 65 |
+
openai_api_key: "your-openai-api-key-here" # Optional: for OpenAI models
|
| 66 |
+
huggingface_token: "your-huggingface-token-here" # Optional: for private models
|
| 67 |
+
|
| 68 |
+
# Dashboard parameters
|
| 69 |
+
dashboard:
|
| 70 |
+
port: 8501
|
| 71 |
+
host: "localhost"
|
| 72 |
+
title: "FMCG Sales Analytics & Forecasting"
|
| 73 |
+
theme: "light"
|
| 74 |
+
|
| 75 |
+
# Logging
|
| 76 |
+
logging:
|
| 77 |
+
level: "INFO"
|
| 78 |
+
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
| 79 |
+
file: "logs/fmcg_pipeline.log"
|
| 80 |
+
|
| 81 |
+
# Performance settings
|
| 82 |
+
performance:
|
| 83 |
+
n_jobs: -1 # Use all CPU cores
|
| 84 |
+
memory_efficient: true
|
| 85 |
+
batch_size: 1000
|
fmcg_genai/data/processed/cleaned.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:602081a2865f2e379b8f54cfe0cf3e131a9b544afa5b3dbf9829e106d09bcddc
|
| 3 |
+
size 15226587
|
fmcg_genai/data/processed/data_summary.json
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"total_records": 161943,
|
| 3 |
+
"date_range": "2022-01-21 00:00:00 to 2024-12-31 00:00:00",
|
| 4 |
+
"unique_products": 30,
|
| 5 |
+
"unique_brands": 14,
|
| 6 |
+
"unique_regions": 3,
|
| 7 |
+
"total_sales": "2809736",
|
| 8 |
+
"avg_sales_per_day": 2611.278810408922,
|
| 9 |
+
"columns": [
|
| 10 |
+
"date",
|
| 11 |
+
"sku",
|
| 12 |
+
"brand",
|
| 13 |
+
"segment",
|
| 14 |
+
"category",
|
| 15 |
+
"channel",
|
| 16 |
+
"region",
|
| 17 |
+
"pack_type",
|
| 18 |
+
"price_unit",
|
| 19 |
+
"promotion_flag",
|
| 20 |
+
"delivery_days",
|
| 21 |
+
"stock_available",
|
| 22 |
+
"delivered_qty",
|
| 23 |
+
"units_sold"
|
| 24 |
+
],
|
| 25 |
+
"data_types": {
|
| 26 |
+
"date": "datetime64[ns]",
|
| 27 |
+
"sku": "object",
|
| 28 |
+
"brand": "object",
|
| 29 |
+
"segment": "object",
|
| 30 |
+
"category": "object",
|
| 31 |
+
"channel": "object",
|
| 32 |
+
"region": "object",
|
| 33 |
+
"pack_type": "object",
|
| 34 |
+
"price_unit": "float64",
|
| 35 |
+
"promotion_flag": "int64",
|
| 36 |
+
"delivery_days": "int64",
|
| 37 |
+
"stock_available": "int64",
|
| 38 |
+
"delivered_qty": "int64",
|
| 39 |
+
"units_sold": "int64"
|
| 40 |
+
}
|
| 41 |
+
}
|
fmcg_genai/data/processed/feature_summary.json
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"total_features": 63,
|
| 3 |
+
"numeric_features": 54,
|
| 4 |
+
"categorical_features": 8,
|
| 5 |
+
"lag_features": 16,
|
| 6 |
+
"rolling_features": 0,
|
| 7 |
+
"time_features": 5,
|
| 8 |
+
"feature_list": [
|
| 9 |
+
"date",
|
| 10 |
+
"sku",
|
| 11 |
+
"brand",
|
| 12 |
+
"segment",
|
| 13 |
+
"category",
|
| 14 |
+
"channel",
|
| 15 |
+
"region",
|
| 16 |
+
"pack_type",
|
| 17 |
+
"price_unit",
|
| 18 |
+
"promotion_flag",
|
| 19 |
+
"delivery_days",
|
| 20 |
+
"stock_available",
|
| 21 |
+
"delivered_qty",
|
| 22 |
+
"units_sold",
|
| 23 |
+
"year",
|
| 24 |
+
"month",
|
| 25 |
+
"quarter",
|
| 26 |
+
"day_of_week",
|
| 27 |
+
"day_of_month",
|
| 28 |
+
"week_of_year",
|
| 29 |
+
"is_weekend",
|
| 30 |
+
"is_month_start",
|
| 31 |
+
"is_month_end",
|
| 32 |
+
"is_quarter_start",
|
| 33 |
+
"is_quarter_end",
|
| 34 |
+
"sin_month",
|
| 35 |
+
"cos_month",
|
| 36 |
+
"sin_day_of_week",
|
| 37 |
+
"cos_day_of_week",
|
| 38 |
+
"is_holiday",
|
| 39 |
+
"holiday_name",
|
| 40 |
+
"days_to_holiday",
|
| 41 |
+
"days_after_holiday",
|
| 42 |
+
"lag_1_sales",
|
| 43 |
+
"lag_1_price",
|
| 44 |
+
"lag_1_stock",
|
| 45 |
+
"lag_1_delivery",
|
| 46 |
+
"lag_7_sales",
|
| 47 |
+
"lag_7_price",
|
| 48 |
+
"lag_7_stock",
|
| 49 |
+
"lag_7_delivery",
|
| 50 |
+
"lag_14_sales",
|
| 51 |
+
"lag_14_price",
|
| 52 |
+
"lag_14_stock",
|
| 53 |
+
"lag_14_delivery",
|
| 54 |
+
"lag_30_sales",
|
| 55 |
+
"lag_30_price",
|
| 56 |
+
"lag_30_stock",
|
| 57 |
+
"lag_30_delivery",
|
| 58 |
+
"price_volume_ratio",
|
| 59 |
+
"price_stock_ratio",
|
| 60 |
+
"promotion_effect",
|
| 61 |
+
"promotion_price_effect",
|
| 62 |
+
"delivery_stock_ratio",
|
| 63 |
+
"delivery_sales_ratio",
|
| 64 |
+
"monthly_sales_trend",
|
| 65 |
+
"weekend_sales_boost",
|
| 66 |
+
"brand_encoded",
|
| 67 |
+
"segment_encoded",
|
| 68 |
+
"category_encoded",
|
| 69 |
+
"channel_encoded",
|
| 70 |
+
"region_encoded",
|
| 71 |
+
"pack_type_encoded"
|
| 72 |
+
]
|
| 73 |
+
}
|
fmcg_genai/data/processed/features.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e27cd21916356337f4d98f219e1d2cebde686665eb6df9c53ec7fb05e1184733
|
| 3 |
+
size 133732972
|
fmcg_genai/data/processed/label_encoders.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6be47f8bb34cf1852a3c58a41e735daaa160548fe0607f461350e8176ed4ccab
|
| 3 |
+
size 2071
|
fmcg_genai/data/processed/scaler.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:db7a5d744156a377cc20a9ac75876054c1de370a677c4c08b8e4978a77ce60af
|
| 3 |
+
size 2111
|
fmcg_genai/data/processed/test.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
fmcg_genai/data/processed/test_features.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:18b1af312129ff092ad2bbc84e71a052d8b2d7a369a93fff5802ae65e91f02b5
|
| 3 |
+
size 80063408
|
fmcg_genai/data/processed/train.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
fmcg_genai/data/processed/train_features.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d975a4bf821b74ea3b14124eaf048d80e78066573ecb02b603ae88e28b11324
|
| 3 |
+
size 40910185
|
fmcg_genai/data/processed/validation.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
fmcg_genai/data/processed/validation_features.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:035a7cb7df6cb278085440a08ae55e01de24133e995faed3025a9d9a64a80bf3
|
| 3 |
+
size 12761069
|
fmcg_genai/data/raw/FMCG_2022_2024.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ec91eb23b62ec9836d0923f27cadb7d1d57c4185e111144a1e54feb569c4e61d
|
| 3 |
+
size 17747435
|
fmcg_genai/data/raw/batch_MI-006_2025-01-06.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5f4f6f0fc9ca515d58e51a846e3d4e62f442640778aace98a7183154cdb850d8
|
| 3 |
+
size 11332
|
fmcg_genai/data/raw/batch_MI-006_2025-01-13.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0717466de9546fbfd4d1f4374fb22758f94f0906e0b6acc45795d31b77466528
|
| 3 |
+
size 11764
|
fmcg_genai/data/raw/batch_MI-006_2025-01-20.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a93fc5ecdcce282bfc987536fcad6fd6c142c139374689ba3c318fb4d61dcab
|
| 3 |
+
size 11723
|
fmcg_genai/data/raw/batch_MI-006_2025-01-27.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9bd5fad1a72630908ef91b5b13603498df4f33e9b35e8daf14ca04345ed69d73
|
| 3 |
+
size 11766
|
fmcg_genai/data/raw/df_weekly_MI-006_enriched.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
fmcg_genai/data/raw/weekly_df_final_for_modeling.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
fmcg_genai/logs/fmcg_pipeline.log
ADDED
|
File without changes
|
fmcg_genai/models/model_metadata.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"prophet_params": {
|
| 3 |
+
"changepoint_prior_scale": 0.05,
|
| 4 |
+
"seasonality_prior_scale": 10.0,
|
| 5 |
+
"holidays_prior_scale": 10.0,
|
| 6 |
+
"seasonality_mode": "multiplicative"
|
| 7 |
+
},
|
| 8 |
+
"xgboost_params": {
|
| 9 |
+
"n_estimators": 1000,
|
| 10 |
+
"max_depth": 6,
|
| 11 |
+
"learning_rate": 0.1,
|
| 12 |
+
"subsample": 0.8,
|
| 13 |
+
"colsample_bytree": 0.8,
|
| 14 |
+
"random_state": 42
|
| 15 |
+
},
|
| 16 |
+
"target_column": "units_sold",
|
| 17 |
+
"training_date": "2025-09-01T01:27:55.229753"
|
| 18 |
+
}
|
fmcg_genai/models/prophet.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ecb872b28026d1b76adff4f386ac252561d308d629f580ff7af481aab81a984
|
| 3 |
+
size 61546
|
fmcg_genai/models/training_report.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"prophet_metrics": {
|
| 3 |
+
"mae": 1530.120897997467,
|
| 4 |
+
"rmse": 1786.4830823806167,
|
| 5 |
+
"mape": 48.27730556519623
|
| 6 |
+
},
|
| 7 |
+
"xgboost_metrics": {
|
| 8 |
+
"mae": 0.19390276074409485,
|
| 9 |
+
"rmse": 0.2811661701307301,
|
| 10 |
+
"r2": 0.9986926913261414,
|
| 11 |
+
"mape": null
|
| 12 |
+
},
|
| 13 |
+
"training_summary": {
|
| 14 |
+
"prophet_model_path": "models/prophet.pkl",
|
| 15 |
+
"xgboost_model_path": "models/xgboost_sales.pkl",
|
| 16 |
+
"training_date": "2025-09-01T01:27:55.231752"
|
| 17 |
+
}
|
| 18 |
+
}
|
fmcg_genai/models/xgboost_sales.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43d72d9df364664ee1b2b3f9a9f97fd988452cf7debb4f74ff6b5665a8139fb0
|
| 3 |
+
size 4764796
|
fmcg_genai/reports/detailed_metrics.csv
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metric,Prophet,XGBoost
|
| 2 |
+
mae,1530.120897997467,0.19390276074409485
|
| 3 |
+
rmse,1786.4830823806167,0.2811661701307301
|
| 4 |
+
mape,48.27730556504157,10160557.13591644
|
| 5 |
+
r2,-17.990151726059484,0.9986926913261414
|
| 6 |
+
mse,3191521.8036321495,0.07905441522598267
|
| 7 |
+
directional_accuracy,51.91256830601093,94.84967734715953
|
| 8 |
+
bias,1503.2120809372166,0.0056714017673694245
|
| 9 |
+
variance_explained,-4.544832011161535,0.998693224534181
|
| 10 |
+
mean_true,3243.5345454545454,16.635526917015582
|
| 11 |
+
mean_pred,4746.746626391762,16.64120101928711
|
| 12 |
+
std_true,409.9535868266872,7.776321536686768
|
| 13 |
+
std_pred,991.0367830893551,7.778368949890137
|
fmcg_genai/reports/evaluation.json
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"evaluation_summary": {
|
| 3 |
+
"evaluation_date": "2025-09-01T01:28:26.114686",
|
| 4 |
+
"test_data_size": 550,
|
| 5 |
+
"date_range": "2023-07-01 00:00:00 to 2024-12-31 00:00:00"
|
| 6 |
+
},
|
| 7 |
+
"prophet_results": {
|
| 8 |
+
"mae": 1530.120897997467,
|
| 9 |
+
"rmse": 1786.4830823806167,
|
| 10 |
+
"mape": 48.27730556504157,
|
| 11 |
+
"r2": -17.990151726059484,
|
| 12 |
+
"mse": 3191521.8036321495,
|
| 13 |
+
"directional_accuracy": 51.91256830601093,
|
| 14 |
+
"bias": 1503.2120809372166,
|
| 15 |
+
"variance_explained": -4.544832011161535,
|
| 16 |
+
"mean_true": 3243.5345454545454,
|
| 17 |
+
"mean_pred": 4746.746626391762,
|
| 18 |
+
"std_true": 409.9535868266872,
|
| 19 |
+
"std_pred": 991.0367830893551
|
| 20 |
+
},
|
| 21 |
+
"xgboost_results": {
|
| 22 |
+
"mae": 0.19390276074409485,
|
| 23 |
+
"rmse": 0.2811661701307301,
|
| 24 |
+
"mape": 10160557.13591644,
|
| 25 |
+
"r2": 0.9986926913261414,
|
| 26 |
+
"mse": 0.07905441522598267,
|
| 27 |
+
"directional_accuracy": 94.84967734715953,
|
| 28 |
+
"bias": 0.0056714017673694245,
|
| 29 |
+
"variance_explained": 0.998693224534181,
|
| 30 |
+
"mean_true": 16.635526917015582,
|
| 31 |
+
"mean_pred": 16.64120101928711,
|
| 32 |
+
"std_true": 7.776321536686768,
|
| 33 |
+
"std_pred": 7.778368949890137
|
| 34 |
+
},
|
| 35 |
+
"model_comparison": {
|
| 36 |
+
"best_mae": "XGBoost",
|
| 37 |
+
"best_rmse": "XGBoost",
|
| 38 |
+
"best_r2": "XGBoost",
|
| 39 |
+
"best_mape": "Prophet"
|
| 40 |
+
},
|
| 41 |
+
"feature_importance": {
|
| 42 |
+
"quarter": "0.24001852",
|
| 43 |
+
"price_volume_ratio": "0.21321724",
|
| 44 |
+
"price_stock_ratio": "0.100424424",
|
| 45 |
+
"stock_available": "0.09902188",
|
| 46 |
+
"delivery_sales_ratio": "0.09652115",
|
| 47 |
+
"delivery_days": "0.07960708",
|
| 48 |
+
"price_unit": "0.064880736",
|
| 49 |
+
"weekend_sales_boost": "0.027654927",
|
| 50 |
+
"delivery_stock_ratio": "0.023944313",
|
| 51 |
+
"week_of_year": "0.016661568",
|
| 52 |
+
"monthly_sales_trend": "0.016195565",
|
| 53 |
+
"month": "0.008638526",
|
| 54 |
+
"sin_month": "0.0041564563",
|
| 55 |
+
"cos_month": "0.002141759",
|
| 56 |
+
"is_weekend": "0.0021345655",
|
| 57 |
+
"day_of_week": "0.00080703705",
|
| 58 |
+
"lag_7_sales": "0.0002917967",
|
| 59 |
+
"segment_encoded": "0.00027531944",
|
| 60 |
+
"lag_1_sales": "0.0002709551",
|
| 61 |
+
"brand_encoded": "0.00021452275",
|
| 62 |
+
"sku": "0.00021435674",
|
| 63 |
+
"lag_14_sales": "0.00019604391",
|
| 64 |
+
"segment": "0.00018667316",
|
| 65 |
+
"delivered_qty": "0.00017336577",
|
| 66 |
+
"lag_30_sales": "0.00016313794",
|
| 67 |
+
"year": "0.000114514405",
|
| 68 |
+
"sin_day_of_week": "0.000108376145",
|
| 69 |
+
"days_after_holiday": "9.9591394e-05",
|
| 70 |
+
"days_to_holiday": "9.059149e-05",
|
| 71 |
+
"cos_day_of_week": "8.695877e-05",
|
| 72 |
+
"day_of_month": "7.911242e-05",
|
| 73 |
+
"region_encoded": "7.33006e-05",
|
| 74 |
+
"lag_30_price": "6.854336e-05",
|
| 75 |
+
"is_month_start": "6.6947825e-05",
|
| 76 |
+
"lag_7_stock": "6.683317e-05",
|
| 77 |
+
"lag_30_stock": "6.6029024e-05",
|
| 78 |
+
"lag_1_delivery": "6.460318e-05",
|
| 79 |
+
"is_quarter_start": "6.434708e-05",
|
| 80 |
+
"lag_7_price": "6.358946e-05",
|
| 81 |
+
"lag_1_price": "6.172716e-05",
|
| 82 |
+
"lag_30_delivery": "6.1450606e-05",
|
| 83 |
+
"lag_1_stock": "6.120854e-05",
|
| 84 |
+
"lag_14_delivery": "6.0455568e-05",
|
| 85 |
+
"lag_14_stock": "5.947861e-05",
|
| 86 |
+
"lag_14_price": "5.828131e-05",
|
| 87 |
+
"is_month_end": "5.701209e-05",
|
| 88 |
+
"is_holiday": "5.6626708e-05",
|
| 89 |
+
"lag_7_delivery": "5.6616092e-05",
|
| 90 |
+
"is_quarter_end": "5.6224068e-05",
|
| 91 |
+
"brand": "5.551302e-05",
|
| 92 |
+
"channel_encoded": "5.0425795e-05",
|
| 93 |
+
"region": "4.7347516e-05",
|
| 94 |
+
"pack_type_encoded": "4.1937787e-05",
|
| 95 |
+
"pack_type": "4.1645082e-05",
|
| 96 |
+
"channel": "3.81141e-05",
|
| 97 |
+
"category": "1.0780985e-05",
|
| 98 |
+
"promotion_flag": "0.0",
|
| 99 |
+
"promotion_effect": "0.0",
|
| 100 |
+
"promotion_price_effect": "0.0",
|
| 101 |
+
"category_encoded": "0.0"
|
| 102 |
+
}
|
| 103 |
+
}
|
fmcg_genai/reports/model_comparison.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
fmcg_genai/reports/model_evaluation_plots.png
ADDED
|
Git LFS Details
|
fmcg_genai/reports/prophet_forecast.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
fmcg_genai/reports/rag_test_results.json
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"query": "What were the total sales in 2023?",
|
| 4 |
+
"answer": {
|
| 5 |
+
"answer": "2510 units",
|
| 6 |
+
"sources": [
|
| 7 |
+
"On 2022-03-10, sales were 203 units, avg price $5.13, stock 146, 0 promos.",
|
| 8 |
+
"On 2022-12-01, sales were 2076 units, avg price $5.30, stock 156, 0 promos.",
|
| 9 |
+
"On 2022-12-28, sales were 2073 units, avg price $5.06, stock 155, 0 promos.",
|
| 10 |
+
"On 2023-01-22, sales were 2510 units, avg price $5.13, stock 159, 0 promos.",
|
| 11 |
+
"On 2022-06-23, sales were 1566 units, avg price $5.10, stock 153, 0 promos."
|
| 12 |
+
]
|
| 13 |
+
}
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"query": "Which product had the highest sales?",
|
| 17 |
+
"answer": {
|
| 18 |
+
"answer": "2022-04-27",
|
| 19 |
+
"sources": [
|
| 20 |
+
"On 2022-04-27, sales were 919 units, avg price $5.50, stock 152, 0 promos.",
|
| 21 |
+
"On 2022-11-26, sales were 1735 units, avg price $4.98, stock 148, 0 promos.",
|
| 22 |
+
"On 2022-02-19, sales were 153 units, avg price $5.13, stock 158, 0 promos.",
|
| 23 |
+
"On 2022-02-21, sales were 154 units, avg price $4.95, stock 147, 0 promos.",
|
| 24 |
+
"On 2023-09-11, sales were 3135 units, avg price $5.38, stock 153, 0 promos."
|
| 25 |
+
]
|
| 26 |
+
}
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"query": "What is the average price across all regions?",
|
| 30 |
+
"answer": {
|
| 31 |
+
"answer": "$5.26",
|
| 32 |
+
"sources": [
|
| 33 |
+
"Region PL-South sold 936027 units, 30 products, avg price $5.26.",
|
| 34 |
+
"Region PL-Central sold 935177 units, 30 products, avg price $5.24.",
|
| 35 |
+
"Region PL-North sold 938532 units, 30 products, avg price $5.25.",
|
| 36 |
+
"Product YO-009 sold 106753 units across 3 regions at avg price $5.20.",
|
| 37 |
+
"Product YO-003 sold 101658 units across 3 regions at avg price $5.24."
|
| 38 |
+
]
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"query": "How do sales compare with and without promotions?",
|
| 43 |
+
"answer": {
|
| 44 |
+
"answer": "2022-11-26",
|
| 45 |
+
"sources": [
|
| 46 |
+
"On 2022-11-26, sales were 1735 units, avg price $4.98, stock 148, 0 promos.",
|
| 47 |
+
"On 2022-11-15, sales were 1688 units, avg price $5.05, stock 151, 0 promos.",
|
| 48 |
+
"On 2024-10-10, sales were 2960 units, avg price $5.33, stock 154, 0 promos.",
|
| 49 |
+
"On 2022-08-09, sales were 1824 units, avg price $4.95, stock 157, 0 promos.",
|
| 50 |
+
"On 2022-11-29, sales were 2010 units, avg price $5.48, stock 153, 0 promos."
|
| 51 |
+
]
|
| 52 |
+
}
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"query": "What are the seasonal sales patterns?",
|
| 56 |
+
"answer": {
|
| 57 |
+
"answer": "2022-07-24",
|
| 58 |
+
"sources": [
|
| 59 |
+
"On 2022-07-24, sales were 1750 units, avg price $5.15, stock 156, 0 promos.",
|
| 60 |
+
"On 2022-12-31, sales were 1856 units, avg price $5.28, stock 154, 0 promos.",
|
| 61 |
+
"On 2022-09-21, sales were 1834 units, avg price $5.04, stock 168, 0 promos.",
|
| 62 |
+
"On 2022-12-02, sales were 2006 units, avg price $5.07, stock 159, 0 promos.",
|
| 63 |
+
"On 2024-05-01, sales were 3666 units, avg price $5.07, stock 162, 0 promos."
|
| 64 |
+
]
|
| 65 |
+
}
|
| 66 |
+
}
|
| 67 |
+
]
|
fmcg_genai/reports/shap/business_insights.json
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"top_drivers": [
|
| 3 |
+
"price_volume_ratio",
|
| 4 |
+
"price_unit",
|
| 5 |
+
"delivery_sales_ratio",
|
| 6 |
+
"price_stock_ratio",
|
| 7 |
+
"delivery_days",
|
| 8 |
+
"delivery_stock_ratio",
|
| 9 |
+
"monthly_sales_trend",
|
| 10 |
+
"stock_available",
|
| 11 |
+
"quarter",
|
| 12 |
+
"month"
|
| 13 |
+
],
|
| 14 |
+
"feature_analysis": {
|
| 15 |
+
"price_volume_ratio": {
|
| 16 |
+
"importance": 4.597093105316162,
|
| 17 |
+
"positive_impact": 3.9045722484588623,
|
| 18 |
+
"negative_impact": -5.072391510009766,
|
| 19 |
+
"mean_value": -0.1248782468205759,
|
| 20 |
+
"std_value": 0.9007218725621275
|
| 21 |
+
},
|
| 22 |
+
"price_unit": {
|
| 23 |
+
"importance": 1.98605477809906,
|
| 24 |
+
"positive_impact": 2.1556525230407715,
|
| 25 |
+
"negative_impact": -1.789350986480713,
|
| 26 |
+
"mean_value": 0.026255893097764663,
|
| 27 |
+
"std_value": 0.997850138795022
|
| 28 |
+
},
|
| 29 |
+
"delivery_sales_ratio": {
|
| 30 |
+
"importance": 1.7703979015350342,
|
| 31 |
+
"positive_impact": 1.6189883947372437,
|
| 32 |
+
"negative_impact": -1.8813962936401367,
|
| 33 |
+
"mean_value": -0.0929672570059141,
|
| 34 |
+
"std_value": 0.9347505930648318
|
| 35 |
+
},
|
| 36 |
+
"price_stock_ratio": {
|
| 37 |
+
"importance": 1.4865055084228516,
|
| 38 |
+
"positive_impact": 1.7247811555862427,
|
| 39 |
+
"negative_impact": -1.3188611268997192,
|
| 40 |
+
"mean_value": 0.04453720749317007,
|
| 41 |
+
"std_value": 1.139988208022099
|
| 42 |
+
},
|
| 43 |
+
"delivery_days": {
|
| 44 |
+
"importance": 0.8067858219146729,
|
| 45 |
+
"positive_impact": 0.8751516342163086,
|
| 46 |
+
"negative_impact": -0.7208266258239746,
|
| 47 |
+
"mean_value": 0.057781104153786116,
|
| 48 |
+
"std_value": 0.9962577373566263
|
| 49 |
+
},
|
| 50 |
+
"delivery_stock_ratio": {
|
| 51 |
+
"importance": 0.4718804359436035,
|
| 52 |
+
"positive_impact": 0.502148449420929,
|
| 53 |
+
"negative_impact": -0.4564955234527588,
|
| 54 |
+
"mean_value": 0.06935765136188128,
|
| 55 |
+
"std_value": 1.2666551526500205
|
| 56 |
+
},
|
| 57 |
+
"monthly_sales_trend": {
|
| 58 |
+
"importance": 0.41195714473724365,
|
| 59 |
+
"positive_impact": 0.43033820390701294,
|
| 60 |
+
"negative_impact": -0.381966233253479,
|
| 61 |
+
"mean_value": 0.24717162868209472,
|
| 62 |
+
"std_value": 1.0569746989470468
|
| 63 |
+
},
|
| 64 |
+
"stock_available": {
|
| 65 |
+
"importance": 0.3852362334728241,
|
| 66 |
+
"positive_impact": 0.37044084072113037,
|
| 67 |
+
"negative_impact": -0.3952655494213104,
|
| 68 |
+
"mean_value": 0.007975214528825396,
|
| 69 |
+
"std_value": 1.0644954263471078
|
| 70 |
+
},
|
| 71 |
+
"quarter": {
|
| 72 |
+
"importance": 0.12988582253456116,
|
| 73 |
+
"positive_impact": 0.07589029520750046,
|
| 74 |
+
"negative_impact": -0.16882583498954773,
|
| 75 |
+
"mean_value": 0.5649065460035928,
|
| 76 |
+
"std_value": 1.013618829936179
|
| 77 |
+
},
|
| 78 |
+
"month": {
|
| 79 |
+
"importance": 0.12598280608654022,
|
| 80 |
+
"positive_impact": 0.12750393152236938,
|
| 81 |
+
"negative_impact": -0.12537360191345215,
|
| 82 |
+
"mean_value": 0.5153823327785186,
|
| 83 |
+
"std_value": 1.0081023981687793
|
| 84 |
+
}
|
| 85 |
+
},
|
| 86 |
+
"recommendations": [
|
| 87 |
+
"Price optimization is crucial for sales performance",
|
| 88 |
+
"Seasonal patterns are important for sales forecasting"
|
| 89 |
+
]
|
| 90 |
+
}
|
fmcg_genai/reports/shap/explainability_report.json
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"explainability_summary": {
|
| 3 |
+
"analysis_date": "2025-09-01T01:38:47.755942",
|
| 4 |
+
"total_features_analyzed": 60,
|
| 5 |
+
"top_features_count": 10,
|
| 6 |
+
"shap_values_calculated": 1000
|
| 7 |
+
},
|
| 8 |
+
"top_features": [
|
| 9 |
+
{
|
| 10 |
+
"feature": "price_volume_ratio",
|
| 11 |
+
"importance": 4.597093105316162
|
| 12 |
+
},
|
| 13 |
+
{
|
| 14 |
+
"feature": "price_unit",
|
| 15 |
+
"importance": 1.98605477809906
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"feature": "delivery_sales_ratio",
|
| 19 |
+
"importance": 1.7703979015350342
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"feature": "price_stock_ratio",
|
| 23 |
+
"importance": 1.4865055084228516
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"feature": "delivery_days",
|
| 27 |
+
"importance": 0.8067858219146729
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"feature": "delivery_stock_ratio",
|
| 31 |
+
"importance": 0.4718804359436035
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"feature": "monthly_sales_trend",
|
| 35 |
+
"importance": 0.41195714473724365
|
| 36 |
+
},
|
| 37 |
+
{
|
| 38 |
+
"feature": "stock_available",
|
| 39 |
+
"importance": 0.3852362334728241
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"feature": "quarter",
|
| 43 |
+
"importance": 0.12988582253456116
|
| 44 |
+
},
|
| 45 |
+
{
|
| 46 |
+
"feature": "month",
|
| 47 |
+
"importance": 0.12598280608654022
|
| 48 |
+
}
|
| 49 |
+
],
|
| 50 |
+
"business_insights": {
|
| 51 |
+
"top_drivers": [
|
| 52 |
+
"price_volume_ratio",
|
| 53 |
+
"price_unit",
|
| 54 |
+
"delivery_sales_ratio",
|
| 55 |
+
"price_stock_ratio",
|
| 56 |
+
"delivery_days",
|
| 57 |
+
"delivery_stock_ratio",
|
| 58 |
+
"monthly_sales_trend",
|
| 59 |
+
"stock_available",
|
| 60 |
+
"quarter",
|
| 61 |
+
"month"
|
| 62 |
+
],
|
| 63 |
+
"feature_analysis": {
|
| 64 |
+
"price_volume_ratio": {
|
| 65 |
+
"importance": 4.597093105316162,
|
| 66 |
+
"positive_impact": 3.9045722484588623,
|
| 67 |
+
"negative_impact": -5.072391510009766,
|
| 68 |
+
"mean_value": -0.1248782468205759,
|
| 69 |
+
"std_value": 0.9007218725621275
|
| 70 |
+
},
|
| 71 |
+
"price_unit": {
|
| 72 |
+
"importance": 1.98605477809906,
|
| 73 |
+
"positive_impact": 2.1556525230407715,
|
| 74 |
+
"negative_impact": -1.789350986480713,
|
| 75 |
+
"mean_value": 0.026255893097764663,
|
| 76 |
+
"std_value": 0.997850138795022
|
| 77 |
+
},
|
| 78 |
+
"delivery_sales_ratio": {
|
| 79 |
+
"importance": 1.7703979015350342,
|
| 80 |
+
"positive_impact": 1.6189883947372437,
|
| 81 |
+
"negative_impact": -1.8813962936401367,
|
| 82 |
+
"mean_value": -0.0929672570059141,
|
| 83 |
+
"std_value": 0.9347505930648318
|
| 84 |
+
},
|
| 85 |
+
"price_stock_ratio": {
|
| 86 |
+
"importance": 1.4865055084228516,
|
| 87 |
+
"positive_impact": 1.7247811555862427,
|
| 88 |
+
"negative_impact": -1.3188611268997192,
|
| 89 |
+
"mean_value": 0.04453720749317007,
|
| 90 |
+
"std_value": 1.139988208022099
|
| 91 |
+
},
|
| 92 |
+
"delivery_days": {
|
| 93 |
+
"importance": 0.8067858219146729,
|
| 94 |
+
"positive_impact": 0.8751516342163086,
|
| 95 |
+
"negative_impact": -0.7208266258239746,
|
| 96 |
+
"mean_value": 0.057781104153786116,
|
| 97 |
+
"std_value": 0.9962577373566263
|
| 98 |
+
},
|
| 99 |
+
"delivery_stock_ratio": {
|
| 100 |
+
"importance": 0.4718804359436035,
|
| 101 |
+
"positive_impact": 0.502148449420929,
|
| 102 |
+
"negative_impact": -0.4564955234527588,
|
| 103 |
+
"mean_value": 0.06935765136188128,
|
| 104 |
+
"std_value": 1.2666551526500205
|
| 105 |
+
},
|
| 106 |
+
"monthly_sales_trend": {
|
| 107 |
+
"importance": 0.41195714473724365,
|
| 108 |
+
"positive_impact": 0.43033820390701294,
|
| 109 |
+
"negative_impact": -0.381966233253479,
|
| 110 |
+
"mean_value": 0.24717162868209472,
|
| 111 |
+
"std_value": 1.0569746989470468
|
| 112 |
+
},
|
| 113 |
+
"stock_available": {
|
| 114 |
+
"importance": 0.3852362334728241,
|
| 115 |
+
"positive_impact": 0.37044084072113037,
|
| 116 |
+
"negative_impact": -0.3952655494213104,
|
| 117 |
+
"mean_value": 0.007975214528825396,
|
| 118 |
+
"std_value": 1.0644954263471078
|
| 119 |
+
},
|
| 120 |
+
"quarter": {
|
| 121 |
+
"importance": 0.12988582253456116,
|
| 122 |
+
"positive_impact": 0.07589029520750046,
|
| 123 |
+
"negative_impact": -0.16882583498954773,
|
| 124 |
+
"mean_value": 0.5649065460035928,
|
| 125 |
+
"std_value": 1.013618829936179
|
| 126 |
+
},
|
| 127 |
+
"month": {
|
| 128 |
+
"importance": 0.12598280608654022,
|
| 129 |
+
"positive_impact": 0.12750393152236938,
|
| 130 |
+
"negative_impact": -0.12537360191345215,
|
| 131 |
+
"mean_value": 0.5153823327785186,
|
| 132 |
+
"std_value": 1.0081023981687793
|
| 133 |
+
}
|
| 134 |
+
},
|
| 135 |
+
"recommendations": [
|
| 136 |
+
"Price optimization is crucial for sales performance",
|
| 137 |
+
"Seasonal patterns are important for sales forecasting"
|
| 138 |
+
]
|
| 139 |
+
},
|
| 140 |
+
"model_interpretability": {
|
| 141 |
+
"global_explanations": "SHAP summary and bar plots generated",
|
| 142 |
+
"local_explanations": "Waterfall and force plots for individual predictions",
|
| 143 |
+
"interaction_analysis": "Feature interaction plots created",
|
| 144 |
+
"business_recommendations": 2
|
| 145 |
+
}
|
| 146 |
+
}
|
fmcg_genai/reports/shap/interactive_shap_distribution.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
fmcg_genai/reports/shap/interactive_shap_importance.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
fmcg_genai/reports/shap/interactive_shap_summary.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
fmcg_genai/reports/shap/shap_bar_plot.png
ADDED
|
Git LFS Details
|
fmcg_genai/reports/shap/shap_dependence_brand_vs_category.png
ADDED
|
Git LFS Details
|
fmcg_genai/reports/shap/shap_dependence_brand_vs_channel.png
ADDED
|
Git LFS Details
|
fmcg_genai/reports/shap/shap_dependence_brand_vs_region.png
ADDED
|
Git LFS Details
|
fmcg_genai/reports/shap/shap_dependence_brand_vs_segment.png
ADDED
|
Git LFS Details
|
fmcg_genai/reports/shap/shap_dependence_category_vs_channel.png
ADDED
|
Git LFS Details
|
fmcg_genai/reports/shap/shap_dependence_category_vs_region.png
ADDED
|
Git LFS Details
|
fmcg_genai/reports/shap/shap_dependence_channel_vs_region.png
ADDED
|
Git LFS Details
|
fmcg_genai/reports/shap/shap_dependence_segment_vs_category.png
ADDED
|
Git LFS Details
|
fmcg_genai/reports/shap/shap_dependence_segment_vs_channel.png
ADDED
|
Git LFS Details
|