Spaces:
Build error
Build error
| # home.py | |
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| from PIL import Image | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import joblib | |
| from streamlit_option_menu import option_menu | |
# ---------------------------------------------------------------------------
# Page setup: browser-tab metadata, banner images, and global CSS overrides.
# ---------------------------------------------------------------------------
st.set_page_config(
    page_title="Sales Forecasting",
    page_icon="SF",
    initial_sidebar_state="expanded",
)

# Banner artwork: img_banner is used on the home page, img_banner2 on the
# about / prediction / EDA pages, and img_3 at the top of the sidebar.
img_banner = Image.open("Forecast1.jpeg")
img_banner2 = Image.open("Forecast2.png")
img_3 = Image.open("Forecast3.jpg")

# Style mapping handed to the sidebar option menu through its `styles` argument.
css_style = {
    "body": {"background-color": "#F0F0F0"},
    "header": {
        "background-color": "#FF4C1B",
        "color": "white",
        "padding": "1rem",
        "text-align": "center",
    },
    "content": {"padding": "2rem"},
    "forecast-button": {
        "background-color": "#FF4C1B",
        "color": "white",
        "padding": "0.5rem 1rem",
        "border": "none",
    },
}

# Light grey page background.
_BODY_BACKGROUND_CSS = """
<style>
body {
background-color: #F0F0F0;
}
</style>
"""

# Width and padding of the main content container.
_CONTAINER_LAYOUT_CSS = """
<style>
.reportview-container .main .block-container {
max-width: 1000px;
padding-top: 2rem;
padding-bottom: 2rem;
padding-right: 2rem;
padding-left: 2rem;
}
</style>
"""

# Inject both raw <style> blocks into the page.
st.markdown(_BODY_BACKGROUND_CSS, unsafe_allow_html=True)
st.markdown(_CONTAINER_LAYOUT_CSS, unsafe_allow_html=True)
def home_page():
    """Render the landing page: title, banner image, problem statement and goals."""
    problem_html = (
        "<h2>The Problem</h2>\n"
        "<p>Sales forecasting is a crucial aspect of business planning and decision-making. Accurate sales predictions help\n"
        "businesses manage inventory, allocate resources, and set realistic targets. However, forecasting sales can be\n"
        "challenging due to various factors such as market fluctuations, seasonality, and changing consumer behavior.\n"
        "Traditional methods for sales forecasting may not provide the level of accuracy and agility needed to respond to\n"
        "dynamic market conditions.\n"
        "To address these challenges, our Sales Forecasting Project aims to develop a robust and data-driven sales\n"
        "forecasting system using advanced machine learning techniques. This system will enable businesses to make\n"
        "informed decisions, optimize their operations, and adapt to market changes more effectively.</p> "
    )
    goals_html = (
        "<h2>Project Goals</h2>\n"
        "In the Sales Forecasting Project, our primary objective is to develop an accurate and robust machine learning\n"
        "model that can predict sales with high precision. The model will utilize a variety of input features, such as\n"
        "historical sales data and seasonality factors, to make predictions\n"
        "about future sales trends.\n"
        "Specifically, we aim to:\n"
        "1. Analyze a comprehensive dataset that includes relevant features for sales forecasting.\n"
        "2. Explore and preprocess the data to handle missing values, outliers, and ensure data quality.\n"
        "3. Design and train a machine learning model using advanced algorithms to capture complex sales patterns.\n"
        "4. Evaluate the model's performance using appropriate metrics and fine-tune it for optimal accuracy.\n"
        "5. Develop a user-friendly interface that allows users to input relevant parameters and obtain sales predictions.\n"
        "6. Provide actionable insights and recommendations based on the forecasted sales trends.\n"
        "By achieving these goals, we aim to empower businesses with a valuable tool for strategic planning, resource\n"
        "allocation, and decision-making, ultimately driving better business outcomes. "
    )

    # Title first, then the banner, then the two HTML sections in order.
    st.write("# Intelligent Sales Forecasting", unsafe_allow_html=True)
    st.image(img_banner)
    for section_html in (problem_html, goals_html):
        st.write(section_html, unsafe_allow_html=True)
def about_page():
    """Render the "About" page: heading, banner image and project background.

    The background copy is emitted as raw HTML, hence ``unsafe_allow_html``.
    """
    st.write("""<h1>Project background</h1>""", unsafe_allow_html=True)
    st.image(img_banner2)
    # The copy below repairs three visible defects in the original text: a
    # sentence split mid-word ("T" / "he solution"), "for a months" and
    # "per daily per store".
    st.write("""
<p>Sales forecasting is the foundation of a business’s financial story.
Once you have your sales forecast you can create profit and loss statements, cash flow statements and balance sheets, thus helping you set goals for your company.
Proper forecasting also ensures you have the right stock at all times and leads to less wasted stock.
Having the skill to create a sales forecast will help you manage anything from a small business up to a large company,
where you need to inform investors about your forecasts for a month, a quarter or a year.
The objective of this challenge is to create a model to forecast the number of products purchased per day per store over a period,
for grocery stores located in different areas in the same country.
The solution to this challenge can be used by small chain stores to know how much stock to order per week and per month.</p><br>
""", unsafe_allow_html=True)
# Column order passed to the model: the 14 numeric inputs followed by the
# one-hot columns for city and holiday type.
_MODEL_FEATURES = [
    'store_id', 'category_id', 'onpromotion', 'nbr_of_transactions',
    'year', 'month', 'dayofmonth', 'dayofweek', 'dayofyear', 'weekofyear',
    'quarter', 'year_weekofyear', 'store_type', 'cluster',
    'city_Aflao', 'city_Akim Oda', 'city_Akwatia', 'city_Bekwai',
    'city_Cape Coast', 'city_Elmina', 'city_Gbawe', 'city_Ho',
    'city_Hohoe', 'city_Kintampo', 'city_Koforidua', 'city_Kumasi',
    'city_Mampong', 'city_Obuasi', 'city_Prestea', 'city_Suhum',
    'city_Tamale', 'city_Techiman', 'city_Tema', 'city_Teshie',
    'city_Winneba', 'holiday_type_Not Holiday',
    'holiday_type_Public Holiday', 'holiday_type_Religious Holiday',
    'holiday_type_School Holiday', 'holiday_type_Special Event',
]

# Reference statistics used to put a single prediction into context.
# NOTE(review): hard-coded; presumably taken from the training data - confirm.
_MEAN_SALES = 423
_STD_SALES = 1320
_MAX_SALES = 124717


def _collect_inputs():
    """Draw the input widgets and return their current values as a dict."""
    input_data = {}
    input_data['store_id'] = st.slider("store_id", 1, 54)
    input_data['category_id'] = st.slider("category_id", 1, 50)
    input_data['onpromotion'] = st.number_input("onpromotion", step=1)
    input_data['nbr_of_transactions'] = st.number_input("nbr_of_transactions", step=1)
    input_data['year'] = st.number_input("year", step=1)
    input_data['month'] = st.slider("month", 1, 12)
    input_data['dayofmonth'] = st.slider("dayofmonth", 1, 31)
    input_data['dayofweek'] = st.slider("dayofweek", 0, 6)
    input_data['dayofyear'] = st.slider("dayofyear", 1, 365)
    input_data['weekofyear'] = st.slider("weekofyear", 1, 52)
    input_data['quarter'] = st.slider("quarter", 1, 4)
    input_data['year_weekofyear'] = st.slider("year_weekofyear", 1, 52)
    input_data['store_type'] = st.slider("store_type", 1, 4)
    input_data['cluster'] = st.slider("cluster", 1, 16)

    city_options = ['Aflao', 'Akim Oda', 'Akwatia', 'Bekwai', 'Cape Coast',
                    'Elmina', 'Winneba', 'Teshie', 'Tema', 'Techiman',
                    'Tamale', 'Suhum', 'Prestea', 'Obuasi', 'Mampong',
                    'Kumasi', 'Koforidua', 'Kintampo', 'Hohoe', 'Ho', 'Gbawe']
    input_data['city'] = st.selectbox("City", city_options)

    holiday_type_options = ['Not Holiday', 'Public Holiday', 'Religious Holiday',
                            'School Holiday', 'Special Event']
    input_data['holiday_type'] = st.selectbox("Holiday Type", holiday_type_options)
    return input_data


def _build_feature_frame(input_data, num_imputer, cat_imputer, encoder):
    """Turn one dict of raw inputs into the single-row frame the model expects."""
    input_df = pd.DataFrame([input_data])

    # Split by "number vs. anything else" rather than by exact dtype names,
    # so the split does not depend on platform or pandas string dtype.
    numeric_cols = input_df.select_dtypes(include="number").columns
    categorical_cols = input_df.select_dtypes(exclude="number").columns

    numeric_imputed = num_imputer.transform(input_df[numeric_cols])
    categorical_imputed = cat_imputer.transform(input_df[categorical_cols])

    encoded = encoder.transform(categorical_imputed)
    # The encoder may return a SciPy sparse matrix, which pd.DataFrame cannot
    # take together with `columns=`; densify it first.
    if hasattr(encoded, "toarray"):
        encoded = encoded.toarray()
    encoded_df = pd.DataFrame(
        encoded, columns=encoder.get_feature_names_out(categorical_cols)
    )

    # Use the imputed numeric values (previously computed but discarded).
    numeric_df = pd.DataFrame(np.asarray(numeric_imputed), columns=numeric_cols)

    combined = pd.concat([numeric_df, encoded_df], axis=1)
    # Force the training column order; columns that are absent become 0.
    return combined.reindex(columns=_MODEL_FEATURES, fill_value=0)


def _describe_prediction(prediction):
    """Return the sentence that places *prediction* against the reference stats."""
    if prediction > _MAX_SALES:
        return "The predicted sales are unusually high, exceeding the maximum historical sales value."
    if prediction > _MEAN_SALES + 2 * _STD_SALES:
        return "The predicted sales are significantly above the average with a high deviation."
    if prediction > _MEAN_SALES + _STD_SALES:
        return "The predicted sales are above average with a relatively high deviation."
    if prediction < _MEAN_SALES - 2 * _STD_SALES:
        return "The predicted sales are significantly below the average with a high deviation."
    if prediction < _MEAN_SALES - _STD_SALES:
        return "The predicted sales are below average with a relatively high deviation."
    return "The predicted sales are around the average range."


def model_section():
    """Render the "Predict Sales" page.

    Loads the fitted numeric imputer, categorical imputer, one-hot encoder and
    model from their ``.joblib`` files, draws the input widgets and, when the
    "Predict" button is pressed, shows the predicted sales, a short
    interpretation, and a bar chart comparing the prediction with the maximum
    reference sales value.
    """
    st.write("""<h1>Predict Sales</h1>
""", unsafe_allow_html=True)
    st.image(img_banner2)
    # The closing tag was previously a second opening <h3>.
    st.write("""
<h3>Enter the details to predict sales.</h3>""", unsafe_allow_html=True)

    # Fitted preprocessing artefacts and the final model.
    num_imputer = joblib.load('numeric_imputer.joblib')
    cat_imputer = joblib.load('categorical_imputer.joblib')
    encoder = joblib.load('OneHotEncoder.joblib')
    final_model = joblib.load('model.joblib')

    input_data = _collect_inputs()

    if not st.button("Predict", key="predict_button", help="Click to make a prediction."):
        return

    final_df = _build_feature_frame(input_data, num_imputer, cat_imputer, encoder)
    prediction = final_model.predict(final_df)[0]

    st.write("Prediction Result:")
    st.write(f"The predicted sales for the given input are: {prediction:.2f} units.")
    st.write(_describe_prediction(prediction))

    # Draw on an explicit figure and close it afterwards so figures do not
    # accumulate in pyplot's global registry across reruns.
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.bar(['Predicted Sales', 'Max Sales'], [prediction, _MAX_SALES],
           color=['blue', 'red'])
    ax.set_xlabel('Sales Type')
    ax.set_ylabel('Sales')
    ax.set_title('Comparison of Predicted Sales with Max Sales')
    st.pyplot(fig)
    plt.close(fig)
def Check_EDA():
    """Render the EDA page: banner, intro text and a "Show EDA" button.

    The dataset is loaded and indexed only after the button is pressed, so
    reruns that do not display the charts no longer re-read the CSV.
    """
    st.image(img_banner2)
    st.write("""
📊🛒 Welcome to the Exploratory Data Analysis Tool for Azubian Groceries! 📈🍅
Unveil the hidden insights of our sales data with just a click of a button! 🕵️♀️🔍
Step into the world of data exploration and embark on a journey through our sales trends and patterns. 🌌💹
From the rise and fall of our sales over time to the mysterious correlations between different aspects of our business, we've got it all. 📈🔮
Dive into our data-driven adventures and uncover the story behind the numbers. 📊📚
But that's not all! 🎉📊 Ever wondered what our sales look like on a day-to-day basis? Or maybe how they evolve quarter by quarter? We've got you covered! 📅🔍
""")
    st.write("So go ahead, hit those buttons and start your EDA journey! 🚀🔍")

    if st.button("Show EDA"):
        # Load, index by date, then draw every chart.
        df = create_time_index(load_data())
        create_visualizations(df)
def load_data(path='download_df.csv'):
    """Read the sales dataset from a CSV file.

    Args:
        path: Location of the CSV file. Defaults to ``'download_df.csv'``,
            the file this app reads when called without arguments.

    Returns:
        A ``pandas.DataFrame`` with the file's contents, as parsed by
        ``pandas.read_csv`` with default options.
    """
    return pd.read_csv(path)
def create_time_index(df, date_col='Date'):
    """Return a copy of *df* indexed by its parsed date column.

    The input frame is left untouched. Previously the index was set in place,
    so calling this twice on the same frame raised ``KeyError`` because the
    date column had already been moved into the index.

    Args:
        df: Frame containing a column named *date_col*.
        date_col: Name of the column holding the dates. Defaults to ``'Date'``.

    Returns:
        A new ``pandas.DataFrame`` whose index is the datetime-converted
        *date_col*; that column is removed from the regular columns.

    Raises:
        KeyError: If *date_col* is not a column of *df*.
    """
    parsed_dates = pd.to_datetime(df[date_col])
    # assign() works on a copy, so the caller's frame is not modified.
    return df.assign(**{date_col: parsed_dates}).set_index(date_col)
def _render_figure(fig):
    """Show *fig* on the page, then close it so it does not stay in memory."""
    st.pyplot(fig)
    plt.close(fig)


def create_visualizations(df):
    """Render the EDA charts for the sales dataset.

    Args:
        df: Frame whose index is used as the x-axis of the time-series plot
            and which has the columns ``'target'`` (sales), ``'year'``,
            ``'dayofweek'`` and ``'quarter'``.

    Each chart is drawn on its own explicit figure, which is closed once
    rendered. The previous version drew on pyplot's implicit global figure and
    never closed it, so figures accumulated on every rerun.
    """
    st.subheader("Data Overview")
    st.write(df.describe())

    st.subheader("Sales Over Time")
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(df.index, df['target'])
    ax.set_xlabel('Date')
    ax.set_ylabel('Sales')
    ax.set_title('Sales Over Time')
    _render_figure(fig)

    st.subheader("Correlation Heatmap")
    # Only numeric columns can take part in the correlation matrix.
    corr_matrix = df.select_dtypes(include=[np.number]).corr()
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=ax)
    ax.set_title('Correlation Heatmap')
    _render_figure(fig)

    st.subheader("Histogram of Sales")
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.histplot(df['target'], bins=30, kde=True, ax=ax)
    ax.set_xlabel('Sales')
    ax.set_ylabel('Frequency')
    ax.set_title('Histogram of Sales')
    _render_figure(fig)

    st.subheader("Box Plot of Sales")
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.boxplot(data=df, y='target', ax=ax)
    ax.set_ylabel('Sales')
    ax.set_title('Box Plot of Sales')
    _render_figure(fig)

    st.subheader("Total Sales by Year")
    sales_by_year = df.groupby('year')['target'].sum()
    fig, ax = plt.subplots(figsize=(8, 6))
    # rot=0 keeps the year labels horizontal.
    sales_by_year.plot(kind='bar', color='#1f77b4', rot=0, ax=ax)
    ax.set_title('Total Sales by Year')
    ax.set_xlabel('Year')
    ax.set_ylabel('Total Sales')
    fig.tight_layout()
    _render_figure(fig)

    # This chart previously had no subheader, unlike every other one.
    st.subheader("Total Sales by Day of the Week")
    sales_by_dayofweek = df.groupby('dayofweek')['target'].sum()
    # NOTE(review): assumes dayofweek is coded 0=Monday .. 6=Sunday - confirm.
    day_names = dict(enumerate(
        ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    ))
    # Label each bar from the value it represents, so a dataset that lacks
    # some weekdays is not mislabelled; unknown codes are shown as-is.
    day_labels = [day_names.get(code, str(code)) for code in sales_by_dayofweek.index]
    fig, ax = plt.subplots(figsize=(8, 6))
    sales_by_dayofweek.plot(kind='bar', color='#1f77b4', ax=ax)
    ax.set_title('Total Sales by Day of the Week')
    ax.set_xlabel('Day of the Week')
    ax.set_ylabel('Total Sales')
    ax.set_xticks(range(len(day_labels)))
    ax.set_xticklabels(day_labels, rotation=45)
    fig.tight_layout()
    _render_figure(fig)

    st.subheader("Total Sales by Quarter")
    quarterly_sales = df.groupby('quarter')['target'].sum()
    # Build "Q<n>" labels from the quarters actually present, so fewer than
    # four quarters no longer yields a tick/label count mismatch.
    # NOTE(review): assumes quarter codes are numeric - confirm.
    quarter_labels = [f"Q{int(code)}" for code in quarterly_sales.index]
    fig, ax = plt.subplots(figsize=(8, 6))
    quarterly_sales.plot(
        kind='bar', color=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728'], ax=ax
    )
    ax.set_title('Seasonal Sales Variation by Quarter')
    ax.set_xlabel('Quarter')
    ax.set_ylabel('Total Sales')
    ax.set_xticks(range(len(quarter_labels)))
    ax.set_xticklabels(quarter_labels)
    fig.tight_layout()
    _render_figure(fig)
# Sidebar navigation: each menu label maps to the function that renders its
# page. Keeping labels and handlers in one mapping stops them drifting apart.
_PAGES = {
    "Home": home_page,
    "Predict Sales": model_section,
    "EDA": Check_EDA,
    "About": about_page,
}

with st.sidebar:
    st.image(img_3)
    selected = option_menu(
        menu_title=None,
        options=list(_PAGES),
        # Bootstrap icon names. "barplot" and "magnifying glass" are not
        # icons in that set, so "bar-chart" and "search" replace them.
        icons=["house", "bar-chart", "search", "info-circle"],
        styles=css_style,
    )

# Render the chosen page in the main area (outside the sidebar context).
_render_page = _PAGES.get(selected)
if _render_page is not None:
    _render_page()