# Abubakari's picture
# Update app.py
# 3cf287f
# home.py
import streamlit as st
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from streamlit_option_menu import option_menu
# Browser-tab title/icon and initial sidebar state for the app.
st.set_page_config(
    page_title="Sales Forecasting",
    page_icon="SF",
    initial_sidebar_state="expanded",
)

# Images displayed by the page functions and the sidebar further down the file.
img_banner = Image.open("Forecast1.jpeg")
img_banner2 = Image.open("Forecast2.png")
img_3 = Image.open("Forecast3.jpg")

# Style mapping handed to the sidebar option menu through its ``styles`` argument.
css_style = {
    "body": {"background-color": "#F0F0F0"},
    "header": {
        "background-color": "#FF4C1B",
        "color": "white",
        "padding": "1rem",
        "text-align": "center",
    },
    "content": {"padding": "2rem"},
    "forecast-button": {
        "background-color": "#FF4C1B",
        "color": "white",
        "padding": "0.5rem 1rem",
        "border": "none",
    },
}

# Raw <style> snippets injected into the page, in this order: the page
# background colour first, then the width/padding of the main container.
_injected_css = (
    """
<style>
body {
background-color: #F0F0F0;
}
</style>
""",
    """
<style>
.reportview-container .main .block-container {
max-width: 1000px;
padding-top: 2rem;
padding-bottom: 2rem;
padding-right: 2rem;
padding-left: 2rem;
}
</style>
""",
)
for _css_snippet in _injected_css:
    st.markdown(_css_snippet, unsafe_allow_html=True)
def home_page():
    """Render the landing page: title, banner image, problem statement and goals."""
    problem_html = """<h2>The Problem</h2>
<p>Sales forecasting is a crucial aspect of business planning and decision-making. Accurate sales predictions help
businesses manage inventory, allocate resources, and set realistic targets. However, forecasting sales can be
challenging due to various factors such as market fluctuations, seasonality, and changing consumer behavior.
Traditional methods for sales forecasting may not provide the level of accuracy and agility needed to respond to
dynamic market conditions.
To address these challenges, our Sales Forecasting Project aims to develop a robust and data-driven sales
forecasting system using advanced machine learning techniques. This system will enable businesses to make
informed decisions, optimize their operations, and adapt to market changes more effectively.</p> """

    goals_html = """<h2>Project Goals</h2>
In the Sales Forecasting Project, our primary objective is to develop an accurate and robust machine learning
model that can predict sales with high precision. The model will utilize a variety of input features, such as
historical sales data and seasonality factors, to make predictions
about future sales trends.
Specifically, we aim to:
1. Analyze a comprehensive dataset that includes relevant features for sales forecasting.
2. Explore and preprocess the data to handle missing values, outliers, and ensure data quality.
3. Design and train a machine learning model using advanced algorithms to capture complex sales patterns.
4. Evaluate the model's performance using appropriate metrics and fine-tune it for optimal accuracy.
5. Develop a user-friendly interface that allows users to input relevant parameters and obtain sales predictions.
6. Provide actionable insights and recommendations based on the forecasted sales trends.
By achieving these goals, we aim to empower businesses with a valuable tool for strategic planning, resource
allocation, and decision-making, ultimately driving better business outcomes. """

    # Page title, then the banner, then the two text sections in reading order.
    st.write("# Intelligent Sales Forecasting", unsafe_allow_html=True)
    st.image(img_banner)
    st.write(problem_html, unsafe_allow_html=True)
    st.write(goals_html, unsafe_allow_html=True)
def about_page():
    """Render the "About" page: a heading, a banner image and the project background text."""
    st.write("""<h1>Project background</h1>""", unsafe_allow_html=True)
    st.image(img_banner2)
    # User-facing copy. The last sentence now starts with an unbroken "The"
    # (it was split across two lines as "T" / "he"), and two grammar slips
    # ("a months", "per daily") are corrected.
    st.write("""
<p>Sales forecasting is the foundation of a business’s financial story.
Once you have your sales forecast you can create profit and loss statements, cash flow statements and balance sheets, thus helping you set goals for your company.
Proper forecasting also ensures you have the right stock at all times and leads to less wasted stock.
Having the skill to create a sales forecast will help you manage anything from a small business up to a large company,
where you need to inform investors about your forecasts for a month, quarter or a year.
The objective of this challenge is to create a model to forecast the number of products purchased per day per store over a period,
for grocery stores located in different areas in the same country.
The solution to this challenge can be used by small chain stores to know how much stock to order per week and per month.</p><br>
""", unsafe_allow_html=True)
def model_section():
    """Render the "Predict Sales" page.

    Loads the fitted imputers, one-hot encoder and model from joblib files
    (paths are relative to the working directory), collects the feature values
    from Streamlit widgets and, once the "Predict" button is pressed, shows the
    predicted value, a short commentary on it and a bar chart comparing it with
    the hard-coded historical maximum.
    """
    st.write("""<h1>Predict Sales</h1>
""", unsafe_allow_html=True)
    st.image(img_banner2)
    # The heading must be closed with </h3>; an opening <h3> here leaves the
    # element unterminated.
    st.write("""
<h3>Enter the details to predict sales.</h3>""", unsafe_allow_html=True)

    # Fitted preprocessing artifacts and the final model.
    # NOTE(review): joblib.load unpickles these files; only ship trusted artifacts.
    num_imputer = joblib.load('numeric_imputer.joblib')
    cat_imputer = joblib.load('categorical_imputer.joblib')
    encoder = joblib.load('OneHotEncoder.joblib')
    final_model = joblib.load('model.joblib')

    # Input widgets. Insertion order matters: it becomes the column order of
    # the single-row DataFrame handed to the imputers.
    input_data = {}
    input_data['store_id'] = st.slider("store_id", 1, 54)
    input_data['category_id'] = st.slider("category_id", 1, 50)
    input_data['onpromotion'] = st.number_input("onpromotion", step=1)
    input_data['nbr_of_transactions'] = st.number_input("nbr_of_transactions", step=1)
    input_data['year'] = st.number_input("year", step=1)
    input_data['month'] = st.slider("month", 1, 12)
    input_data['dayofmonth'] = st.slider("dayofmonth", 1, 31)
    input_data['dayofweek'] = st.slider("dayofweek", 0, 6)
    input_data['dayofyear'] = st.slider("dayofyear", 1, 365)
    input_data['weekofyear'] = st.slider("weekofyear", 1, 52)
    input_data['quarter'] = st.slider("quarter", 1, 4)
    input_data['year_weekofyear'] = st.slider("year_weekofyear", 1, 52)
    input_data['store_type'] = st.slider("store_type", 1, 4)
    input_data['cluster'] = st.slider("cluster", 1, 16)

    city_options = ['Aflao', 'Akim Oda', 'Akwatia', 'Bekwai', 'Cape Coast',
                    'Elmina', 'Winneba', 'Teshie', 'Tema', 'Techiman',
                    'Tamale', 'Suhum', 'Prestea', 'Obuasi', 'Mampong',
                    'Kumasi', 'Koforidua', 'Kintampo', 'Hohoe', 'Ho', 'Gbawe']
    input_data['city'] = st.selectbox("City", city_options)

    holiday_type_options = ['Not Holiday', 'Public Holiday', 'Religious Holiday',
                            'School Holiday', 'Special Event']
    input_data['holiday_type'] = st.selectbox("Holiday Type", holiday_type_options)

    if not st.button("Predict", key="predict_button", help="Click to make a prediction."):
        return

    final_df = _build_feature_frame(input_data, num_imputer, cat_imputer, encoder)
    prediction = final_model.predict(final_df)[0]

    # Hard-coded reference statistics.
    # NOTE(review): presumably taken from the training data -- confirm. A 75th
    # percentile of 237 was also declared here but never used, so it was dropped.
    mean_sales = 423
    std_sales = 1320
    max_sales = 124717

    st.write("Prediction Result:")
    st.write(f"The predicted sales for the given input are: {prediction:.2f} units.")
    st.write(_describe_prediction(prediction, mean_sales, std_sales, max_sales))
    _plot_sales_comparison(prediction, max_sales)


def _build_feature_frame(input_data, num_imputer, cat_imputer, encoder):
    """Turn the raw widget values into the single-row frame passed to the model."""
    input_df = pd.DataFrame([input_data])
    numeric_cols = input_df.select_dtypes(include=['float64', 'int64']).columns
    categorical_cols = input_df.select_dtypes(include=['object']).columns

    # The numeric imputer is still run (it may validate the input), but as in
    # the original flow its output is not what gets concatenated below.
    # NOTE(review): confirm whether the imputed values should feed the model.
    num_imputer.transform(input_df[numeric_cols])
    categorical_imputed = cat_imputer.transform(input_df[categorical_cols])

    encoded = encoder.transform(categorical_imputed)
    # An encoder configured for sparse output returns a SciPy sparse matrix,
    # which pd.DataFrame cannot expand into named columns; densify it first.
    if hasattr(encoded, "toarray"):
        encoded = encoded.toarray()
    encoded_df = pd.DataFrame(encoded,
                              columns=encoder.get_feature_names_out(categorical_cols))

    combined = pd.concat([input_df[numeric_cols], encoded_df], axis=1)

    # Column order the frame is reindexed to; columns absent from `combined`
    # are created and filled with 0.
    original_feature_names = [
        'store_id', 'category_id', 'onpromotion', 'nbr_of_transactions',
        'year', 'month', 'dayofmonth', 'dayofweek', 'dayofyear', 'weekofyear',
        'quarter', 'year_weekofyear', 'store_type', 'cluster',
        'city_Aflao', 'city_Akim Oda', 'city_Akwatia', 'city_Bekwai',
        'city_Cape Coast', 'city_Elmina', 'city_Gbawe', 'city_Ho',
        'city_Hohoe', 'city_Kintampo', 'city_Koforidua', 'city_Kumasi',
        'city_Mampong', 'city_Obuasi', 'city_Prestea', 'city_Suhum',
        'city_Tamale', 'city_Techiman', 'city_Tema', 'city_Teshie',
        'city_Winneba', 'holiday_type_Not Holiday',
        'holiday_type_Public Holiday', 'holiday_type_Religious Holiday',
        'holiday_type_School Holiday', 'holiday_type_Special Event',
    ]
    return combined.reindex(columns=original_feature_names, fill_value=0)


def _describe_prediction(prediction, mean_sales, std_sales, max_sales):
    """Return the commentary sentence for *prediction* relative to the reference stats."""
    # NOTE(review): with mean 423 and std 1320 both "below average" thresholds
    # are negative, so those branches cannot trigger for non-negative predictions.
    if prediction > max_sales:
        return "The predicted sales are unusually high, exceeding the maximum historical sales value."
    if prediction > mean_sales + 2 * std_sales:
        return "The predicted sales are significantly above the average with a high deviation."
    if prediction > mean_sales + std_sales:
        return "The predicted sales are above average with a relatively high deviation."
    if prediction < mean_sales - 2 * std_sales:
        return "The predicted sales are significantly below the average with a high deviation."
    if prediction < mean_sales - std_sales:
        return "The predicted sales are below average with a relatively high deviation."
    return "The predicted sales are around the average range."


def _plot_sales_comparison(prediction, max_sales):
    """Draw the predicted-vs-maximum bar chart on its own figure and release it afterwards."""
    # An explicit Figure (rather than the global pyplot module) is handed to
    # st.pyplot and closed afterwards, so figures do not accumulate across reruns.
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        ax.bar(['Predicted Sales', 'Max Sales'], [prediction, max_sales],
               color=['blue', 'red'])
        ax.set_xlabel('Sales Type')
        ax.set_ylabel('Sales')
        ax.set_title('Comparison of Predicted Sales with Max Sales')
        st.pyplot(fig)
    finally:
        plt.close(fig)
def Check_EDA():
    """Render the EDA page: banner, intro text and a button that reveals the charts."""
    intro_text = """
📊🛒 Welcome to the Exploratory Data Analysis Tool for Azubian Groceries! 📈🍅
Unveil the hidden insights of our sales data with just a click of a button! 🕵️‍♀️🔍
Step into the world of data exploration and embark on a journey through our sales trends and patterns. 🌌💹
From the rise and fall of our sales over time to the mysterious correlations between different aspects of our business, we've got it all. 📈🔮
Dive into our data-driven adventures and uncover the story behind the numbers. 📊📚
But that's not all! 🎉📊 Ever wondered what our sales look like on a day-to-day basis? Or maybe how they evolve quarter by quarter? We've got you covered! 📅🔍
"""
    st.image(img_banner2)
    st.write(intro_text)
    st.write("So go ahead, hit those buttons and start your EDA journey! 🚀🔍")

    # The dataset is read and date-indexed on every run, before the button is drawn.
    sales_df = create_time_index(load_data())

    # Charts are only produced on the run triggered by the button press.
    if st.button("Show EDA"):
        create_visualizations(sales_df)
def load_data():
    """Read ``download_df.csv`` (path relative to the working directory) into a DataFrame."""
    return pd.read_csv('download_df.csv')
def create_time_index(df):
    """Return a copy of *df* indexed by its ``Date`` column parsed as datetimes.

    The input frame is left untouched, so the function can safely be called
    more than once on the same object (for example on a cached dataset).

    Args:
        df: DataFrame containing a ``Date`` column that ``pd.to_datetime`` can parse.

    Returns:
        A new DataFrame whose index is the parsed ``Date`` values; the ``Date``
        column itself is no longer among the columns.

    Raises:
        KeyError: if *df* has no ``Date`` column.
    """
    if 'Date' not in df.columns:
        raise KeyError("create_time_index expects a 'Date' column")
    # assign() builds a new frame, so the caller's 'Date' column keeps its
    # original values and dtype.
    return df.assign(Date=pd.to_datetime(df['Date'])).set_index('Date')
def create_visualizations(df):
    """Render the summary table and the EDA charts for the sales dataset.

    Every chart is drawn on its own explicit Figure, which is handed to
    ``st.pyplot`` and closed afterwards, so repeated clicks do not accumulate
    open figures in pyplot's global state.

    Args:
        df: DataFrame whose index is used as the x-axis of the time-series
            chart and which provides the ``target``, ``year``, ``dayofweek``
            and ``quarter`` columns read below.
    """
    st.subheader("Data Overview")
    st.write(df.describe())

    st.subheader("Sales Over Time")
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(df.index, df['target'])
    ax.set_xlabel('Date')
    ax.set_ylabel('Sales')
    ax.set_title('Sales Over Time')
    _show_figure(fig)

    st.subheader("Correlation Heatmap")
    numeric_columns = df.select_dtypes(include=[np.number]).columns
    corr_matrix = df[numeric_columns].corr()
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=ax)
    ax.set_title('Correlation Heatmap')
    _show_figure(fig)

    st.subheader("Histogram of Sales")
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.histplot(df['target'], bins=30, kde=True, ax=ax)
    ax.set_xlabel('Sales')
    ax.set_ylabel('Frequency')
    ax.set_title('Histogram of Sales')
    _show_figure(fig)

    st.subheader("Box Plot of Sales")
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.boxplot(data=df, y='target', ax=ax)
    ax.set_ylabel('Sales')
    ax.set_title('Box Plot of Sales')
    _show_figure(fig)

    st.subheader("Total Sales by Year")
    sales_by_year = df.groupby('year')['target'].sum()
    fig, ax = plt.subplots(figsize=(8, 6))
    # rot=0 keeps the year labels horizontal.
    sales_by_year.plot(kind='bar', color='#1f77b4', ax=ax, rot=0)
    ax.set_title('Total Sales by Year')
    ax.set_xlabel('Year')
    ax.set_ylabel('Total Sales')
    fig.tight_layout()
    _show_figure(fig)

    # This chart previously had no subheader, unlike every other chart here.
    st.subheader("Total Sales by Day of the Week")
    sales_by_dayofweek = df.groupby('dayofweek')['target'].sum()
    days_of_week = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    fig, ax = plt.subplots(figsize=(8, 6))
    sales_by_dayofweek.plot(kind='bar', color='#1f77b4', ax=ax)
    ax.set_title('Total Sales by Day of the Week')
    ax.set_xlabel('Day of the Week')
    ax.set_ylabel('Total Sales')
    # Label each bar from its own group key rather than by position, so data
    # that lacks some weekdays is still labelled correctly.
    # Assumes dayofweek is encoded 0 = Monday ... 6 = Sunday -- TODO confirm.
    day_labels = _tick_labels(sales_by_dayofweek.index, days_of_week, 0)
    ax.set_xticks(range(len(day_labels)))
    ax.set_xticklabels(day_labels, rotation=45)
    fig.tight_layout()
    _show_figure(fig)

    st.subheader("Total Sales by Quarter")
    quarterly_sales = df.groupby('quarter')['target'].sum()
    fig, ax = plt.subplots(figsize=(8, 6))
    quarterly_sales.plot(kind='bar', color=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728'], ax=ax)
    ax.set_title('Seasonal Sales Variation by Quarter')
    ax.set_xlabel('Quarter')
    ax.set_ylabel('Total Sales')
    # Same idea as for weekdays: one label per bar actually present, so the
    # tick count and the label count always match.
    # Assumes quarter is encoded 1..4 -- TODO confirm.
    quarter_labels = _tick_labels(quarterly_sales.index, ['Q1', 'Q2', 'Q3', 'Q4'], 1)
    ax.set_xticks(range(len(quarter_labels)))
    ax.set_xticklabels(quarter_labels)
    fig.tight_layout()
    _show_figure(fig)


def _show_figure(fig):
    """Render *fig* in the app, then close it so figures do not pile up across reruns."""
    try:
        st.pyplot(fig)
    finally:
        plt.close(fig)


def _tick_labels(keys, names, first_key):
    """Map group keys to display names, falling back to the raw key when it has no name."""
    labels = []
    for key in keys:
        try:
            position = int(key) - first_key
        except (TypeError, ValueError):
            position = -1
        labels.append(names[position] if 0 <= position < len(names) else str(key))
    return labels
# Sidebar: logo image plus the navigation menu that selects the page to show.
with st.sidebar:
    st.image(img_3)
    selected = option_menu(
        menu_title=None,
        options=["Home", "Predict Sales", "EDA", "About"],
        # Icon names must be Bootstrap Icons identifiers; "barplot" and
        # "magnifying glass" are not, so they are replaced by the matching
        # "bar-chart" and "search" icons.
        icons=["house", "bar-chart", "search", "info-circle"],
        styles=css_style,
    )

# Dispatch table from menu label to the function that renders that page.
_PAGE_RENDERERS = {
    "Home": home_page,
    "Predict Sales": model_section,
    "EDA": Check_EDA,
    "About": about_page,
}
_render_page = _PAGE_RENDERERS.get(selected)
# An unrecognised selection renders nothing, as with the original if/elif chain.
if _render_page is not None:
    _render_page()