elizabethmyn's picture
Add demo for sales forecasting
84548c1
import pickle
import lightgbm as lgbm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
def fill_misisng_values(df):
    """Return a copy of ``df`` whose missing 'sales' entries are mean-imputed.

    The mean is taken over the non-NaN values of the 'sales' column (pandas
    skips NaN by default). The input frame is left untouched.

    Parameters:
    - df (pd.DataFrame): Frame containing a 'sales' column.

    Returns:
    - pd.DataFrame: New frame with NaN values in 'sales' replaced by the mean.
    """
    sales = df["sales"]
    # assign() works on a copy, so the caller's frame is never mutated.
    return df.assign(sales=sales.fillna(sales.mean()))
def correct_outliers(df, factor=3):
    """Replace outlying 'sales' values with the column mean.

    A value is treated as an outlier when the absolute value of its z-score
    (distance from the mean divided by the standard deviation) is strictly
    greater than ``factor``. Both the z-scores and the replacement value use
    the mean of the column as it is *before* any correction.

    Parameters:
    - df (pd.DataFrame): Frame containing a 'sales' column.
    - factor (float): Z-score threshold above which a value is an outlier.

    Returns:
    - pd.DataFrame: Copy of ``df`` with outliers in 'sales' set to the mean.
    """
    result = df.copy()
    sales = result["sales"]

    # Statistics are computed once, up front, on the uncorrected column.
    center = sales.mean()
    spread = sales.std()

    is_outlier = ((sales - center) / spread).abs() > factor
    result.loc[is_outlier, "sales"] = center
    return result
def get_sample_stores(df: pd.DataFrame, store_id: int = 1) -> pd.DataFrame:
    """Return the rows of ``df`` that belong to a single store.

    Parameters:
    - df (pd.DataFrame): Frame containing a 'store_id' column.
    - store_id (int): Identifier of the store to extract. Default is 1.

    Returns:
    - pd.DataFrame: The group of rows whose 'store_id' equals ``store_id``,
      keeping their original index labels.

    Raises:
    - KeyError: If no group exists for ``store_id``.
    """
    return df.groupby("store_id").get_group(store_id)
def save_data(df, file_path, file_format="feather"):
    """
    Write a DataFrame to disk as Feather or CSV.

    Parameters:
    - df (pd.DataFrame): The DataFrame to write.
    - file_path (str): Destination path of the output file.
    - file_format (str): Output format, matched case-insensitively.
      Supported formats: 'feather', 'csv'. Default is 'feather'.

    Returns:
    - None. A status line is printed in every case.

    Example:
    ```python
    save_data(df, 'output_data.csv', file_format='csv')
    ```

    Note:
    - An unsupported format is reported with a printed error message; nothing
      is written and no exception is raised.
    - CSV output omits the index.
    - Feather output depends on the optional Feather backend used by pandas
      (pyarrow in current pandas releases) being installed.
    """
    normalized = file_format.lower()

    if normalized == "csv":
        df.to_csv(file_path, index=False)
        print(f"DataFrame saved to {file_path} in CSV format.")
        return

    if normalized == "feather":
        df.to_feather(file_path)
        print(f"DataFrame saved to {file_path} in Feather format.")
        return

    # Unknown format: report it using the caller's original spelling.
    print(
        f"Error: Unsupported file format '{file_format}'. Supported formats: 'feather', 'csv'."
    )
def flatten_prophet_predictions(predictions_dict):
    """Stack a mapping of per-key prediction frames into one DataFrame.

    Each frame is copied and tagged with a 'store_item' column holding the
    dictionary key it was stored under; the tagged frames are then
    concatenated in dictionary order with a fresh 0..n-1 index. The input
    frames are not modified.

    Parameters:
    - predictions_dict (dict): Maps a store/item key to a pd.DataFrame.

    Returns:
    - pd.DataFrame: All frames stacked row-wise, with a 'store_item' column.

    Raises:
    - ValueError: If ``predictions_dict`` is empty (raised by ``pd.concat``).
    """
    tagged_frames = [
        frame.assign(store_item=key) for key, frame in predictions_dict.items()
    ]
    return pd.concat(tagged_frames, ignore_index=True)
def load_model(file_path):
    """
    Load a model from disk, trying pickle first and LightGBM second.

    The file is first read with ``pickle.load``. If that raises
    ``pickle.UnpicklingError`` or the file cannot be found, the same path is
    handed to ``lgbm.Booster(model_file=...)`` instead. A status line naming
    the path that was used is printed in either case.

    Parameters:
    - file_path: The file path from where the model will be loaded.

    Returns:
    - The unpickled object, or a ``lightgbm.Booster`` when the fallback ran.

    Note:
    - SECURITY: unpickling executes code embedded in the file; only load
      model files that come from a trusted source.
    """
    try:
        with open(file_path, "rb") as handle:
            loaded = pickle.load(handle)
    except (FileNotFoundError, pickle.UnpicklingError):
        # Not a readable pickle: treat the path as a LightGBM model file.
        # NOTE(review): for a missing file this retries the same path, which
        # presumably fails inside LightGBM as well; confirm whether
        # FileNotFoundError should simply propagate instead.
        loaded = lgbm.Booster(model_file=file_path)
        print(f"LightGBM (scikit-learn API) model loaded from {file_path}")
    else:
        print(f"Sklearn model loaded from {file_path}")
    return loaded
# Function to calculate WAPE (Weighted Absolute Percentage Error)
def weighted_absolute_percentage_error(y_true, y_pred):
    """
    Compute the Weighted Absolute Percentage Error (WAPE).

    WAPE = 100 * sum(|y_true - y_pred|) / sum(|y_true|)

    Args:
        y_true: Actual values (any array-like accepted by ``np.array``)
        y_pred: Predicted values, broadcast-compatible with ``y_true``

    Returns:
        WAPE value (percentage). When the absolute actual values sum to
        zero, the division follows NumPy floating-point rules (inf or nan)
        rather than raising.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)

    total_abs_error = np.sum(np.abs(actual - predicted))
    total_abs_actual = np.sum(np.abs(actual))

    # Scale before dividing so the floating-point result is unchanged.
    return 100 * total_abs_error / total_abs_actual