Spaces:

elizabethmyn
/

Intelligent-Retail-Decision-Making-System

Sleeping

App Files Files Community

Intelligent-Retail-Decision-Making-System / app /utils /utils.py

elizabethmyn

Add demo for Sale forcasting

84548c1 28 days ago

raw

history blame contribute delete

3.68 kB

	import pickle

	import lightgbm as lgbm
	import matplotlib.pyplot as plt
	import numpy as np
	import pandas as pd
	import seaborn as sns


	def fill_misisng_values(df):
	"""Fill NaN values in the 'sales' column with the mean of non-NaN values"""
	df_filled = df.copy()
	df_filled["sales"] = df_filled["sales"].fillna(df_filled["sales"].mean())
	return df_filled


	def correct_outliers(df, factor=3):
	"""Identify and correct outliers in the 'sales' column by reducing them to the mean"""
	df_corrected = df.copy()

	# Identify outliers using z-score
	z_scores = (df_corrected["sales"] - df_corrected["sales"].mean()) / df_corrected[
	"sales"
	].std()
	outlier_indices = np.abs(z_scores) > factor # Adjust the threshold as needed
	# Correct outliers by reducing them to the mean
	df_corrected.loc[outlier_indices, "sales"] = df_corrected["sales"].mean()

	return df_corrected


	def get_sample_stores(df: pd.DataFrame, store_id: int = 1) -> pd.DataFrame:
	"""Get the sample stores with store_id"""
	grouped = df.groupby("store_id")
	sample_store = grouped.get_group((store_id))
	return sample_store


	def save_data(df, file_path, file_format="feather"):
	"""
	Save a DataFrame to a specified file format.

	Parameters:
	- df (pd.DataFrame): The DataFrame to be saved.
	- file_path (str): The path where the file will be saved.
	- file_format (str): The format in which to save the file. Supported formats: 'feather', 'csv'.
	Default is 'feather'.
	Example:
	```python
	# Assuming df is the DataFrame you want to save
	save_data(df, 'output_data.feather', file_format='feather')
	```

	Note:
	- Make sure to have the required libraries (pandas and feather-format) installed.
	"""
	if file_format.lower() == "feather":
	# Save to Feather format
	df.to_feather(file_path)
	print(f"DataFrame saved to {file_path} in Feather format.")
	elif file_format.lower() == "csv":
	# Save to CSV format
	df.to_csv(file_path, index=False)
	print(f"DataFrame saved to {file_path} in CSV format.")
	else:
	print(
	f"Error: Unsupported file format '{file_format}'. Supported formats: 'feather', 'csv'."
	)


	def flatten_prophet_predictions(predictions_dict):
	all_dfs = []

	for store_item, df in predictions_dict.items():
	df = df.copy()
	df["store_item"] = store_item
	all_dfs.append(df)

	return pd.concat(all_dfs, ignore_index=True)


	def load_model(file_path):
	"""
	Load a machine learning model from a file.

	Parameters:
	- file_path: The file path from where the model will be loaded.

	Returns:
	- The loaded model.
	"""
	try:
	with open(file_path, "rb") as file:
	model = pickle.load(file)
	print(f"Sklearn model loaded from {file_path}")

	except (pickle.UnpicklingError, FileNotFoundError):
	# If loading as scikit-learn model fails or the file is not found,
	# assume it is a LightGBM model (scikit-learn API)
	model = lgbm.Booster(model_file=file_path)
	print(f"LightGBM (scikit-learn API) model loaded from {file_path}")

	return model


	# Function to calculate WAPE (Weighted Absolute Percentage Error)
	def weighted_absolute_percentage_error(y_true, y_pred):
	"""
	Calculate Weighted Absolute Percentage Error

	Args:
	y_true: Actual values
	y_pred: Predicted values

	Returns:
	WAPE value (percentage)
	"""
	y_true, y_pred = np.array(y_true), np.array(y_pred)
	return 100 * np.sum(np.abs(y_true - y_pred)) / np.sum(np.abs(y_true))