|
|
import os |
|
|
|
|
|
import matplotlib.pyplot as plt |
|
|
import matplotlib.ticker as ticker |
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
import seaborn as sns |
|
|
from matplotlib.dates import DateFormatter |
|
|
|
|
|
|
|
|
# Global plot styling, applied once when this module is imported.
plt.style.use("seaborn-v0_8-whitegrid")
sns.set_palette("deep")

# Default figure size and font size for every figure created afterwards.
plt.rcParams.update({"figure.figsize": (14, 8), "font.size": 12})
|
|
|
|
|
|
|
|
def visualize_predictions_by_store_item(test_results, output_dir="visualizations"):
    """
    Create visualizations of actual vs predicted values for each store-item combination.

    One PNG named ``<store>_<item>.png`` (spaces and "/" replaced by "_") is
    written to ``output_dir`` per (store_name, item_name) group. The title of
    each plot shows the group's MAE and MAPE. Rows whose actual sales are zero
    are left out of the MAPE so a single zero does not turn it into ``inf``.
    The input DataFrame is not modified.

    Args:
        test_results: DataFrame containing test results with columns:
            'date', 'store_name', 'item_name', 'sales', 'prediction'
        output_dir: Directory to save the visualizations (created if missing)
    """
    os.makedirs(output_dir, exist_ok=True)

    store_items = test_results.groupby(["store_name", "item_name"])
    total_combinations = len(store_items)
    print(
        f"Creating visualizations for {total_combinations} store-item combinations..."
    )

    for counter, ((store, item), group) in enumerate(store_items, start=1):
        # Parse dates BEFORE sorting so the order is chronological rather than
        # lexical; assign() builds a new frame instead of writing into a
        # groupby slice of the caller's data.
        if not pd.api.types.is_datetime64_any_dtype(group["date"]):
            group = group.assign(date=pd.to_datetime(group["date"]))
        group = group.sort_values("date")

        actual = group["sales"]
        predicted = group["prediction"]

        fig, ax = plt.subplots(figsize=(14, 6))
        ax.plot(
            group["date"], actual, "o-", label="Actual", alpha=0.7, linewidth=2
        )
        ax.plot(
            group["date"],
            predicted,
            "s--",
            label="Predicted",
            alpha=0.7,
            linewidth=2,
        )

        abs_error = (actual - predicted).abs()
        mae = abs_error.mean()
        # Zero-sales rows become NaN in the denominator and are skipped by
        # mean(); if every row has zero sales the MAPE is reported as nan.
        mape = (abs_error / actual.where(actual != 0).abs()).mean() * 100

        ax.set_title(f"Store: {store}, Item: {item}\nMAE: {mae:.2f}, MAPE: {mape:.2f}%")
        ax.set_xlabel("Date")
        ax.set_ylabel("Sales")

        ax.xaxis.set_major_formatter(DateFormatter("%Y-%m-%d"))
        ax.tick_params(axis="x", labelrotation=45)

        ax.grid(True, linestyle="--", alpha=0.7)
        ax.legend()
        fig.tight_layout()

        # str() keeps this working when store/item keys are not strings
        # (e.g. integer ids).
        safe_store = str(store).replace(" ", "_").replace("/", "_")
        safe_item = str(item).replace(" ", "_").replace("/", "_")
        filename = f"{safe_store}_{safe_item}.png"
        fig.savefig(os.path.join(output_dir, filename))

        # Close explicitly so figures do not accumulate across iterations.
        plt.close(fig)

        if counter % 10 == 0:
            print(f"Processed {counter}/{total_combinations} combinations")

    print(f"All visualizations saved to {output_dir}/")
|
|
|
|
|
|
|
|
def visualize_aggregated_predictions(test_results, output_dir="visualizations"):
    """
    Create aggregated visualizations of actual vs predicted values by store, item, and date.

    Three families of PNGs are written to ``output_dir``:

    * ``total_daily_sales.png`` - sales and predictions summed per date
    * ``store_<store>_total.png`` - the same sums per store
    * ``item_<item>_total.png`` - the same sums per item

    The input DataFrame is not modified.

    Args:
        test_results: DataFrame containing test results; the columns 'date',
            'store_name', 'item_name', 'sales' and 'prediction' are read
        output_dir: Directory to save the visualizations (created if missing)
    """
    os.makedirs(output_dir, exist_ok=True)

    if not pd.api.types.is_datetime64_any_dtype(test_results["date"]):
        # assign() returns a new frame, so the caller's 'date' column keeps
        # its original dtype.
        test_results = test_results.assign(date=pd.to_datetime(test_results["date"]))

    totals = {"sales": "sum", "prediction": "sum"}

    # Overall totals per day.
    daily_results = test_results.groupby("date").agg(totals).reset_index()
    _save_actual_vs_predicted_figure(
        daily_results,
        "Total Daily Sales: Actual vs Predicted",
        os.path.join(output_dir, "total_daily_sales.png"),
    )

    # Totals per store and day, one figure per store.
    store_results = (
        test_results.groupby(["store_name", "date"]).agg(totals).reset_index()
    )
    for store in store_results["store_name"].unique():
        store_data = store_results[store_results["store_name"] == store]
        # str() keeps this working for non-string store keys.
        safe_store = str(store).replace(" ", "_").replace("/", "_")
        _save_actual_vs_predicted_figure(
            store_data,
            f"Store: {store} - Total Daily Sales",
            os.path.join(output_dir, f"store_{safe_store}_total.png"),
        )

    # Totals per item and day, one figure per item.
    item_results = (
        test_results.groupby(["item_name", "date"]).agg(totals).reset_index()
    )
    for item in item_results["item_name"].unique():
        item_data = item_results[item_results["item_name"] == item]
        safe_item = str(item).replace(" ", "_").replace("/", "_")
        _save_actual_vs_predicted_figure(
            item_data,
            f"Item: {item} - Total Daily Sales",
            os.path.join(output_dir, f"item_{safe_item}_total.png"),
        )

    print(f"Aggregated visualizations saved to {output_dir}/")


def _save_actual_vs_predicted_figure(frame, title, path):
    """Plot the 'sales' and 'prediction' columns of ``frame`` against 'date' and save to ``path``."""
    fig, ax = plt.subplots(figsize=(14, 6))
    ax.plot(
        frame["date"],
        frame["sales"],
        "o-",
        label="Actual",
        alpha=0.7,
        linewidth=2,
    )
    ax.plot(
        frame["date"],
        frame["prediction"],
        "s--",
        label="Predicted",
        alpha=0.7,
        linewidth=2,
    )

    ax.set_title(title)
    ax.set_xlabel("Date")
    ax.set_ylabel("Total Sales")

    ax.xaxis.set_major_formatter(DateFormatter("%Y-%m-%d"))
    ax.tick_params(axis="x", labelrotation=45)

    ax.grid(True, linestyle="--", alpha=0.7)
    ax.legend()

    fig.tight_layout()
    fig.savefig(path)
    # Close explicitly so figures do not accumulate across calls.
    plt.close(fig)
|
|
|
|
|
|
|
|
def create_interactive_dashboard(test_results, output_dir="visualizations"):
    """
    Create interactive HTML dashboard elements from the test results.

    Two standalone HTML files are written to ``output_dir``:

    * ``overall_performance.html`` - total actual vs predicted sales per date
    * ``error_heatmap.html`` - MAPE per (store, item) as a heatmap

    Requires the Plotly library; if it cannot be imported, a hint is printed
    and the function returns without writing anything. Rows whose actual
    sales are zero are left out of the MAPE so it stays finite. The input
    DataFrame is not modified.

    Args:
        test_results: DataFrame containing test results; the columns 'date',
            'store_name', 'item_name', 'sales' and 'prediction' are read
        output_dir: Directory to save the dashboard (created if missing)
    """
    # Only the imports are guarded: an ImportError raised later on would be
    # a different problem and must not be reported as "Plotly missing".
    try:
        import plotly.express as px
        import plotly.graph_objects as go
    except ImportError:
        print("Could not create interactive dashboard. Plotly library is required.")
        print("Install it with: pip install plotly dash")
        return

    print("Creating interactive dashboard...")

    os.makedirs(output_dir, exist_ok=True)

    if not pd.api.types.is_datetime64_any_dtype(test_results["date"]):
        # assign() returns a new frame, leaving the caller's data untouched.
        test_results = test_results.assign(date=pd.to_datetime(test_results["date"]))

    # Overall actual vs predicted totals per day.
    daily_results = (
        test_results.groupby("date")
        .agg({"sales": "sum", "prediction": "sum"})
        .reset_index()
    )

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=daily_results["date"],
            y=daily_results["sales"],
            mode="lines+markers",
            name="Actual",
            line=dict(color="blue"),
        )
    )
    fig.add_trace(
        go.Scatter(
            x=daily_results["date"],
            y=daily_results["prediction"],
            mode="lines+markers",
            name="Predicted",
            line=dict(color="red", dash="dash"),
        )
    )

    fig.update_layout(
        title="Overall Sales Performance: Actual vs Predicted",
        xaxis_title="Date",
        yaxis_title="Total Sales",
        legend_title="Series",
        height=600,
    )

    fig.write_html(os.path.join(output_dir, "overall_performance.html"))

    # Row-wise absolute percentage error; zero sales become NaN in the
    # denominator so the per-group mean skips them instead of yielding inf.
    sales = test_results["sales"]
    abs_pct_error = (
        (sales - test_results["prediction"]).abs() / sales.where(sales != 0).abs() * 100
    )
    store_item_error = (
        test_results.assign(mape=abs_pct_error)
        .groupby(["store_name", "item_name"])["mape"]
        .mean()
        .reset_index()
    )

    # Stores as rows, items as columns.
    heatmap_data = store_item_error.pivot(
        index="store_name", columns="item_name", values="mape"
    )

    heatmap_fig = px.imshow(
        heatmap_data,
        labels=dict(x="Item", y="Store", color="MAPE (%)"),
        x=heatmap_data.columns,
        y=heatmap_data.index,
        color_continuous_scale="RdBu_r",
        title="Mean Absolute Percentage Error by Store and Item",
    )

    heatmap_fig.update_layout(height=800, width=1200)

    heatmap_fig.write_html(os.path.join(output_dir, "error_heatmap.html"))

    print(f"Interactive dashboard elements saved to {output_dir}/")
|
|
|
|
|
|
|
|
def visualize_error_distribution(test_results, output_dir="visualizations"):
    """
    Visualize the distribution and patterns of prediction errors.

    The error is ``sales - prediction``; the percentage error is that value
    relative to ``sales``. Rows with zero sales get a NaN percentage error
    and are therefore ignored by the per-day / per-category means instead of
    turning them into ``inf``. All derived columns live on an internal copy,
    so the input DataFrame is not modified.

    PNGs written to ``output_dir``: ``error_distribution.png``,
    ``error_vs_sales.png``, ``error_over_time.png``,
    ``error_by_day_of_week.png`` and, when a 'category' column is present,
    ``error_by_category.png``.

    Args:
        test_results: DataFrame containing test results; the columns 'date',
            'sales' and 'prediction' are read ('category' is optional)
        output_dir: Directory to save the visualizations (created if missing)
    """
    os.makedirs(output_dir, exist_ok=True)

    # Work on a copy so helper columns never leak into the caller's frame.
    results = test_results.copy()
    if not pd.api.types.is_datetime64_any_dtype(results["date"]):
        results["date"] = pd.to_datetime(results["date"])
    results["error"] = results["sales"] - results["prediction"]
    nonzero_sales = results["sales"].where(results["sales"] != 0)
    results["pct_error"] = results["error"] / nonzero_sales * 100
    results["day_name"] = results["date"].dt.day_name()

    # Histogram of raw errors.
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.histplot(results["error"], kde=True, bins=50, ax=ax)
    ax.axvline(x=0, color="red", linestyle="--")
    ax.set_title("Distribution of Prediction Errors")
    ax.set_xlabel("Error (Actual - Predicted)")
    ax.set_ylabel("Frequency")
    _save_error_figure(fig, ax, os.path.join(output_dir, "error_distribution.png"))

    # Error against the actual sales value.
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.scatter(results["sales"], results["error"], alpha=0.5)
    ax.axhline(y=0, color="red", linestyle="--")
    ax.set_title("Prediction Error vs Actual Sales")
    ax.set_xlabel("Actual Sales")
    ax.set_ylabel("Error (Actual - Predicted)")
    _save_error_figure(fig, ax, os.path.join(output_dir, "error_vs_sales.png"))

    # Mean error per date.
    daily_error = results.groupby("date")["error"].mean().reset_index()
    fig, ax = plt.subplots(figsize=(14, 6))
    ax.plot(daily_error["date"], daily_error["error"], "o-")
    ax.axhline(y=0, color="red", linestyle="--")
    ax.set_title("Mean Prediction Error Over Time")
    ax.set_xlabel("Date")
    ax.set_ylabel("Mean Error")
    ax.xaxis.set_major_formatter(DateFormatter("%Y-%m-%d"))
    ax.tick_params(axis="x", labelrotation=45)
    _save_error_figure(fig, ax, os.path.join(output_dir, "error_over_time.png"))

    # Mean percentage error per weekday; reindex fixes the Monday-to-Sunday
    # order and leaves NaN for weekdays absent from the data.
    weekday_order = [
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    ]
    day_error = results.groupby("day_name")["pct_error"].mean().reindex(weekday_order)
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.barplot(x=day_error.index, y=day_error.values, ax=ax)
    ax.set_title("Mean Percentage Error by Day of Week")
    ax.set_xlabel("Day of Week")
    ax.set_ylabel("Mean Percentage Error (%)")
    ax.axhline(y=0, color="red", linestyle="--")
    _save_error_figure(fig, ax, os.path.join(output_dir, "error_by_day_of_week.png"))

    # Mean percentage error per category, only when that column exists.
    if "category" in results.columns:
        cat_error = results.groupby("category")["pct_error"].mean().sort_values()
        fig, ax = plt.subplots(figsize=(12, 6))
        sns.barplot(x=cat_error.index, y=cat_error.values, ax=ax)
        ax.set_title("Mean Percentage Error by Category")
        ax.set_xlabel("Category")
        ax.set_ylabel("Mean Percentage Error (%)")
        ax.axhline(y=0, color="red", linestyle="--")
        ax.tick_params(axis="x", labelrotation=45)
        _save_error_figure(fig, ax, os.path.join(output_dir, "error_by_category.png"))

    print(f"Error analysis visualizations saved to {output_dir}/")


def _save_error_figure(fig, ax, path):
    """Apply the shared dashed grid and tight layout, save ``fig`` to ``path`` and close it."""
    ax.grid(True, linestyle="--", alpha=0.7)
    fig.tight_layout()
    fig.savefig(path)
    # Close explicitly so figures do not accumulate across calls.
    plt.close(fig)
|
|
|
|
|
|
|
|
def create_forecast_dashboard(
    model, X_test, y_test, test_results, data, output_dir="visualizations", top_n=20
):
    """
    Create a comprehensive dashboard of forecast visualizations.

    Runs, in order: per store-item plots (restricted to the ``top_n``
    combinations with the highest total sales), aggregated plots, error
    analysis plots and the interactive dashboard. The last three steps use
    the full ``test_results``.

    Args:
        model: Trained model (accepted for interface compatibility; not used
            by this function)
        X_test: Test features (not used by this function)
        y_test: Test target values (not used by this function)
        test_results: DataFrame with test results; the columns 'store_name',
            'item_name' and 'sales' are read here
        data: Original data with date, store, item info (not used by this
            function)
        output_dir: Directory to save visualizations
        top_n: Number of best-selling store-item combinations that get an
            individual plot (default 20)

    Raises:
        ValueError: If ``top_n`` is smaller than 1.
    """
    if top_n < 1:
        raise ValueError(f"top_n must be at least 1, got {top_n}")

    print("Creating forecast visualizations...")

    # Rank store-item combinations by total sales and keep the best sellers,
    # so the number of individual plots stays manageable.
    store_item_sales = (
        test_results.groupby(["store_name", "item_name"])["sales"].sum().reset_index()
    )
    top_combinations = store_item_sales.sort_values("sales", ascending=False).head(
        top_n
    )

    # Inner merge keeps only the rows belonging to the selected combinations.
    top_results = pd.merge(
        test_results,
        top_combinations[["store_name", "item_name"]],
        on=["store_name", "item_name"],
    )

    visualize_predictions_by_store_item(top_results, output_dir)
    visualize_aggregated_predictions(test_results, output_dir)
    visualize_error_distribution(test_results, output_dir)
    create_interactive_dashboard(test_results, output_dir)

    print("Forecast visualization dashboard created successfully!")
|
|
|