csv-powerpoint-generator / src /generator.py
andreacalcagni's picture
Add department code mapping to handle raw CSV department names
ee2f0c2
"""
PowerPoint Generation Module for Manufacturing KPI Reports
This module extracts the core logic from Briva_v6.ipynb notebook to generate
PowerPoint presentations from CSV manufacturing data.
Main function: make_ppt(csv_path, reparto_code) -> str (path to generated .pptx)
"""
import os
import locale
import tempfile
import calendar
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.text import PP_ALIGN, MSO_VERTICAL_ANCHOR
from pptx.enum.shapes import PP_PLACEHOLDER
from pptx.dml.color import RGBColor
from pptx.enum.dml import MSO_LINE_DASH_STYLE
from lxml import etree
from pptx.oxml.ns import qn
# Additional imports for new functions
import matplotlib.patheffects as path_effects
from openai import OpenAI
from dotenv import load_dotenv
# =============================================================================
# πŸ“‹ CONSTANTS AND CONFIGURATION
# =============================================================================
# Expected CSV columns for validation
EXPECTED_COLUMNS = [
'ANNO', 'MESE', 'MESE_DESCR', 'GIORNO', 'SETT', 'ORE_MESE', 'N_MACCH',
'DATA_DA_ORE6_A_6', 'TURNO', 'REPARTO', 'MACCHINA', 'BOLLA', 'FASE',
'H_SCHED', 'H_PROG', 'N_PZ', 'QTSCA', 'QTNC', 'H_LAV', 'CAU_FER',
'H_FER', 'CAU_ATT', 'OREATT', 'CAU_CAMP', 'ORECAMP', 'CONT_LAV',
'IMPRONTE_LAV', 'H_DISP', 'REPARTO_DESCR', 'CAU_F_DESCR', 'H_FER_PRO',
'C11FER', 'C20FER', 'ARTICOLO', 'DESCRIZIONE', 'STAMPO', 'IMPT', 'CT',
'H_ATT_T', 'H_CAMP_T', 'TLAVTEOR', 'BATTUTE', 'CICLOR', 'PGP',
'H_FER_T', 'H_FER_MICRO', 'H_PIAN', 'H_DISP_KPI', 'H_PROG_PIAN',
'NPZ_NODICH', 'DATA_AGG'
]
# Target KPI map per reparto (values in percentage)
TARGET_KPI_MAP = {
'ST': { # STAMPAGGIO
'EFF_REP': 91.0,
'EFF_PRO': 94.0,
'EFF_SC': 98.5,
'EFF_E': 93.0,
'OEE': 80.0
},
'MS': { # STAMPAGGIO SURLYN
'EFF_REP': 93.0,
'EFF_PRO': 95.0,
'EFF_SC': 96.0,
'EFF_E': 92.0,
'OEE': 85.0
},
'DC': { # DECORAZIONI
'EFF_REP': 84.0,
'EFF_PRO': 87.0,
'EFF_SC': 96.5,
'EFF_E': 91.0,
'OEE': 80.0
},
'AS': { # ASSEMBLAGGIO
'EFF_REP': 91.0,
'EFF_PRO': 94.0,
'EFF_SC': 98.5,
'EFF_E': 93.0,
'OEE': 80.0
},
'IS': { # INIEZIONE SOFFIAGGIO
'EFF_REP': 91.0,
'EFF_PRO': 94.0,
'EFF_SC': 98.5,
'EFF_E': 93.0,
'OEE': 80.0
},
'PS': { # PRODUZIONE SCOVOLI
'EFF_REP': 91.0,
'EFF_PRO': 94.0,
'EFF_SC': 98.5,
'EFF_E': 93.0,
'OEE': 80.0
}
}
# Reparto descriptions
REPARTO_DESCRIPTIONS = {
'AS': 'ASSEMBLAGGIO',
'DC': 'DECORAZIONI',
'IS': 'INIEZIONE SOFFIAGGIO',
'MS': 'STAMPAGGIO SURLYN',
'PS': 'PRODUZIONE SCOVOLI',
'ST': 'STAMPAGGIO'
}
# Corporate colors for charts
CHART_COLORS = {
'previous_year': '#00AEEF', # Corporate blue
'target': '#E6E2E0', # Light taupe
'selected_year': '#4D4D4F' # Dark grey
}
# Italian month names (1-indexed to match calendar.month_name)
ITALIAN_MONTH_NAMES = [
"", "GENNAIO", "FEBBRAIO", "MARZO", "APRILE", "MAGGIO", "GIUGNO",
"LUGLIO", "AGOSTO", "SETTEMBRE", "OTTOBRE", "NOVEMBRE", "DICEMBRE"
]
# Set Italian locale for month names (with fallback)
try:
locale.setlocale(locale.LC_TIME, 'it_IT.UTF-8')
except locale.Error:
try:
locale.setlocale(locale.LC_TIME, 'it_IT')
except locale.Error:
pass # Use default locale
# =============================================================================
# πŸ“Š DATA LOADING AND VALIDATION
# =============================================================================
def load_data_file(file_path: str) -> pd.DataFrame:
"""
Load data from CSV or Excel file with robust encoding detection and fallback.
For .xlsb files, automatically loads the 'Dati Grezzi' sheet.
For .csv files, uses encoding fallback detection.
Args:
file_path: Path to the CSV or .xlsb file
Returns:
Loaded DataFrame
Raises:
ValueError: If file cannot be loaded or required sheet is missing
"""
file_extension = os.path.splitext(file_path)[1].lower()
if file_extension == '.xlsb':
try:
# Read .xlsb file and look for 'Dati Grezzi' sheet
excel_file = pd.ExcelFile(file_path, engine='pyxlsb')
# Check if 'Dati Grezzi' sheet exists
if 'Dati Grezzi' not in excel_file.sheet_names:
available_sheets = ', '.join(excel_file.sheet_names)
raise ValueError(f"Sheet 'Dati Grezzi' not found. Available sheets: {available_sheets}")
# Load the 'Dati Grezzi' sheet
df = pd.read_excel(file_path, sheet_name='Dati Grezzi', engine='pyxlsb')
print(f"βœ… Successfully loaded .xlsb file from sheet 'Dati Grezzi'")
return df
except Exception as e:
if "Sheet 'Dati Grezzi' not found" in str(e):
raise # Re-raise our custom error
raise ValueError(f"❌ Failed to load .xlsb file: {str(e)}")
elif file_extension == '.csv':
# Use existing CSV loading logic with encoding fallback
encodings_to_try = ['utf-8', 'iso-8859-1', 'cp1252', 'latin1']
for encoding in encodings_to_try:
try:
df = pd.read_csv(file_path, encoding=encoding)
print(f"βœ… Successfully loaded CSV with encoding: {encoding}")
return df
except UnicodeDecodeError:
continue
except Exception as e:
print(f"❌ Error with {encoding}: {e}")
break
raise ValueError(f"❌ Failed to load CSV with any encoding: {file_path}")
else:
raise ValueError(f"❌ Unsupported file type: {file_extension}. Supported types: .csv, .xlsb")
def load_csv_with_encoding_fallback(csv_path: str) -> pd.DataFrame:
"""
Legacy function name - now supports both CSV and .xlsb files.
Args:
csv_path: Path to the CSV or .xlsb file
Returns:
Loaded DataFrame
"""
return load_data_file(csv_path)
def validate_csv_schema(df: pd.DataFrame) -> None:
"""
Validate that the DataFrame has all expected columns.
Args:
df: DataFrame to validate
Raises:
ValueError: If required columns are missing
"""
missing_cols = set(EXPECTED_COLUMNS) - set(df.columns)
if missing_cols:
raise ValueError(f"Missing required columns: {missing_cols}")
print(f"βœ… All expected columns present. Shape: {df.shape}")
def prepare_dataframe(df: pd.DataFrame) -> pd.DataFrame:
"""
Clean and prepare the DataFrame for KPI calculations.
Args:
df: Raw DataFrame
Returns:
Cleaned DataFrame
"""
df = df.copy()
# Handle missing values and data type conversions
df['GIORNO'] = df['GIORNO'].fillna(0).astype(int)
df['SETT'] = df['SETT'].fillna(0).astype(int)
df['DATA_DA_ORE6_A_6'] = pd.to_datetime(df['DATA_DA_ORE6_A_6'], errors='coerce')
df['TURNO'] = df['TURNO'].fillna(0).astype(int)
df['BOLLA'] = df['BOLLA'].astype(object)
df['MESE_DESCR'] = df['MESE_DESCR'].str.strip()
print("βœ… DataFrame prepared successfully")
return df
# =============================================================================
# πŸ“ˆ KPI CALCULATION FUNCTIONS
# =============================================================================
def analyze_reparto_kpi(df: pd.DataFrame, reparto_code: str) -> pd.DataFrame:
"""
Filter and analyze KPI data for a specific reparto.
Args:
df: Raw DataFrame
reparto_code: Department code (e.g., 'ST', 'MS', 'DC')
Returns:
Filtered DataFrame for the specified reparto
Raises:
ValueError: If no data found for the reparto
"""
# Filter by reparto
df_filtered = df[df['REPARTO'] == reparto_code].copy()
if df_filtered.empty:
raise ValueError(f"No data found for reparto: {reparto_code}")
print(f"πŸ“Š Dati {REPARTO_DESCRIPTIONS.get(reparto_code, reparto_code)} ({reparto_code}): {len(df_filtered):,} righe")
print(f"πŸ“… Range anni: {df_filtered['ANNO'].min()} - {df_filtered['ANNO'].max()}")
print(f"πŸ“… Range mesi: {df_filtered['MESE'].min()} - {df_filtered['MESE'].max()}")
print(f"οΏ½οΏ½ Macchine coinvolte: {df_filtered['MACCHINA'].nunique()}")
print(f"πŸ“‹ Articoli prodotti: {df_filtered['ARTICOLO'].nunique()}")
return df_filtered
def calcola_kpi(group: pd.DataFrame) -> pd.Series:
"""
Calculate KPIs for a group of data according to official Brivaplast formulas.
Args:
group: DataFrame group (typically from groupby operation)
Returns:
Series with calculated KPIs
"""
# Sum the necessary columns
h_disp = group['H_DISP'].sum()
h_sched = group['H_SCHED'].sum()
h_prog = group['H_PROG'].sum()
h_lav = group['H_LAV'].sum()
h_fer = group['H_FER'].sum()
oreatt = group['OREATT'].sum() # Real setup hours
orecamp = group['ORECAMP'].sum() # Real sampling hours
h_att_t = group['H_ATT_T'].sum() # Theoretical setup hours
h_camp_t = group['H_CAMP_T'].sum() # Theoretical sampling hours
n_pz = group['N_PZ'].sum()
qtsca = group['QTSCA'].sum() # Scraps
qtnc = group['QTNC'].sum() # Non-conformities
npz_nodich = group['NPZ_NODICH'].sum() # Undeclared pieces
pgp = group['PGP'].sum()
c11fer = group['C11FER'].sum()
c20fer = group['C20FER'].sum()
# Calculate KPIs according to official formulas
sfrutt = h_sched / h_disp if h_disp > 0 else 0 # SFRUTT = H_SCHED / H_DISP
eff_rep = h_lav / (h_sched - oreatt - orecamp) if (h_sched - oreatt - orecamp) > 0 else 0
eff_pro = h_lav / (h_prog - (c11fer + c20fer)) if (h_prog - (c11fer + c20fer)) > 0 else 0
eff_sc = (n_pz - qtnc) / (n_pz + qtsca) if (n_pz + qtsca) > 0 else 0
eff_e = n_pz / pgp if pgp > 0 else 0
oee = ((h_lav / h_disp) * eff_sc * (n_pz + qtsca) / pgp) if h_disp > 0 and pgp > 0 else 0
# Calculate non-conformity percentage
perc_nc = (qtnc / n_pz) * 100 if n_pz > 0 else 0
return pd.Series({
# Hours
'_H_DISP': h_disp,
'_H_SCHED': h_sched,
'_H_PROG': h_prog,
'_H_LAV': h_lav,
'_H_FER': h_fer,
'_H_ATT_T': h_att_t,
'_H_ATTR': oreatt,
'_H_CAMP_T': h_camp_t,
'_H_CAMP': orecamp,
# Main KPIs
'_SFRUTT': sfrutt,
'_EFF_REP': eff_rep,
'_EFF_PRO': eff_pro,
'_EFF_SC': eff_sc,
'_EFF_E': eff_e,
'OEE': oee,
# Pieces
'_N_PZ': n_pz,
'_N_SC': qtsca,
'_PZ_ND': npz_nodich,
'N_NC': qtnc,
'_%NC': perc_nc,
# KPIs for compatibility with existing code
'H_SCHED': h_sched,
'N_PZ': n_pz,
'N_SC': qtsca,
'EFF_REP': eff_rep,
'EFF_PRO': eff_pro,
'EFF_SC': eff_sc,
'EFF_E': eff_e,
'N_pz_tot_h': (n_pz + qtsca) / h_sched if h_sched > 0 else 0,
'N_pz_ok_h': n_pz / h_sched if h_sched > 0 else 0
})
def create_df_eff_reparto_ytd(df: pd.DataFrame, selected_month: int,
selected_year: int, reparto_code: str) -> pd.DataFrame:
"""
Creates df_eff_reparto_ytd with YTD metrics up to selected month
(matches notebook implementation exactly)
Args:
df: Raw data DataFrame
selected_month: Current month (1-12) - YTD will be calculated up to this month
selected_year: Current year
reparto_code: Department code (default 'ST' for STAMPAGGIO)
Returns:
DataFrame with YTD efficiency metrics for current and previous year + YoY deltas
"""
# Filter data for selected reparto
df_reparto = df[df['REPARTO'] == reparto_code].copy()
# Calculate previous year
previous_year = selected_year - 1
# Calculate YTD metrics for both years
ytd_results = []
for year in [previous_year, selected_year]:
# Filter data for YTD period (January to selected_month)
df_ytd = df_reparto[
(df_reparto['ANNO'] == year) &
(df_reparto['MESE'] <= selected_month)
].copy()
if not df_ytd.empty:
# Calculate YTD KPIs using the same calcola_kpi function
ytd_kpi = calcola_kpi(df_ytd)
# Create period identifier
period = pd.Period(f"{year}-{selected_month:02d}", freq='M')
ytd_results.append({
'Period': period,
'Year': int(year),
'YTD_Month': int(selected_month),
**ytd_kpi
})
# Create DataFrame with Period index
if not ytd_results:
return pd.DataFrame()
df_ytd_result = pd.DataFrame(ytd_results)
df_ytd_result.set_index('Period', inplace=True)
# Convert KPIs to percentages and round to 1 decimal
efficiency_cols = ['EFF_REP', 'EFF_PRO', 'EFF_SC', 'EFF_E', 'OEE']
for col in efficiency_cols:
if col in df_ytd_result.columns:
df_ytd_result[col] = round(df_ytd_result[col] * 100, 1)
# Initialize YoY delta columns
delta_cols = [f'{col}_YoY_Delta' for col in efficiency_cols]
for col in delta_cols:
df_ytd_result[col] = np.nan
# Calculate Year-over-Year deltas if we have both years
if len(df_ytd_result) == 2:
# Sort by year to ensure proper order
df_ytd_result = df_ytd_result.sort_values('Year')
# Get previous and current year data
prev_year_data = df_ytd_result.iloc[0]
curr_year_data = df_ytd_result.iloc[1]
# Calculate YoY deltas for each KPI
for col in efficiency_cols:
if col in df_ytd_result.columns:
prev_value = prev_year_data[col]
curr_value = curr_year_data[col]
if prev_value != 0:
delta = ((curr_value - prev_value) / prev_value) * 100
# Set delta only for current year row
df_ytd_result.iloc[1, df_ytd_result.columns.get_loc(f'{col}_YoY_Delta')] = round(delta, 1)
# Add descriptive labels - fix the formatting issue (matches notebook exactly)
df_ytd_result['YTD_Label'] = df_ytd_result.apply(
lambda row: f"YTD {int(row['Year'])} (Gen-{int(row['YTD_Month']):02d})", axis=1
)
# Reorder columns: descriptive info first, then KPIs, then deltas
info_cols = ['Year', 'YTD_Month', 'YTD_Label']
column_order = info_cols + efficiency_cols + delta_cols
available_cols = [col for col in column_order if col in df_ytd_result.columns]
df_ytd_result = df_ytd_result[available_cols]
return df_ytd_result
# =============================================================================
# πŸ“Š CHART BUILDING FUNCTIONS
# =============================================================================
def build_eff_chart(df: pd.DataFrame, selected_month: int, selected_year: int,
reparto_code: str) -> plt.Figure:
"""
Builds a seaborn bar chart comparing selected_year YTD KPIs with previous year and targets.
(matches notebook implementation exactly)
Args:
df: Raw data DataFrame (Dati_Grezzi)
selected_month: Month up to which to calculate YTD (1-12)
selected_year: Year for comparison
reparto_code: Department code (e.g., 'ST' for Stampaggio)
Returns:
matplotlib.figure.Figure: The created figure object
"""
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from datetime import datetime
import locale
# Set Italian locale for month names (fallback to default if not available)
try:
locale.setlocale(locale.LC_TIME, 'it_IT.UTF-8')
except:
try:
locale.setlocale(locale.LC_TIME, 'it_IT')
except:
pass # Use default locale
# Get target map from TARGET_KPI_MAP based on reparto_code
target_map = TARGET_KPI_MAP.get(reparto_code, {})
if not target_map:
print(f"⚠️ No target KPIs defined for reparto '{reparto_code}'. Using default values of 0.")
target_map = {
'EFF_REP': 0.0,
'EFF_PRO': 0.0,
'EFF_SC': 0.0,
'EFF_E': 0.0,
'OEE': 0.0
}
# Generate YTD data using the existing function
df_ytd_result = create_df_eff_reparto_ytd(df, selected_month, selected_year, reparto_code)
if df_ytd_result.empty:
print(f"❌ No YTD data available for {reparto_code} up to month {selected_month} of {selected_year}")
return None
# Corporate color palette (matches notebook COLORS exactly)
COLORS = {
'previous_year': '#00AEEF', # Corporate blue
'target': '#E6E2E0', # Light taupe
'selected_year': '#4D4D4F' # Dark grey
}
# KPI order for x-axis
kpi_order = ['EFF_REP', 'EFF_PRO', 'EFF_SC', 'EFF_E', 'OEE']
kpi_labels = ['EFF_Rep', 'EFF_Pro', 'EFF_SC', 'EFF_E', 'OEE']
# Get department description
reparto_descriptions = {
'ST': 'Stampaggio',
'IS': 'Iniezione Soffiaggio',
'AS': 'Assemblaggio',
'PS': 'Produzione Scovoli',
'MS': 'Stampaggio Surlyn',
'DC': 'Decorazioni'
}
descrizione_reparto = reparto_descriptions.get(reparto_code, reparto_code)
# Prepare data for visualization
year_prev = selected_year - 1
# Extract KPI values from df_ytd_result
data_viz = []
for kpi in kpi_order:
# Get values for both years and target
try:
# Previous year
prev_year_row = df_ytd_result[df_ytd_result['Year'] == year_prev]
prev_year_val = prev_year_row[kpi].iloc[0] if len(prev_year_row) > 0 else 0
except (KeyError, IndexError):
prev_year_val = 0
try:
# Current year
curr_year_row = df_ytd_result[df_ytd_result['Year'] == selected_year]
curr_year_val = curr_year_row[kpi].iloc[0] if len(curr_year_row) > 0 else 0
except (KeyError, IndexError):
curr_year_val = 0
target_val = target_map.get(kpi, 0)
# Add data points for each bar
data_viz.extend([
{'KPI': kpi_labels[kpi_order.index(kpi)], 'Tipo': f'{year_prev}', 'Valore': prev_year_val, 'Color': 'previous_year'},
{'KPI': kpi_labels[kpi_order.index(kpi)], 'Tipo': 'Target', 'Valore': target_val, 'Color': 'target'},
{'KPI': kpi_labels[kpi_order.index(kpi)], 'Tipo': f'{selected_year}', 'Valore': curr_year_val, 'Color': 'selected_year'}
])
df_viz = pd.DataFrame(data_viz)
# Create figure with exact dimensions (960x540 px at 100 DPI)
fig, ax = plt.subplots(figsize=(9.6, 5.4), dpi=100, facecolor='white')
# Create the bar plot
bar_plot = sns.barplot(
data=df_viz,
x='KPI',
y='Valore',
hue='Tipo',
order=kpi_labels,
hue_order=[f'{year_prev}', 'Target', f'{selected_year}'],
palette=[COLORS['previous_year'], COLORS['target'], COLORS['selected_year']],
ax=ax
)
# Customize the plot
# Title
mese_names = ['', 'Gennaio', 'Febbraio', 'Marzo', 'Aprile', 'Maggio', 'Giugno',
'Luglio', 'Agosto', 'Settembre', 'Ottobre', 'Novembre', 'Dicembre']
mese_nome = mese_names[selected_month] if selected_month <= 12 else f'Mese {selected_month}'
title = f"Efficienze {descrizione_reparto} Gennaio/{mese_nome} - {year_prev}/{selected_year}"
ax.set_title(title, fontsize=14, fontweight='bold', pad=20)
# Y-axis configuration
ax.set_ylim(0, 100)
ax.set_ylabel('Efficienza (%)', fontsize=12)
ax.set_xlabel('')
# Grid lines at 25% intervals
ax.set_yticks([0, 25, 50, 75, 100])
ax.grid(True, axis='y', alpha=0.3, linestyle='-', linewidth=0.5)
ax.set_axisbelow(True)
# Format y-axis labels as percentages
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x:.0f}%'))
# Add data labels using helper function
_annotate_bars(ax, target_map, year_prev, selected_year, kpi_order)
# Legend configuration
legend = ax.legend(title='', loc='upper center', bbox_to_anchor=(0.5, -0.05),
ncol=3, frameon=False, fontsize=10)
# Adjust layout to prevent clipping
plt.tight_layout()
# Set transparent background if needed
fig.patch.set_alpha(1.0) # Change to 0.0 for transparent
return fig
def _annotate_bars(ax, target_map, year_prev, year_curr, kpi_order):
"""
Annotate the bar-chart produced by `build_eff_chart()`.
β€’ Smart placement: puts the label above the bar unless it would overflow,
in which case it is drawn inside the bar.
β€’ Colour logic:
– Target bar β†’ label always black
– Prev/Cur Yr β†’ green if β‰₯ target, red otherwise
β€’ Contrast: labels drawn inside bars have a white stroke for readability.
β€’ Robustness: works even if the number of bars per KPI group β‰  3.
Parameters:
-----------
ax : matplotlib.axes.Axes
The axes object containing the bar chart
target_map : dict
Dictionary mapping KPI names to target values
year_prev : int
Previous year for comparison
year_curr : int
Current year for comparison
kpi_order : list
List of KPI names in order
Returns:
--------
matplotlib.axes.Axes
The same axes, for possible chaining.
"""
import matplotlib.patheffects as path_effects
# --- constants --------------------------------------------------------- #
GREEN = "#006100"
RED = "#C00000"
y_min, y_max = ax.get_ylim()
offset = max((y_max - y_min) * 0.015, 2.0) # 1.5 % of range or β‰₯ 2 pt
# How many bars compose one KPI group?
bars_per_group = 0
if ax.containers:
bars_per_group = len(ax.containers)
if bars_per_group == 0:
return ax
# ----------------------------------------------------------------------- #
for idx, bar in enumerate(ax.patches):
height = bar.get_height()
if height <= 0: # nothing to show
continue
# Identify KPI and bar type (prev-yr / target / curr-yr)
kpi_idx = idx % len(kpi_order)
tipo_idx = idx // len(kpi_order)
if kpi_idx >= len(kpi_order): # safety net
continue
kpi_name = kpi_order[kpi_idx]
target_val = target_map.get(kpi_name)
if target_val is None:
target_val = 0 # Default if no target
bar_type = ""
# Determine bar_type from hue order used in the plot
hue_order = [str(year_prev), 'Target', str(year_curr)]
if tipo_idx < len(hue_order):
bar_type = hue_order[tipo_idx]
# --- colour selection --------------------------------------------- #
label_colour = "black" # Default
if bar_type == "Target":
label_colour = "black"
elif bar_type in {str(year_prev), str(year_curr)}:
label_colour = GREEN if height >= target_val else RED
# --- label positioning and contrast effect ------------------------ #
bbox_props = None
if height > y_max - (y_max * 0.05): # Overflow if height > 95% of y_max
y_pos = height - offset
va = "top"
# Add a white box for contrast when inside the bar
bbox_props = dict(boxstyle="square,pad=0.2", fc="white", ec="none", alpha=0.7)
else: # above the bar
y_pos = height + offset
va = "bottom"
ax.text(
bar.get_x() + bar.get_width() / 2.0,
y_pos,
f"{height:.1f}%",
ha="center",
va=va,
fontsize=9,
fontweight="bold",
color=label_colour,
bbox=bbox_props
)
return ax
# =============================================================================
# 🎨 POWERPOINT CREATION FUNCTIONS
# =============================================================================
def create_ops_review_presentation(template_path: str, selected_month: int,
selected_year: int, output_dir: str = '.') -> Tuple[str, Presentation]:
"""
Build a two-slide Operations Review deck using template.
This adds intro slides to the template without removing any slides
to completely avoid XML corruption issues.
Args:
template_path: Path to PowerPoint template
selected_month: Month for the report
selected_year: Year for the report
output_dir: Directory to save the presentation
Returns:
Tuple of (output_path, presentation_object)
"""
# I/O checks
if not os.path.exists(template_path):
raise FileNotFoundError(f"Template not found: {template_path}")
if not 1 <= selected_month <= 12:
raise ValueError("selected_month must be 1β€’12")
os.makedirs(output_dir, exist_ok=True)
# Load template and use it as-is
prs = Presentation(template_path)
original_slide_count = len(prs.slides)
print(f"βœ… Loaded template with {original_slide_count} original slide(s)")
# Find TITLE_AND_BODY layout
layout = next(
(lyt for lyt in prs.slide_layouts if lyt.name.upper() == "TITLE_AND_BODY"),
None
)
if layout is None:
# Fallback: use the first available layout
layout = prs.slide_layouts[0] if len(prs.slide_layouts) > 0 else None
if layout is None:
raise RuntimeError("No slide layouts available in template")
print(f"⚠️ TITLE_AND_BODY layout not found, using layout: {layout.name}")
# Helper to add a styled title slide
def add_title_slide(text: str) -> None:
sld = prs.slides.add_slide(layout)
title_shape = sld.shapes.title
if title_shape is None:
for ph in sld.placeholders:
if ph.placeholder_format.type == PP_PLACEHOLDER.TITLE and ph.has_text_frame:
title_shape = ph
break
if title_shape is None: # fallback textbox
w, h = Inches(8), Inches(2.5)
left = (prs.slide_width - w) // 2
top = (prs.slide_height - h) // 2
title_shape = sld.shapes.add_textbox(left, top, w, h)
title_shape.text_frame.clear()
p = title_shape.text_frame.paragraphs[0]
p.text = text
p.alignment = PP_ALIGN.CENTER
f = p.font
f.name = "Arial"
f.size = Pt(60)
f.bold = True
f.color.rgb = RGBColor(0, 112, 192)
p.line_spacing = 1.15
# Add intro slides - they will be at the end, but that's fine
month_name = ITALIAN_MONTH_NAMES[selected_month]
add_title_slide(f"Monthly Operations review\n{month_name} {selected_year}")
add_title_slide("Production KPIs")
print(f"βœ… Added 2 intro slides (total slides: {len(prs.slides)})")
print(f"πŸ“ Note: Intro slides are at positions {len(prs.slides)-1} and {len(prs.slides)}")
# Save file
fname = f"Operations monthly review {selected_month:02d}-{selected_year}.pptx"
out_path = os.path.join(output_dir, fname)
prs.save(out_path)
print(f"πŸ’Ύ Saved β†’ {out_path} (slides: {len(prs.slides)})")
return out_path, prs
def create_df_eff_reparto(df: pd.DataFrame, selected_month: int,
selected_year: int, reparto_code: str) -> pd.DataFrame:
"""
Creates df_eff_reparto with parametric month and reparto selection
(matches notebook implementation exactly)
Args:
df: Raw data DataFrame
selected_month: Current month (1-12)
selected_year: Current year
reparto_code: Department code (default 'ST' for STAMPAGGIO)
Returns:
DataFrame with Period[M] index and efficiency metrics + individual deltas
"""
# Filter data for selected reparto
df_reparto = df[df['REPARTO'] == reparto_code].copy()
# Calculate previous year
previous_year = selected_year - 1
# Filter data for the two-year period and selected months
df_filtered = df_reparto[
((df_reparto['ANNO'] == previous_year) & (df_reparto['MESE'] <= selected_month)) |
((df_reparto['ANNO'] == selected_year) & (df_reparto['MESE'] <= selected_month))
].copy()
# Calculate KPIs grouped by year and month
kpi_monthly = df_filtered.groupby(['ANNO', 'MESE', 'MESE_DESCR']).apply(calcola_kpi).reset_index()
# Create complete date range
periods = []
for year in [previous_year, selected_year]:
for month in range(1, selected_month + 1):
periods.append(pd.Period(f"{year}-{month:02d}", freq='M'))
# Create base dataframe with Period index
df_eff_reparto = pd.DataFrame(index=periods)
df_eff_reparto.index.name = 'Period'
# Initialize columns with NaN
efficiency_cols = ['EFF_REP', 'EFF_PRO', 'EFF_SC', 'EFF_E', 'OEE']
delta_cols = [f'{col}_Delta' for col in efficiency_cols]
# Initialize all columns
for col in efficiency_cols + delta_cols:
df_eff_reparto[col] = np.nan
# Fill in the actual KPI values
for _, row in kpi_monthly.iterrows():
period = pd.Period(f"{row['ANNO']}-{row['MESE']:02d}", freq='M')
if period in df_eff_reparto.index:
for col in efficiency_cols:
# Convert to percentage and round to 1 decimal
df_eff_reparto.loc[period, col] = round(row[col] * 100, 1)
# Fill missing values with forward fill for continuity
for col in efficiency_cols:
df_eff_reparto[col] = df_eff_reparto[col].ffill().fillna(0)
# Calculate % Delta vs previous month for each KPI
for i, period in enumerate(df_eff_reparto.index):
if i == 0:
# First row: no previous month to compare
for col in efficiency_cols:
df_eff_reparto.loc[period, f'{col}_Delta'] = np.nan
else:
# Calculate delta for each individual KPI
for col in efficiency_cols:
prev_value = df_eff_reparto.iloc[i-1][col]
curr_value = df_eff_reparto.iloc[i][col]
if prev_value != 0:
delta = ((curr_value - prev_value) / prev_value) * 100
df_eff_reparto.loc[period, f'{col}_Delta'] = round(delta, 1)
else:
df_eff_reparto.loc[period, f'{col}_Delta'] = 0.0
# Ensure all values are rounded to 1 decimal place
for col in efficiency_cols:
df_eff_reparto[col] = df_eff_reparto[col].round(1)
# Reorder columns: KPIs first, then all deltas at the end
column_order = efficiency_cols + delta_cols
df_eff_reparto = df_eff_reparto[column_order]
return df_eff_reparto
def prepare_comparison_data(df: pd.DataFrame, selected_month: int,
selected_year: int, reparto_code: str) -> Tuple[pd.DataFrame, Dict]:
"""
Prepares a wide-format DataFrame for the efficiency comparison table with delta calculations.
Processes data to create a DataFrame where rows are months and columns are KPIs.
Each cell contains a dictionary with values for the selected year and the previous year.
Additionally calculates deltas for the selected month vs its previous month.
Args:
df (pd.DataFrame): The raw 'Dati Grezzi' DataFrame.
selected_month (int): The month to report up to.
selected_year (int): The main year for the report.
reparto_code (str): The department code (e.g., 'ST').
Returns:
tuple: (pd.DataFrame, dict) - The comparison DataFrame and delta calculations.
"""
previous_year = selected_year - 1
kpis = ['EFF_REP', 'EFF_PRO', 'EFF_SC', 'EFF_E', 'OEE']
# Use the existing function to get monthly KPI data (use 12 to get all months for both years)
df_eff = create_df_eff_reparto(df, 12, selected_year, reparto_code)
# Filter for the relevant years and up to the selected month for the current year
df_eff = df_eff[df_eff.index.year.isin([previous_year, selected_year])]
# Italian month names, uppercase - use our predefined list
month_names = [ITALIAN_MONTH_NAMES[m] for m in range(1, 13)]
# Pivot the data to get years as columns
df_pivot = df_eff.reset_index().pivot_table(
index=df_eff.index.month,
columns=df_eff.index.year,
values=kpis
)
df_pivot.columns = [f'{kpi}_{year}' for kpi, year in df_pivot.columns]
# Create the final structure
output_df = pd.DataFrame(index=month_names, columns=kpis, dtype=object)
# Calculate deltas for selected month vs previous month
deltas = {}
if selected_month > 1:
for kpi in kpis:
try:
curr_month_val = df_pivot.loc[selected_month, f'{kpi}_{selected_year}']
prev_month_val = df_pivot.loc[selected_month - 1, f'{kpi}_{selected_year}']
if pd.notna(curr_month_val) and pd.notna(prev_month_val):
deltas[kpi] = curr_month_val - prev_month_val
else:
deltas[kpi] = np.nan
except KeyError:
deltas[kpi] = np.nan
for month_num, month_name in enumerate(month_names, 1):
for kpi in kpis:
try:
prev_val = df_pivot.loc[month_num, f'{kpi}_{previous_year}']
curr_val = df_pivot.loc[month_num, f'{kpi}_{selected_year}']
# Only include data up to the selected month for the current year
if month_num > selected_month:
curr_val = np.nan
except KeyError:
prev_val, curr_val = np.nan, np.nan
# Use 'at' instead of 'loc' for dictionary assignment
output_df.at[month_name, kpi] = {'prev': prev_val, 'curr': curr_val}
return output_df, deltas
def set_table_black_borders(table):
"""
Remove any table style (so PPT won't override) and then
set all borders to a solid 1 pt black line on every cell.
"""
from lxml import etree
from pptx.oxml.ns import qn
NS_A = "http://schemas.openxmlformats.org/drawingml/2006/main"
# 1) strip out the <a:tblStyle> so our borders aren't overridden
# table._tbl is the low-level CT_Table object under the covers
tbl = table._tbl
tblPr = tbl.tblPr
tblStyle = tblPr.find(qn('a:tblStyle'))
if tblStyle is not None:
tblPr.remove(tblStyle)
# 2) now inject our own <a:lnL>, <a:lnR>, <a:lnT>, <a:lnB> on every cell
for row in table.rows:
for cell in row.cells:
# cell._tc is the CT_Tc element; get or create its <a:tcPr>
tc = cell._tc
tcPr = tc.get_or_add_tcPr()
for border_dir in ('lnL', 'lnR', 'lnT', 'lnB'):
# remove any existing
for elem in tcPr.findall(f'a:{border_dir}', namespaces={'a': NS_A}):
tcPr.remove(elem)
# build <a:lnX> with width=12700 EMU (1 pt), flat cap, solid compound
ln = etree.SubElement(
tcPr, f'{{{NS_A}}}{border_dir}',
{
'w': '12700', # 1pt
'cap': 'flat',
'cmpd': 'sng',
'algn': 'ctr'
}
)
# add <a:solidFill><a:srgbClr val="000000"/></a:solidFill>
solidFill = etree.SubElement(ln, f'{{{NS_A}}}solidFill')
etree.SubElement(solidFill, f'{{{NS_A}}}srgbClr', val='000000')
# ensure it's truly solid
etree.SubElement(ln, f'{{{NS_A}}}prstDash', val='solid')
def add_eff_comparison_table_slide(prs: Presentation, df: pd.DataFrame, selected_year: int,
selected_month: int, reparto_code: str, template_path: str = None,
output_dir: str = '.') -> Presentation:
"""
Adds a new slide with a styled efficiency comparison table to a presentation.
If `prs` is None, a new presentation is created using `create_ops_review_presentation`.
The function adds one slide for the specified department, showing a month-by-month
KPI comparison for the selected year vs. the previous year.
Args:
prs (pptx.Presentation or None): The presentation object. If None, a new one is created.
df (pd.DataFrame): The raw 'Dati Grezzi' DataFrame.
selected_year (int): The primary year for comparison.
selected_month (int): The month to report up to.
reparto_code (str): The department code (e.g., 'ST', 'MS').
template_path (str, optional): Path to the PowerPoint template. Required if prs is None.
output_dir (str): Directory where the presentation will be saved.
Returns:
prs (pptx.Presentation): The presentation object with the new slide added.
"""
if prs is None:
if template_path is None:
raise ValueError("A template_path must be provided if prs is None.")
print(f"✨ Creating new presentation using template")
_, prs = create_ops_review_presentation(template_path, selected_month, selected_year, output_dir)
else:
print(f"πŸ“‚ Using provided presentation object")
# --- Data and Metadata Preparation ---
previous_year = selected_year - 1
reparto_descr = REPARTO_DESCRIPTIONS.get(reparto_code, reparto_code)
kpi_targets = TARGET_KPI_MAP.get(reparto_code, {})
kpis_to_show = ['EFF_REP', 'EFF_PRO', 'EFF_SC', 'EFF_E', 'OEE']
# Prepare data using the helper function (now returns deltas too)
df_comparison, deltas = prepare_comparison_data(df, selected_month, selected_year, reparto_code)
# --- Slide Creation ---
# Try different layouts to find one with a title, starting with index 0
slide_layout = None
for layout_idx in range(len(prs.slide_layouts)):
test_slide = prs.slides.add_slide(prs.slide_layouts[layout_idx])
if test_slide.shapes.title is not None:
# Found a layout with title, remove the test slide and use this layout
prs.slides._sldIdLst.remove(prs.slides._sldIdLst[-1])
slide_layout = prs.slide_layouts[layout_idx]
break
else:
# Remove the test slide
prs.slides._sldIdLst.remove(prs.slides._sldIdLst[-1])
# If no layout with title found, use layout 1 and create title manually
if slide_layout is None:
slide_layout = prs.slide_layouts[1]
slide = prs.slides.add_slide(slide_layout)
# Set Title - LEFT ALIGNED and positioned ABOVE the table
title_text = f"CONFRONTO EFFICIENZE {previous_year}/{selected_year} – {reparto_descr.upper()}"
if slide.shapes.title is not None:
# Title placeholder exists
slide.shapes.title.text = title_text
slide.shapes.title.text_frame.paragraphs[0].font.size = Pt(32)
slide.shapes.title.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT # LEFT ALIGN
else:
# Create title manually - LEFT ALIGNED and positioned ABOVE table
title_box = slide.shapes.add_textbox(Inches(0.3), Inches(0.3), Inches(12.3), Inches(1.0))
title_frame = title_box.text_frame
title_frame.text = title_text
title_para = title_frame.paragraphs[0]
title_para.font.size = Pt(32)
title_para.font.bold = True
title_para.alignment = PP_ALIGN.LEFT # LEFT ALIGN
# --- Table Creation and Positioning ---
rows, cols = 14, 6 # 1 header row + 12 months + 1 delta row, 6 columns (month + 5 KPIs)
table_shape = slide.shapes.add_table(rows, cols, Inches(0.3), Inches(1.6), Inches(12.4), Inches(7.0))
table = table_shape.table
# --- Header Styling ---
# Italian month names, uppercase - use our predefined list
month_names = [ITALIAN_MONTH_NAMES[m] for m in range(1, 13)]
# Month column header - RED BACKGROUND with years styled differently
cell_month_header = table.cell(0, 0)
cell_month_header.fill.solid()
cell_month_header.fill.fore_color.rgb = RGBColor(0xB4, 0x45, 0x57) # RED BACKGROUND like in image
tf = cell_month_header.text_frame
tf.clear()
# Previous year paragraph - GRAY TEXT
p1 = tf.paragraphs[0]
p1.text = str(previous_year)
p1.font.color.rgb = RGBColor(0x80, 0x80, 0x80) # GRAY
p1.font.bold = True
p1.font.size = Pt(20) # BIGGER FONT
p1.alignment = PP_ALIGN.CENTER
# Current year paragraph - WHITE TEXT
p2 = tf.add_paragraph()
p2.text = str(selected_year)
p2.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF) # WHITE
p2.font.bold = True
p2.font.size = Pt(20) # BIGGER FONT
p2.alignment = PP_ALIGN.CENTER
cell_month_header.vertical_anchor = MSO_VERTICAL_ANCHOR.MIDDLE
# KPI Headers (merged with targets)
for i, kpi in enumerate(kpis_to_show, 1):
cell_kpi = table.cell(0, i)
target_val = kpi_targets.get(kpi, 0)
# Clear existing text and create two paragraphs
tf = cell_kpi.text_frame
tf.clear()
# KPI name paragraph
p1 = tf.paragraphs[0]
p1.text = kpi
p1.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF)
p1.font.bold = True
p1.font.size = Pt(20) # BIGGER FONT
p1.alignment = PP_ALIGN.CENTER
# Target paragraph
p2 = tf.add_paragraph()
p2.text = f">{target_val}%"
p2.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF)
p2.font.size = Pt(20) # BIGGER FONT
p2.alignment = PP_ALIGN.CENTER
# Set background color based on column
cell_kpi.fill.solid()
if kpi == 'OEE': # OEE column should be GRAY
cell_kpi.fill.fore_color.rgb = RGBColor(0x80, 0x80, 0x80) # GRAY for OEE column
else:
cell_kpi.fill.fore_color.rgb = RGBColor(0x3F, 0x3F, 0x3F) # Dark gray for other columns
cell_kpi.vertical_anchor = MSO_VERTICAL_ANCHOR.MIDDLE
# --- Body Styling ---
delta_row_inserted = False
current_row = 1
for month_idx, month_name in enumerate(month_names):
# Month Name Column
cell_month = table.cell(current_row, 0)
cell_month.text = month_name
cell_month.fill.solid()
cell_month.fill.fore_color.rgb = RGBColor(0xF2, 0xF2, 0xF2) # Very light grey
p = cell_month.text_frame.paragraphs[0]
p.font.bold = True
p.font.color.rgb = RGBColor(0x00, 0x00, 0x00) # Black text
p.font.size = Pt(18) # BIGGER FONT
p.alignment = PP_ALIGN.CENTER
cell_month.vertical_anchor = MSO_VERTICAL_ANCHOR.MIDDLE
# KPI Value Cells
for c, kpi in enumerate(kpis_to_show, 1):
cell = table.cell(current_row, c)
values = df_comparison.loc[month_name, kpi]
prev_val = values.get('prev')
curr_val = values.get('curr')
tf = cell.text_frame
tf.clear()
# Previous year value
p1 = tf.paragraphs[0]
p1.text = f"{prev_val:.1f}%".replace('.',',') if pd.notna(prev_val) else ""
p1.font.color.rgb = RGBColor(0x80, 0x80, 0x80) # Grey
p1.font.size = Pt(18) # BIGGER FONT
p1.alignment = PP_ALIGN.CENTER
# Current year value
p2 = tf.add_paragraph()
p2.text = f"{curr_val:.1f}%".replace('.',',') if pd.notna(curr_val) else ""
p2.font.bold = True
p2.font.size = Pt(18) # BIGGER FONT
p2.alignment = PP_ALIGN.CENTER
# Color logic for current year value
if pd.notna(curr_val):
target = kpi_targets.get(kpi, 0)
if curr_val >= target:
p2.font.color.rgb = RGBColor(0x00, 0xB0, 0x50) # Green
else:
p2.font.color.rgb = RGBColor(0xC0, 0x00, 0x00) # Red
# Set background color for OEE column
cell.fill.solid()
if kpi == 'OEE': # OEE column cells should be GRAY
cell.fill.fore_color.rgb = RGBColor(0xE5, 0xE5, 0xE5) # Light gray for OEE data cells
else:
cell.fill.fore_color.rgb = RGBColor(0xFF, 0xFF, 0xFF) # White for other columns
cell.vertical_anchor = MSO_VERTICAL_ANCHOR.MIDDLE
current_row += 1
# Insert delta row after selected month
if month_idx + 1 == selected_month and not delta_row_inserted and selected_month > 1:
# Delta month label
cell_delta = table.cell(current_row, 0)
cell_delta.text = "Ξ” vs prev"
cell_delta.fill.solid()
cell_delta.fill.fore_color.rgb = RGBColor(0xF2, 0xF2, 0xF2) # Very light grey
p = cell_delta.text_frame.paragraphs[0]
p.font.size = Pt(15) # BIGGER FONT
p.font.italic = True
p.alignment = PP_ALIGN.CENTER
cell_delta.vertical_anchor = MSO_VERTICAL_ANCHOR.MIDDLE
# Delta values
for c, kpi in enumerate(kpis_to_show, 1):
cell = table.cell(current_row, c)
delta_val = deltas.get(kpi)
tf = cell.text_frame
tf.clear()
p = tf.paragraphs[0]
if pd.notna(delta_val):
sign = "+" if delta_val >= 0 else ""
p.text = f"{sign}{delta_val:.1f}%".replace('.',',')
# Color based on delta direction
if delta_val >= 0:
p.font.color.rgb = RGBColor(0x00, 0xB0, 0x50) # Green for positive
else:
p.font.color.rgb = RGBColor(0xC0, 0x00, 0x00) # Red for negative
else:
p.text = ""
p.font.size = Pt(12) # BIGGER FONT
p.font.italic = True
p.alignment = PP_ALIGN.CENTER
# Set background color for OEE column
cell.fill.solid()
if kpi == 'OEE': # OEE column cells should be GRAY
cell.fill.fore_color.rgb = RGBColor(0xE5, 0xE5, 0xE5) # Light gray for OEE delta cells
else:
cell.fill.fore_color.rgb = RGBColor(0xFF, 0xFF, 0xFF) # White for other columns
cell.vertical_anchor = MSO_VERTICAL_ANCHOR.MIDDLE
current_row += 1
delta_row_inserted = True
# --- Table Border Styling (BLACK borders) ---
set_table_black_borders(table)
# --- Final Table Adjustments ---
# Set column widths - distribute evenly across slide width
total_width = 23
col_width = total_width / cols
for i in range(cols):
table.columns[i].width = Inches(col_width)
# Set row heights - need to account for delta row position
delta_row_position = selected_month + 1 if selected_month > 1 else None # Delta row position
for r in range(rows):
if r == 0: # Header row
table.rows[r].height = Inches(1)
elif r == delta_row_position: # Delta row (positioned after selected_month)
table.rows[r].height = Inches(0.45)
else: # Month rows
table.rows[r].height = Inches(0.8)
print(f"βœ… Slide 'CONFRONTO EFFICIENZE' for {reparto_descr} added successfully.")
# --- Unit Test Checks (as print statements) ---
print("\n--- Running Checks ---")
print(f"Table has {len(table.rows)} rows (expected 14)")
print(f"Table has {len(table.columns)} columns (expected 6)")
# Check if delta row was inserted
if delta_row_inserted:
print(f"βœ… Delta row inserted after month {selected_month}")
else:
print(f"⚠️ Delta row not inserted (selected_month: {selected_month})")
# Check a value format
try:
val_check = table.cell(1,1).text_frame.paragraphs[1].text
if '%' in val_check and ',' in val_check:
print(f"βœ… Cell format check passed (e.g., '{val_check}')")
else:
print(f"⚠️ Cell format check failed (e.g., '{val_check}')")
except IndexError:
print("⚠️ Could not check cell format.")
return prs
def add_eff_chart_slide(prs: Presentation, df: pd.DataFrame, reparto_code: str,
selected_year: int, selected_month: int) -> Presentation:
"""
Creates a new slide with efficiency chart for a specific department.
Parameters:
-----------
prs : pptx.presentation.Presentation
PowerPoint presentation object
df : pandas.DataFrame
Raw data DataFrame (Dati_Grezzi)
reparto_code : str
Department code (e.g., 'ST', 'MS', 'DC')
selected_year : int
Year for comparison
selected_month : int
Month up to which to calculate YTD (1-12)
Returns:
--------
pptx.presentation.Presentation
Updated presentation object with new chart slide
"""
# Store initial slide count for unit testing
initial_slide_count = len(prs.slides)
# Generate the chart using build_eff_chart
fig = build_eff_chart(df, selected_month, selected_year, reparto_code)
# Extract title from the chart BEFORE removing it and convert to UPPERCASE
chart_title = fig.axes[0].get_title().upper() # Convert to uppercase
# Remove the chart title and y-axis label to avoid duplication and clean up the chart
fig.axes[0].set_title('')
fig.axes[0].set_ylabel('') # Remove y-axis label
# Save chart as temporary image
temp_img_path = None
try:
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp_file:
temp_img_path = tmp_file.name
# Save with exact dimensions (build_eff_chart already sets figsize correctly)
fig.savefig(temp_img_path, dpi=96, bbox_inches='tight',
facecolor='white', edgecolor='none')
# Try to use layout 1 ("B"), otherwise find suitable layout
slide_layout = None
if len(prs.slide_layouts) > 1:
# Try layout index 1 ("B")
slide_layout = prs.slide_layouts[1]
else:
# Fallback: find first layout with title and content placeholders
for layout in prs.slide_layouts:
placeholders = layout.placeholders
has_title = any(p.placeholder_format.type == 1 for p in placeholders) # Title placeholder
has_content = any(p.placeholder_format.type == 7 for p in placeholders) # Content placeholder
if has_title and has_content:
slide_layout = layout
break
# If still no suitable layout found, use the first available
if slide_layout is None:
slide_layout = prs.slide_layouts[0]
# Create new slide
slide = prs.slides.add_slide(slide_layout)
# Set slide title - LEFT ALIGNED and UPPERCASE to match table slide
title_placeholder = None
for placeholder in slide.placeholders:
if placeholder.placeholder_format.type == 1: # Title placeholder
title_placeholder = placeholder
break
if title_placeholder:
title_placeholder.text = chart_title # Already uppercase
# Format title: 32pt, bold, LEFT aligned, black (matching table slide)
title_frame = title_placeholder.text_frame
title_paragraph = title_frame.paragraphs[0]
title_paragraph.alignment = PP_ALIGN.LEFT # Changed from CENTER to LEFT
title_run = title_paragraph.runs[0]
title_run.font.size = Pt(32)
title_run.font.bold = True
title_run.font.color.rgb = RGBColor(0, 0, 0) # Black
else:
# Create title manually if no placeholder - LEFT ALIGNED and positioned to match table
title_box = slide.shapes.add_textbox(Inches(0.3), Inches(0.3), Inches(12.3), Inches(1.0))
title_frame = title_box.text_frame
title_frame.text = chart_title # Already uppercase
title_para = title_frame.paragraphs[0]
title_para.font.size = Pt(32)
title_para.font.bold = True
title_para.alignment = PP_ALIGN.LEFT # LEFT ALIGN to match table
title_para.font.color.rgb = RGBColor(0, 0, 0) # Black
# Insert chart image with double dimensions (2x original: 10" Γ— 5.625" β†’ 20" Γ— 11.25")
content_placeholder = None
for placeholder in slide.placeholders:
if placeholder.placeholder_format.type == 7: # Content placeholder
content_placeholder = placeholder
break
# Define target dimensions: Double the original chart size
target_width = Inches(20.0) # Double of original 10"
target_height = Inches(11.25) # Double of original 5.625"
if content_placeholder:
# Remove the placeholder
content_placeholder._element.getparent().remove(content_placeholder._element)
# Position image - moved right for better centering
# Standard slide width is ~13.33", so we move it right to center better
slide_width = Inches(13.33) # Standard slide width
# Calculate center position and then add offset to move right
center_left = (slide_width - target_width) / 2
# Move right by 1.5 inches for better visual centering
left = center_left + Inches(1.5) if target_width < slide_width else Inches(1.8)
top = Inches(1.6) # Match table top position
slide.shapes.add_picture(temp_img_path, left, top, target_width, target_height)
# Unit test checks
final_slide_count = len(prs.slides)
print(f"βœ… Chart slide added for {reparto_code}")
print(f"πŸ“Š Slide count: {initial_slide_count} β†’ {final_slide_count}")
print(f"πŸ“‹ Slide title (UPPERCASE): '{chart_title}'")
print(f"🎯 Chart title and y-axis label removed from image")
# Verify image dimensions (double size: 20" x 11.25")
added_shape = slide.shapes[-1] # Last added shape should be our image
expected_width_emu = int(20.0 * 914400) # 20" in EMU
expected_height_emu = int(11.25 * 914400) # 11.25" in EMU
tolerance = 0.01 # 1% tolerance
width_ok = abs(added_shape.width - expected_width_emu) / expected_width_emu <= tolerance
height_ok = abs(added_shape.height - expected_height_emu) / expected_height_emu <= tolerance
if width_ok and height_ok:
print(f"βœ… Image dimensions verified: {added_shape.width} x {added_shape.height} EMU")
else:
print(f"⚠️ Image dimensions: {added_shape.width} x {added_shape.height} EMU (expected ~{expected_width_emu} x {expected_height_emu})")
print(f"πŸ“ Image positioned at: Left={added_shape.left/914400:.1f}\", Top={added_shape.top/914400:.1f}\"")
# Warning about size
if target_width > slide_width:
print(f"⚠️ Warning: Image width ({target_width/914400:.1f}\") exceeds standard slide width ({slide_width/914400:.1f}\")")
# Unit test assertions
assert final_slide_count == initial_slide_count + 1, f"Expected {initial_slide_count + 1} slides, got {final_slide_count}"
finally:
# Clean up temporary file
if temp_img_path and os.path.exists(temp_img_path):
os.unlink(temp_img_path)
# Close the matplotlib figure to free memory
plt.close(fig)
return prs
def add_breakdowns_overall_slide(prs, df, selected_year, selected_month, template_path=None, output_dir='.'):
"""
Adds a new slide with a styled breakdowns overall table to a presentation.
If `prs` is None, a new presentation is created using `create_ops_review_presentation`.
The function adds one slide showing overall breakdowns for all departments,
comparing the selected year vs. the previous year.
Args:
prs (pptx.Presentation or None): The presentation object. If None, a new one is created.
df (pd.DataFrame): The raw 'Dati Grezzi' DataFrame.
selected_year (int): The primary year for comparison.
selected_month (int): The month to report up to.
template_path (str, optional): Path to the PowerPoint template. Required if prs is None.
output_dir (str): Directory where the presentation will be saved.
Returns:
prs (pptx.Presentation): The presentation object with the new slide added.
"""
from lxml import etree
from pptx.oxml.ns import qn
from pptx.util import Inches, Pt
from pptx.dml.color import RGBColor
from pptx.enum.text import PP_ALIGN, MSO_VERTICAL_ANCHOR
import pandas as pd
if prs is None:
if template_path is None:
raise ValueError("A template_path must be provided if prs is None.")
print(f"✨ Creating new presentation using template")
_, prs = create_ops_review_presentation(template_path, selected_month, selected_year, output_dir)
else:
print(f"πŸ“‚ Using provided presentation object")
# --- Data Preparation ---
previous_year = selected_year - 1
# Get breakdown data using the create_df_h_fer_causali_monthly function
try:
df_breakdowns = create_df_h_fer_causali_monthly(df, selected_year, selected_month)
except Exception as e:
print(f"❌ Error getting breakdown data: {e}")
return prs
# --- Slide Creation ---
# Try different layouts to find one with a title
slide_layout = None
for layout_idx in range(len(prs.slide_layouts)):
test_slide = prs.slides.add_slide(prs.slide_layouts[layout_idx])
if test_slide.shapes.title is not None:
prs.slides._sldIdLst.remove(prs.slides._sldIdLst[-1])
slide_layout = prs.slide_layouts[layout_idx]
break
else:
prs.slides._sldIdLst.remove(prs.slides._sldIdLst[-1])
if slide_layout is None:
slide_layout = prs.slide_layouts[1]
slide = prs.slides.add_slide(slide_layout)
# Set Title
title_text = "BREAKDOWNS OVERALL"
if slide.shapes.title is not None:
slide.shapes.title.text = title_text
slide.shapes.title.text_frame.paragraphs[0].font.size = Pt(32)
slide.shapes.title.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT
else:
# Create title manually
title_box = slide.shapes.add_textbox(Inches(0.3), Inches(0.3), Inches(12.3), Inches(1.0))
title_frame = title_box.text_frame
title_frame.text = title_text
title_para = title_frame.paragraphs[0]
title_para.font.size = Pt(32)
title_para.font.bold = True
title_para.alignment = PP_ALIGN.LEFT
# --- Table Creation and Positioning ---
# Calculate number of rows: header + months up to selected_month
rows = 1 + selected_month # 1 header + months
cols = 5 # Month + 4 data columns
table_shape = slide.shapes.add_table(rows, cols, Inches(0.3), Inches(1.6), Inches(12.4), Inches(7.0))
table = table_shape.table
# --- Header Styling ---
# Column headers
headers = [
f"{previous_year}\n{selected_year}",
f"{previous_year}\nH FER",
f"TOP 3 CAUSALI\nFERMO {previous_year}",
f"{selected_year}\nH FER",
f"TOP 3 CAUSALI\nFERMO {selected_year}"
]
for i, header in enumerate(headers):
cell = table.cell(0, i)
cell.fill.solid()
if i == 0: # First column (years)
cell.fill.fore_color.rgb = RGBColor(0xF2, 0xF2, 0xF2) # Light grey background
else:
cell.fill.fore_color.rgb = RGBColor(0x3F, 0x3F, 0x3F) # Dark gray
tf = cell.text_frame
tf.clear()
if i == 0: # Years column - special styling
lines = header.split('\n')
# Previous year paragraph - BLUE TEXT
p1 = tf.paragraphs[0]
p1.text = lines[0]
p1.font.color.rgb = RGBColor(0x00, 0xAE, 0xEF) # Blue
p1.font.bold = True
p1.font.size = Pt(20)
p1.alignment = PP_ALIGN.CENTER
# Current year paragraph - BLACK TEXT
p2 = tf.add_paragraph()
p2.text = lines[1]
p2.font.color.rgb = RGBColor(0x00, 0x00, 0x00) # Black
p2.font.bold = True
p2.font.size = Pt(20)
p2.alignment = PP_ALIGN.CENTER
else:
# Regular header styling
p = tf.paragraphs[0]
p.text = header
p.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF) # White
p.font.bold = True
p.font.size = Pt(16)
p.alignment = PP_ALIGN.CENTER
cell.vertical_anchor = MSO_VERTICAL_ANCHOR.MIDDLE
# --- Body Data Filling ---
# Italian month names - use the same approach as create_df_h_fer_causali_monthly
month_names = [
'GENNAIO', 'FEBBRAIO', 'MARZO', 'APRILE', 'MAGGIO', 'GIUGNO',
'LUGLIO', 'AGOSTO', 'SETTEMBRE', 'OTTOBRE', 'NOVEMBRE', 'DICEMBRE'
]
current_row = 1
for month_idx in range(selected_month):
month_name = month_names[month_idx]
# Month Name Column
cell_month = table.cell(current_row, 0)
cell_month.text = month_name
cell_month.fill.solid()
cell_month.fill.fore_color.rgb = RGBColor(0xF2, 0xF2, 0xF2) # Light grey
p = cell_month.text_frame.paragraphs[0]
p.font.bold = True
p.font.color.rgb = RGBColor(0x00, 0x00, 0x00) # Black text
p.font.size = Pt(18)
p.alignment = PP_ALIGN.CENTER
cell_month.vertical_anchor = MSO_VERTICAL_ANCHOR.MIDDLE
# Data columns - get data from df_breakdowns
for col_idx, column in enumerate(df_breakdowns.columns, 1):
cell = table.cell(current_row, col_idx)
tf = cell.text_frame
tf.clear()
# Get value from the DataFrame
value = df_breakdowns.loc[month_name, column] if month_name in df_breakdowns.index else None
# Single paragraph for each cell
p = tf.paragraphs[0]
if pd.notna(value) and value != 0:
if 'H FER' in column:
# Format as integer for H FER columns
p.text = str(int(value)) if pd.notna(value) else ""
# Color coding: Blue for previous year, Black for selected year
if str(previous_year) in column:
p.font.color.rgb = RGBColor(0x00, 0xAE, 0xEF) # Blue
else:
p.font.color.rgb = RGBColor(0x00, 0x00, 0x00) # Black
p.font.size = Pt(18)
else:
# TOP 3 CAUSALI columns - multi-line text
p.text = str(value) if pd.notna(value) else ""
p.font.color.rgb = RGBColor(0x00, 0x00, 0x00) # Black
p.font.size = Pt(15)
else:
p.text = ""
p.font.color.rgb = RGBColor(0x00, 0x00, 0x00) # Black
p.font.bold = True
p.alignment = PP_ALIGN.CENTER if 'H FER' in column else PP_ALIGN.LEFT
cell.fill.solid()
cell.fill.fore_color.rgb = RGBColor(0xFF, 0xFF, 0xFF) # White background
cell.vertical_anchor = MSO_VERTICAL_ANCHOR.MIDDLE
current_row += 1
# --- Table Border Styling (BLACK borders) ---
def set_table_black_borders(table):
"""Set all borders to solid 1pt black lines on every cell."""
NS_A = "http://schemas.openxmlformats.org/drawingml/2006/main"
# Remove table style to prevent overrides
tbl = table._tbl
tblPr = tbl.tblPr
tblStyle = tblPr.find(qn('a:tblStyle'))
if tblStyle is not None:
tblPr.remove(tblStyle)
# Set borders on every cell
for row in table.rows:
for cell in row.cells:
tc = cell._tc
tcPr = tc.get_or_add_tcPr()
for border_dir in ('lnL', 'lnR', 'lnT', 'lnB'):
# Remove existing borders
for elem in tcPr.findall(f'a:{border_dir}', namespaces={'a': NS_A}):
tcPr.remove(elem)
# Add 1pt black border
ln = etree.SubElement(
tcPr, f'{{{NS_A}}}{border_dir}',
{'w': '12700', 'cap': 'flat', 'cmpd': 'sng', 'algn': 'ctr'}
)
solidFill = etree.SubElement(ln, f'{{{NS_A}}}solidFill')
etree.SubElement(solidFill, f'{{{NS_A}}}srgbClr', val='000000')
etree.SubElement(ln, f'{{{NS_A}}}prstDash', val='solid')
# Apply black borders
set_table_black_borders(table)
# --- Final Table Adjustments ---
# Set column widths
col_widths = [Inches(4), Inches(3), Inches(7.0), Inches(2.5), Inches(7.0)]
for i, width in enumerate(col_widths):
table.columns[i].width = width
# Set row heights
table.rows[0].height = Inches(1.0) # Header row
for r in range(1, rows):
table.rows[r].height = Inches(0.8) # Data rows
print(f"βœ… Slide 'BREAKDOWNS OVERALL' added successfully.")
# --- Unit Test Checks ---
print("\n--- Running Checks ---")
print(f"Table has {len(table.rows)} rows (expected {rows})")
print(f"Table has {len(table.columns)} columns (expected {cols})")
print(f"βœ… Breakdowns overall slide created")
return prs
# =============================================================================
# 🎯 MAIN ENTRY POINT
# =============================================================================
def make_ppt(csv_path: str, selected_month: int, selected_year: int) -> str:
"""
Main function to generate PowerPoint presentation from CSV data.
This follows the exact notebook pattern:
1. First department (ST) creates the presentation via add_eff_comparison_table_slide with prs=None
2. Subsequent departments (MS, DC) add their slides to the existing presentation
Args:
csv_path: Path to the CSV file containing manufacturing data
selected_month: Month for the report (1-12)
selected_year: Year for the report
Returns:
str: Path to the generated PowerPoint file
Raises:
ValueError: If CSV is invalid or parameters are out of range
FileNotFoundError: If CSV file or template is not found
"""
# Validate inputs
if not os.path.exists(csv_path):
raise FileNotFoundError(f"CSV file not found: {csv_path}")
if not 1 <= selected_month <= 12:
raise ValueError("selected_month must be between 1 and 12")
if not 2020 <= selected_year <= 2030:
raise ValueError("selected_year must be between 2020 and 2030")
print(f"πŸš€ Starting PowerPoint generation for all departments")
print(f"πŸ“‚ Input CSV: {csv_path}")
print(f"πŸ“… Report period: Up to {ITALIAN_MONTH_NAMES[selected_month]} {selected_year}")
# Load and validate data
df = load_csv_with_encoding_fallback(csv_path)
validate_csv_schema(df)
df = prepare_dataframe(df)
# Template path (relative to the module) - check multiple possible locations
possible_template_paths = [
Path(__file__).parent.parent / "template.pptx", # Root directory
Path(__file__).parent.parent / "PPT Assets" / "Single Slide Template Operations monthly review 04-25.pptx", # Original path
Path(__file__).parent / "template.pptx", # src directory
]
template_path = None
for path in possible_template_paths:
if path.exists():
template_path = path
break
if template_path is None:
raise FileNotFoundError(f"Template not found in any of these locations: {[str(p) for p in possible_template_paths]}")
# Create output directory in temp
output_dir = tempfile.mkdtemp(prefix="briva_ppt_")
# Department mapping: actual data departments -> expected departments
dept_mapping = {
'BP': 'PS', # Brush Production -> Produzione Scovoli
'DC': 'DC', # Decoration -> Decorazioni (stays the same)
'IM': 'ST', # Injection Molding -> Stampaggio
'IS': 'MS', # Injection Surlyn -> Stampaggio Surlyn
'IB': 'IS', # Injection Blowing -> Iniezione Soffiaggio
'AS': 'AS' # Assembling -> Assemblaggio (stays the same)
}
# Apply department mapping to the dataframe
df_mapped = df.copy()
df_mapped['REPARTO'] = df_mapped['REPARTO'].map(dept_mapping).fillna(df_mapped['REPARTO'])
# Departments to process - ST first (creates base), then MS and DC
repartos = ['ST', 'MS', 'DC']
prs = None
final_path = None
try:
# Process departments following exact notebook pattern
for reparto_code in repartos:
print(f"\n--- Generating slides for {REPARTO_DESCRIPTIONS[reparto_code]} ({reparto_code}) ---")
# Check if data exists for this reparto
reparto_data = df_mapped[df_mapped['REPARTO'] == reparto_code]
if reparto_data.empty:
print(f"⚠️ No data found for {reparto_code}, skipping...")
continue
# For the first department, create new presentation via add_eff_comparison_table_slide
if reparto_code == repartos[0]:
print(f"πŸ—οΈ Creating new presentation for first department {reparto_code}")
# Create new presentation for the first department (follows notebook pattern exactly)
prs = add_eff_comparison_table_slide(
prs=None, # This triggers creation of intro slides
df=df_mapped,
selected_year=selected_year,
selected_month=selected_month,
reparto_code=reparto_code,
template_path=str(template_path),
output_dir=output_dir
)
# Set the final path based on the created presentation
final_path = os.path.join(output_dir, f"Operations monthly review {selected_month:02d}-{selected_year}.pptx")
print(f"βœ… Base presentation created with {len(prs.slides)} slides")
else: # Subsequent departments - add to existing presentation
print(f"πŸ“„ Adding {reparto_code} slides to existing presentation")
# Add table slide for subsequent departments
prs = add_eff_comparison_table_slide(
prs=prs,
df=df_mapped,
selected_year=selected_year,
selected_month=selected_month,
reparto_code=reparto_code
)
# Add chart slide immediately after the table slide for this department
print(f"πŸ“Š Adding chart slide for {reparto_code}...")
prs = add_eff_chart_slide(
prs=prs,
df=df_mapped,
reparto_code=reparto_code,
selected_year=selected_year,
selected_month=selected_month
)
# Add campionatura slide only for ST (Stampaggio) department
if reparto_code == 'ST':
print(f"πŸ”¬ Adding campionatura slide for {reparto_code}...")
prs = add_campionatura_table_slide(
prs=prs,
df=df_mapped,
selected_year=selected_year,
selected_month=selected_month,
reparto_code=reparto_code
)
# Add attrezzaggi slide for ST + MS departments
print(f"πŸ”§ Adding attrezzaggi slide for ST + MS departments...")
prs = add_attrezzaggi_slide(
prs=prs,
df=df_mapped,
selected_year=selected_year,
selected_month=selected_month,
reparto_codes=['ST', 'MS']
)
print(f"βœ… {reparto_code} slides added. Total slides: {len(prs.slides)}")
if prs is None:
raise ValueError("No valid departments found in the data")
# Add YTD Overall slide at the end
print(f"\n--- Adding YTD Overall slide ---")
prs = add_ytd_overall_slide(
prs=prs,
df=df_mapped,
selected_month=selected_month,
selected_year=selected_year
)
# Add Breakdowns Overall slide
print(f"\n--- Adding Breakdowns Overall slide ---")
prs = add_breakdowns_overall_slide(
prs=prs,
df=df_mapped,
selected_year=selected_year,
selected_month=selected_month
)
# Add Top Offenders slide at the end
print(f"\n--- Adding Top Offenders slide ---")
prs = add_top_offenders_slide(
prs=prs,
df=df_mapped,
selected_year=selected_year,
selected_month=selected_month
)
# Remove the first empty slide (template slide) at the very end to avoid XML corruption
# This is done after all slides have been created to minimize disruption
if len(prs.slides) > 8: # We expect 8 slides (2 intro + 6 department slides)
print(f"\nπŸ—‘οΈ Removing original template slide (slide 1) at end of process...")
try:
# Get the first slide (index 0) - this should be the empty template slide
first_slide = prs.slides[0]
# Check if it's actually empty/template slide by looking for minimal content
is_empty_template = True
for shape in first_slide.shapes:
if hasattr(shape, 'text_frame') and shape.text_frame:
try:
text_content = shape.text_frame.text.strip()
if text_content and len(text_content) > 10: # More than just placeholder text
is_empty_template = False
break
except:
pass
if is_empty_template:
# Use the simplest possible approach - remove from slide ID list only
# This avoids relationship manipulation issues
sldIdLst = prs.slides._sldIdLst
if len(sldIdLst) > 0:
# Remove the first slide ID (index 0)
removed_slide = sldIdLst[0]
sldIdLst.remove(removed_slide)
print(f"βœ… Successfully removed empty template slide")
print(f"πŸ“Š Final slide count: {len(prs.slides)} slides")
else:
print(f"⚠️ Slide ID list is empty")
else:
print(f"⚠️ First slide appears to have content, not removing")
except Exception as e:
print(f"⚠️ Could not remove template slide: {e}")
print(f"πŸ“Š Keeping all {len(prs.slides)} slides")
# Save final presentation
if final_path:
prs.save(final_path)
print(f"\nβœ… Final PowerPoint saved: {final_path}")
print(f"πŸ“Š Total slides: {len(prs.slides)}")
print(f"🏭 Departments included: {', '.join(repartos)}")
print(f"πŸ“‹ ST department has 4 slides: comparison table + YTD chart + campionatura table + attrezzaggi (ST+MS)")
print(f"πŸ“‹ MS/DC departments have 2 slides each: comparison table + YTD chart")
print(f"πŸ“‹ Third-to-last slide: YTD Overall summary for all departments")
print(f"πŸ“‹ Second-to-last slide: Breakdowns Overall showing H_FER and top 3 causes by month")
print(f"πŸ“‹ Final slide: Top Offenders showing causali di fermo across all departments")
# Verify "Production KPIs" slide is present
production_kpis_found = False
for i, slide in enumerate(prs.slides):
for shape in slide.shapes:
if hasattr(shape, 'text_frame') and shape.text_frame:
try:
text_content = shape.text_frame.text.strip()
if 'Production KPIs' in text_content:
production_kpis_found = True
print(f"βœ… 'Production KPIs' slide verified in position {i+1}")
break
except:
pass
if production_kpis_found:
break
if not production_kpis_found:
print("⚠️ Warning: 'Production KPIs' slide not found in final presentation")
return final_path
else:
raise RuntimeError("Failed to create presentation")
except Exception as e:
print(f"❌ Error creating PowerPoint: {str(e)}")
raise
# =============================================================================
# πŸ§ͺ TESTING AND VALIDATION
# =============================================================================
def validate_ppt_output(ppt_path: str) -> bool:
"""
Validate that the generated PowerPoint file is valid and contains expected content.
Args:
ppt_path: Path to the PowerPoint file
Returns:
bool: True if valid, False otherwise
"""
try:
if not os.path.exists(ppt_path):
print(f"❌ File does not exist: {ppt_path}")
return False
# Try to load the file
prs = Presentation(ppt_path)
# Check that it has at least 1 slide
if len(prs.slides) < 1:
print(f"❌ Presentation has no slides")
return False
# Expected minimum slides: 2 intro + (ST: 4 slides) + (MS: 2 slides) + (DC: 2 slides) + 3 final slides = 13 slides
expected_min_slides = 13
if len(prs.slides) < expected_min_slides:
print(f"⚠️ PowerPoint has {len(prs.slides)} slides, expected at least {expected_min_slides}")
# Check for key slide titles
slide_titles = []
for i, slide in enumerate(prs.slides):
title_text = ""
if slide.shapes.title:
title_text = slide.shapes.title.text
elif any(shape.has_text_frame for shape in slide.shapes):
# Look for title in text shapes
for shape in slide.shapes:
if shape.has_text_frame and shape.text_frame.text.strip():
title_text = shape.text_frame.text.strip()
break
slide_titles.append(title_text)
# Check for expected slide content
expected_content = [
'Production KPIs',
'YTD OVERALL',
'BREAKDOWNS OVERALL',
'TOP 3 OFFENDER',
'ORE CAMPIONATURA',
'ZOOM ATTREZZAGGI'
]
found_content = []
for content in expected_content:
for title in slide_titles:
if content in title:
found_content.append(content)
break
file_size = os.path.getsize(ppt_path)
print(f"βœ… PowerPoint validation passed:")
print(f" πŸ“ File size: {file_size:,} bytes")
print(f" πŸ“Š Slides: {len(prs.slides)}")
print(f" πŸ“ Dimensions: {prs.slide_width} x {prs.slide_height}")
print(f" πŸ“‹ Found expected content: {', '.join(found_content)}")
if len(found_content) < len(expected_content):
missing = set(expected_content) - set(found_content)
print(f" ⚠️ Missing content: {', '.join(missing)}")
return True
except Exception as e:
print(f"❌ PowerPoint validation failed: {str(e)}")
return False
# =============================================================================
# πŸ“Š NEW FUNCTIONS FROM NOTEBOOK
# =============================================================================
def build_df_campionatura(df, reparto_code, selected_year, selected_month) -> pd.DataFrame:
"""
Returns a month-indexed DataFrame containing theoretical hours, real hours, and deltas for campionatura
so that the reporting layer can read the numbers directly.
Parameters:
-----------
df : pd.DataFrame
The raw 'Dati Grezzi' DataFrame
reparto_code : str
Department code (e.g., 'ST', 'MS', 'DC')
selected_year : int
The current year for analysis
selected_month : int
The current month (1-12) to analyze up to
Returns:
--------
pd.DataFrame
DataFrame with Period[M] index and campionatura metrics
Raises:
-------
ValueError
If no data found for the specified reparto
"""
import pandas as pd
import numpy as np
# Calculate previous year
previous_year = selected_year - 1
# D β€’ Filtering logic
# 1. Keep rows where REPARTO == reparto_code
df_filtered = df[df['REPARTO'] == reparto_code].copy()
# 2. Keep rows with ANNO ∈ {previous_year, selected_year} and MESE ≀ selected_month
df_filtered = df_filtered[
((df_filtered['ANNO'] == previous_year) & (df_filtered['MESE'] <= selected_month)) |
((df_filtered['ANNO'] == selected_year) & (df_filtered['MESE'] <= selected_month))
]
# 3. If filter returns zero rows β†’ raise ValueError
if df_filtered.empty:
raise ValueError(f"No data for reparto {reparto_code}")
# E β€’ Construction rules
# 1. Aggregate metrics per year-month as in C
aggregated = df_filtered.groupby(['ANNO', 'MESE']).agg({
'H_CAMP_T': 'sum', # C β€’ H_CAMP_T = Ξ£ H_CAMP_T (ore campionatura teoriche)
'ORECAMP': 'sum' # C β€’ H_CAMP = Ξ£ ORECAMP (ore campionatura reali)
}).reset_index()
# A β€’ Frame shape & index
# 2. Index covers 24 consecutive periods: January β‡’ selected_month for previous_year and selected_year
periods = []
for year in [previous_year, selected_year]:
for month in range(1, selected_month + 1):
periods.append(pd.Period(f"{year}-{month:02d}", freq='M'))
# 1. The returned DataFrame has a Period[M] index named Period
# 3. Order is chronological (oldest β†’ newest)
df_result = pd.DataFrame(index=periods)
df_result.index.name = 'Period'
# B β€’ Columns (exact order)
# ['H_CAMP_T', 'H_CAMP', 'DELTA_ABS', 'DELTA_PCT']
for col in ['H_CAMP_T', 'H_CAMP', 'DELTA_ABS', 'DELTA_PCT']:
df_result[col] = np.nan
# Fill in actual values from aggregated data
for _, row in aggregated.iterrows():
period = pd.Period(f"{int(row['ANNO'])}-{int(row['MESE']):02d}", freq='M')
if period in df_result.index:
df_result.loc[period, 'H_CAMP_T'] = row['H_CAMP_T']
df_result.loc[period, 'H_CAMP'] = row['ORECAMP'] # Use original column name
# E β€’ Construction rules
# 3. Fill missing H_CAMP_T and H_CAMP with 0.0
df_result['H_CAMP_T'] = df_result['H_CAMP_T'].fillna(0.0)
df_result['H_CAMP'] = df_result['H_CAMP'].fillna(0.0)
# C β€’ Metric definitions
# DELTA_ABS = H_CAMP – H_CAMP_T
# DELTA_PCT = (DELTA_ABS / H_CAMP_T) Γ— 100 (NaN if H_CAMP_T == 0)
df_result['DELTA_ABS'] = df_result['H_CAMP'] - df_result['H_CAMP_T']
# Calculate DELTA_PCT with special handling for H_CAMP_T == 0
df_result['DELTA_PCT'] = np.where(
df_result['H_CAMP_T'] == 0,
np.nan,
(df_result['DELTA_ABS'] / df_result['H_CAMP_T']) * 100
)
# E β€’ Construction rules
# 4. For rows where H_CAMP_T == 0 set DELTA_ABS = 0.0 and DELTA_PCT = NaN
mask_zero_theoretical = df_result['H_CAMP_T'] == 0
df_result.loc[mask_zero_theoretical, 'DELTA_ABS'] = 0.0
df_result.loc[mask_zero_theoretical, 'DELTA_PCT'] = np.nan
# B β€’ Convert to appropriate data types and round
# Convert hour columns to integers (no decimals)
for col in ['H_CAMP_T', 'H_CAMP', 'DELTA_ABS']:
df_result[col] = df_result[col].round(0).astype('Int64') # Nullable integer
# DELTA_PCT stays as float but rounded to integer
df_result['DELTA_PCT'] = df_result['DELTA_PCT'].round(0)
return df_result
def build_df_attrezzaggi(df, reparto_codes, selected_year, selected_month):
"""
Creates a two-row YTD summary of ore attrezzaggio (set-up hours) for the chosen reparto(s).
When multiple departments are provided, it aggregates the data across all departments.
Parameters:
-----------
df : pandas.DataFrame
Raw data DataFrame (Dati_Grezzi)
reparto_codes : str or list
Department code(s) (e.g., 'ST', ['ST', 'MS'], ['ST', 'MS', 'DC'])
selected_year : int
Current year for analysis
selected_month : int
Month up to which to calculate YTD (1-12)
Returns:
--------
pandas.DataFrame
DataFrame with Period[M] index, two rows for previous_year and selected_year
with YTD attrezzaggi metrics and theoretical/actual time deltas
Raises:
-------
ValueError
If no data found for the specified reparto(s)
"""
import pandas as pd
import numpy as np
# --- Section A: Input processing ---
# Handle both single string and list inputs
if isinstance(reparto_codes, str):
reparto_codes = [reparto_codes]
# Validate input
if not reparto_codes:
raise ValueError("reparto_codes cannot be empty")
previous_year = selected_year - 1
# Filter by reparto(s)
df_reparto = df[df['REPARTO'].isin(reparto_codes)].copy()
# Filter by years and months
df_filtered = df_reparto[
(
((df_reparto['ANNO'] == previous_year) & (df_reparto['MESE'] <= selected_month)) |
((df_reparto['ANNO'] == selected_year) & (df_reparto['MESE'] <= selected_month))
)
].copy()
# Check if data exists
if df_filtered.empty:
raise ValueError(f"No data for reparto(s) {reparto_codes}")
# --- Section B: Class mapping and theoretical times ---
# Map CAU_ATT to classes and define theoretical times
class_mapping = {'VERSI': 'CV', 'COLOR': 'CC', 'STAMP': 'CS'}
theoretical_times = {
'CS': 8.0, # Cambio stampo: 8 ore
'CV': 6.0, # Cambio versione: 6 ore
'CC': 4.0 # Cambio colore: 4 ore
}
df_filtered = df_filtered[df_filtered['CAU_ATT'].isin(class_mapping.keys())].copy()
df_filtered['CLASS'] = df_filtered['CAU_ATT'].map(class_mapping)
# --- Section C: Metrics calculation ---
classes = ['CS', 'CV', 'CC']
ytd_results = []
for year in [previous_year, selected_year]:
# YTD data for this year
df_year = df_filtered[df_filtered['ANNO'] == year].copy()
# Initialize result dict for this year
reparto_label = '+'.join(sorted(reparto_codes)) if len(reparto_codes) > 1 else reparto_codes[0]
year_result = {
'Period': pd.Period(f"{year}-{selected_month:02d}", freq='M'),
'Year': year,
'YTD_Month': selected_month,
'YTD_Label': f"YTD {year} (Gen-{selected_month:02d})",
'Reparti': reparto_label
}
# Calculate YTD metrics for each class (aggregated across all departments)
for class_code in classes:
df_class = df_year[df_year['CLASS'] == class_code]
if not df_class.empty:
# Aggregate across all departments
# N_ATTR: Count unique BOLLAs across all departments
n_attr = df_class['BOLLA'].nunique()
# OREATT: Sum of all actual hours across all departments
oreatt = df_class['OREATT'].sum()
# Calculate theoretical hours: nΒ°attrezzaggi * tempo teorico
tempo_teorico = theoretical_times[class_code]
h_att_t = n_attr * tempo_teorico
# Calculate average per attrezzaggio (on aggregated data)
media_h = oreatt / n_attr if n_attr > 0 else 0.0
# Calculate delta: media_h - tempo_teorico
delta_abs = media_h - tempo_teorico if n_attr > 0 else 0.0
else:
h_att_t = 0.0
oreatt = 0.0
n_attr = 0
media_h = 0.0
delta_abs = 0.0
# YTD metrics
year_result[f'H_ATT_T_{class_code}'] = round(h_att_t, 1)
year_result[f'OREATT_{class_code}'] = round(oreatt, 1)
year_result[f'N_ATTR_{class_code}'] = n_attr
year_result[f'MEDIA_H_{class_code}'] = round(media_h, 1)
year_result[f'DELTA_ABS_{class_code}'] = round(delta_abs, 1)
ytd_results.append(year_result)
# --- Section D: Output DataFrame ---
if not ytd_results:
# Create empty dataframe with proper structure
periods = [
pd.Period(f"{previous_year}-{selected_month:02d}", freq='M'),
pd.Period(f"{selected_year}-{selected_month:02d}", freq='M')
]
df_result = pd.DataFrame(index=periods)
df_result.index.name = 'Period'
# Add all required columns with default values
info_cols = ['Year', 'YTD_Month', 'YTD_Label', 'Reparti']
metric_cols = []
for class_code in classes:
metric_cols.extend([
f'H_ATT_T_{class_code}', f'OREATT_{class_code}',
f'N_ATTR_{class_code}', f'MEDIA_H_{class_code}',
f'DELTA_ABS_{class_code}'
])
reparto_label = '+'.join(sorted(reparto_codes)) if len(reparto_codes) > 1 else reparto_codes[0]
for col in info_cols + metric_cols:
if col in info_cols:
if col == 'YTD_Label':
df_result[col] = [f"YTD {previous_year} (Gen-{selected_month:02d})",
f"YTD {selected_year} (Gen-{selected_month:02d})"]
elif col == 'Year':
df_result[col] = [previous_year, selected_year]
elif col == 'YTD_Month':
df_result[col] = [selected_month, selected_month]
elif col == 'Reparti':
df_result[col] = [reparto_label, reparto_label]
else:
df_result[col] = 0.0
else:
# Create dataframe from results
df_result = pd.DataFrame(ytd_results)
df_result.set_index('Period', inplace=True)
# Ensure proper column order
info_cols = ['Year', 'YTD_Month', 'YTD_Label', 'Reparti']
metric_cols = []
for class_code in classes:
metric_cols.extend([
f'H_ATT_T_{class_code}', f'OREATT_{class_code}',
f'N_ATTR_{class_code}', f'MEDIA_H_{class_code}',
f'DELTA_ABS_{class_code}'
])
# Reorder columns
df_result = df_result[info_cols + metric_cols]
# Ensure all numeric columns are proper types
for col in df_result.columns:
if col not in ['Year', 'YTD_Month', 'YTD_Label', 'Reparti']:
if 'N_ATTR_' in col:
df_result[col] = df_result[col].astype('int64')
else:
df_result[col] = df_result[col].astype('float64')
return df_result
def create_df_all_reparti_ytd(df, selected_month, selected_year):
"""
Creates a comprehensive YTD KPI summary table for all departments with AI-generated comments.
Parameters:
-----------
df : pandas.DataFrame
Raw data DataFrame
selected_month : int
Current month (1-12) - YTD will be calculated up to this month
selected_year : int
Current year
Returns:
--------
pandas.DataFrame
DataFrame with reparti as index and YTD KPIs for current and previous year
Each KPI has two columns: one for previous year, one for current year
Plus a 'COMMENTO' column with AI-generated analysis
"""
import numpy as np
import pandas as pd
import os
from openai import OpenAI
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
# Get all unique reparti from the dataframe
available_reparti = df['REPARTO'].unique()
# Department descriptions mapping
reparto_descriptions = {
'AS': 'ASSEMBLAGGIO',
'DC': 'DECORAZIONI',
'IS': 'INIEZIONE SOFFIAGGIO',
'MS': 'STAMPAGGIO SURLYN',
'PS': 'PRODUZIONE SCOVOLI',
'ST': 'STAMPAGGIO'
}
# KPIs we want to extract
kpi_columns = ['EFF_REP', 'EFF_PRO', 'EFF_SC', 'EFF_E', 'OEE']
# Calculate previous year
previous_year = selected_year - 1
# Initialize OpenAI client
openai_client = None
try:
api_key = os.getenv('OPENAI_API_KEY')
if api_key and api_key != 'your_openai_api_key_here':
openai_client = OpenAI(api_key=api_key)
print("βœ… OpenAI client initialized successfully")
else:
print("⚠️ OpenAI API key not found or not set. Comments will show 'NO API KEY'.")
except Exception as e:
print(f"⚠️ Error initializing OpenAI: {e}. Comments will show error message.")
# Initialize results list
results = []
# Process each reparto
for reparto_code in available_reparti:
print(f"πŸ”„ Processing {reparto_code}...")
# Get YTD data for this reparto
df_ytd = create_df_eff_reparto_ytd(df, selected_month, selected_year, reparto_code)
# Initialize row data
row_data = {
'REPARTO': reparto_code,
'REPARTO_DESCR': reparto_descriptions.get(reparto_code, reparto_code)
}
# Extract KPI values for both years
kpi_values = {}
for kpi in kpi_columns:
# Initialize with NaN
row_data[f'{kpi}_{previous_year}'] = np.nan
row_data[f'{kpi}_{selected_year}'] = np.nan
if not df_ytd.empty:
# Previous year value
prev_year_rows = df_ytd[df_ytd['Year'] == previous_year]
if len(prev_year_rows) > 0:
prev_val = prev_year_rows[kpi].iloc[0]
row_data[f'{kpi}_{previous_year}'] = prev_val
kpi_values[f'{kpi}_prev'] = prev_val
# Current year value
curr_year_rows = df_ytd[df_ytd['Year'] == selected_year]
if len(curr_year_rows) > 0:
curr_val = curr_year_rows[kpi].iloc[0]
row_data[f'{kpi}_{selected_year}'] = curr_val
kpi_values[f'{kpi}_curr'] = curr_val
# Generate comment using OpenAI - NO FALLBACK
comment = generate_kpi_comment(kpi_values, reparto_code, previous_year, selected_year, openai_client)
row_data['COMMENTO'] = comment
print(f" πŸ’¬ Comment for {reparto_code}: {comment}")
results.append(row_data)
# Create the final dataframe
df_all_reparti = pd.DataFrame(results)
print(f"πŸ“Š DataFrame created with shape: {df_all_reparti.shape}")
print(f"πŸ“‹ Columns before reordering: {list(df_all_reparti.columns)}")
# Verify COMMENTO column exists
if 'COMMENTO' not in df_all_reparti.columns:
print("❌ ERROR: COMMENTO column not found in DataFrame!")
return df_all_reparti
# Set reparto description as index
df_all_reparti.set_index('REPARTO_DESCR', inplace=True)
# Remove the REPARTO column since we have it in the index
df_all_reparti.drop('REPARTO', axis=1, inplace=True)
# Create properly ordered columns: alternate between previous and current year for each KPI + comment at the end
ordered_columns = []
for kpi in kpi_columns:
ordered_columns.extend([f'{kpi}_{previous_year}', f'{kpi}_{selected_year}'])
ordered_columns.append('COMMENTO')
print(f"πŸ“‹ Ordered columns: {ordered_columns}")
# Verify all ordered columns exist
missing_cols = [col for col in ordered_columns if col not in df_all_reparti.columns]
if missing_cols:
print(f"❌ ERROR: Missing columns: {missing_cols}")
print(f"πŸ“‹ Available columns: {list(df_all_reparti.columns)}")
return df_all_reparti
df_all_reparti = df_all_reparti[ordered_columns]
# Round values to 1 decimal place (except comment column)
for col in df_all_reparti.columns:
if col != 'COMMENTO':
df_all_reparti[col] = df_all_reparti[col].round(1)
print(f"βœ… Final DataFrame shape: {df_all_reparti.shape}")
print(f"πŸ“‹ Final columns: {list(df_all_reparti.columns)}")
return df_all_reparti
def generate_kpi_comment(kpi_values, reparto_code, previous_year, selected_year, openai_client=None):
"""
Generates a short Italian comment about KPI performance using GPT-4o-mini.
If OpenAI is not available or fails, returns an error message.
Parameters:
-----------
kpi_values : dict
Dictionary with KPI values for both years
reparto_code : str
Department code
previous_year : int
Previous year
selected_year : int
Current year
openai_client : OpenAI client or None
OpenAI client instance
Returns:
--------
str
Short Italian comment about the performance or error message
"""
# If no OpenAI client, return error message
if not openai_client:
return "❌ NO API KEY"
# Get target KPIs for comparison
target_map = TARGET_KPI_MAP.get(reparto_code, {})
try:
# Prepare TARGET_KPI_MAP context for AI (only confirmed targets)
# Exclude AS, IS, PS as they are provisional
confirmed_targets = ['ST', 'MS', 'DC'] # Only these have confirmed targets
target_context = f"""
CONTESTO TARGET KPI CONFERMATI (solo per reparti con target definitivi):
"""
for dept_code, targets in TARGET_KPI_MAP.items():
if dept_code in confirmed_targets:
dept_name = {
'ST': 'STAMPAGGIO',
'MS': 'STAMPAGGIO SURLYN',
'DC': 'DECORAZIONI'
}.get(dept_code, dept_code)
target_context += f"- {dept_name} ({dept_code}): "
target_context += ", ".join([f"{kpi}>{target}%" for kpi, target in targets.items()])
target_context += "\n"
target_context += f"""
IMPORTANTE: I target per AS (ASSEMBLAGGIO), IS (INIEZIONE SOFFIAGGIO), PS (PRODUZIONE SCOVOLI) sono provvisori e NON devono essere considerati nell'analisi.
"""
# Prepare data summary for AI
data_summary = f"""
Reparto: {reparto_code}
Anno precedente: {previous_year}
Anno corrente: {selected_year}
KPI Performance:
"""
for kpi in ['EFF_REP', 'EFF_PRO', 'EFF_SC', 'EFF_E', 'OEE']:
prev_val = kpi_values.get(f'{kpi}_prev')
curr_val = kpi_values.get(f'{kpi}_curr')
target_val = target_map.get(kpi, 0)
if pd.notna(prev_val) and pd.notna(curr_val):
change = "migliorato" if curr_val > prev_val else "peggiorato" if curr_val < prev_val else "stabile"
# Only mention target comparison for confirmed departments
if reparto_code in confirmed_targets:
vs_target = "sotto target" if curr_val < target_val else "sopra target"
data_summary += f"- {kpi}: {prev_val:.1f}% β†’ {curr_val:.1f}% ({change}, {vs_target} {target_val}%)\n"
else:
data_summary += f"- {kpi}: {prev_val:.1f}% β†’ {curr_val:.1f}% ({change})\n"
# Create prompt with TARGET_KPI_MAP context and KPI meanings
prompt = f"""Sei un analista di produzione industriale. Analizza questi dati KPI e genera un commento molto breve (massimo 15 parole) in italiano che riassuma la performance complessiva del reparto.
SIGNIFICATO DEI KPI:
- EFF_REP = Efficienza reparto
- EFF_PRO = Efficienza processo
- EFF_SC = Indice qualitΓ  (pezzi buoni / tot dichiarati)
- EFF_E = Efficienza economica
- OEE = Overall Equipment Effectiveness
{target_context}
{data_summary}
Esempi di stile:
- "Peggiorati tutti i valori."
- "Migliorate tutte le efficienze."
- "Reparto in generale migliorato efficienza di reparto sotto il valore target."
- "Migliorate tutte le efficienze (no OEE)."
- "L'unica efficienza peggiorata Γ¨ l'efficienza di reparto sotto il valore target. Peggiorato anche OEE. Migliorati gli altri KPI."
REGOLE IMPORTANTI:
1. Per ST, MS, DC: considera i target specifici nell'analisi
2. Per AS, IS, PS: NON considerare i target (sono provvisori), concentrati solo sui trend year-over-year.
3. Mantieni il commento molto conciso e diretto
Genera un commento simile basato su queste regole:"""
response = openai_client.chat.completions.create(
model="gpt-4o-mini",
messages=[
{"role": "system", "content": "Sei un esperto analista di produzione industriale che genera commenti brevi e precisi sui KPI. Conosci i target confermati solo per ST, MS, DC. Per AS, IS, PS ignora i target e concentrati sui trend."},
{"role": "user", "content": prompt}
],
max_tokens=50,
temperature=0.3
)
ai_comment = response.choices[0].message.content.strip()
# Clean up the comment
ai_comment = ai_comment.replace('"', '').replace("'", '').strip()
if ai_comment.endswith('.'):
ai_comment = ai_comment[:-1]
return ai_comment
except Exception as e:
error_msg = f"❌ API ERROR: {str(e)[:30]}..."
return error_msg
def create_df_h_fer_causali_monthly(df: pd.DataFrame, selected_year: int, selected_month: int) -> pd.DataFrame:
"""
Creates a DataFrame showing monthly H_FER totals and top 3 downtime causes
for selected_year and previous_year, up to selected_month.
Parameters:
-----------
df : pd.DataFrame
Raw data DataFrame
selected_year : int
Current year for analysis
selected_month : int
Month up to which to analyze (1-12)
Returns:
--------
pd.DataFrame
DataFrame with months as rows and columns for:
- previous_year H_FER total
- previous_year TOP 3 CAUSALI FERMO
- selected_year H_FER total
- selected_year TOP 3 CAUSALI FERMO
"""
import pandas as pd
import numpy as np
previous_year = selected_year - 1
# Italian month names in uppercase
month_names = [
'GENNAIO', 'FEBBRAIO', 'MARZO', 'APRILE', 'MAGGIO', 'GIUGNO',
'LUGLIO', 'AGOSTO', 'SETTEMBRE', 'OTTOBRE', 'NOVEMBRE', 'DICEMBRE'
]
# Filter data for both years up to selected_month
df_filtered = df[
((df['ANNO'] == previous_year) & (df['MESE'] <= selected_month)) |
((df['ANNO'] == selected_year) & (df['MESE'] <= selected_month))
].copy()
# Remove rows with null or zero H_FER
df_filtered = df_filtered[(df_filtered['H_FER'].notna()) & (df_filtered['H_FER'] > 0)]
# Initialize result DataFrame
result_months = month_names[:selected_month]
result_df = pd.DataFrame(
index=result_months,
columns=[
f'{previous_year}\nH FER',
f'TOP 3 CAUSALI\nFERMO {previous_year}',
f'{selected_year}\nH FER',
f'TOP 3 CAUSALI\nFERMO {selected_year}'
]
)
# Process each month
for month_num in range(1, selected_month + 1):
month_name = month_names[month_num - 1]
# Process each year
for year in [previous_year, selected_year]:
# Filter data for this month and year
month_data = df_filtered[
(df_filtered['ANNO'] == year) &
(df_filtered['MESE'] == month_num)
]
if not month_data.empty:
# Calculate total H_FER for this month
total_h_fer = month_data['H_FER'].sum()
# Find top 3 causes by H_FER
top_causes = (month_data.groupby('CAU_F_DESCR')['H_FER']
.sum()
.sort_values(ascending=False)
.head(3))
# Format the top causes string
top_causes_str = ""
for i, (cause, hours) in enumerate(top_causes.items()):
if i > 0:
top_causes_str += "\n"
top_causes_str += f"β€’ {cause} {round(hours)} h"
# Fill in the result DataFrame
result_df.loc[month_name, f'{year}\nH FER'] = round(total_h_fer)
result_df.loc[month_name, f'TOP 3 CAUSALI\nFERMO {year}'] = top_causes_str
else:
# No data for this month/year
result_df.loc[month_name, f'{year}\nH FER'] = 0
result_df.loc[month_name, f'TOP 3 CAUSALI\nFERMO {year}'] = ""
return result_df
def _annotate_causali_bars(ax, df_viz, year_prev, year_curr, causale_order):
"""
Annotate the bar-chart produced by `build_causali_fermo_chart()`.
β€’ Smart placement: puts the label above the bar unless it would overflow,
in which case it is drawn inside the bar.
β€’ Colour logic: label always black for causali fermo percentages
β€’ Contrast: labels drawn inside bars have a white stroke for readability.
Parameters
----------
ax : matplotlib.axes.Axes
The axes object containing the bars to annotate
df_viz : pandas.DataFrame
The visualization dataframe with causali data
year_prev : int
Previous year
year_curr : int
Current year
causale_order : list
List of causali in display order
Returns
-------
matplotlib.axes.Axes
The same axes, for possible chaining.
"""
# --- constants --------------------------------------------------------- #
y_min, y_max = ax.get_ylim()
offset = max((y_max - y_min) * 0.015, 0.1) # 1.5% of range or β‰₯ 0.1%
# How many bars compose one causale group?
bars_per_group = 0
if ax.containers:
bars_per_group = len(ax.containers)
if bars_per_group == 0:
return ax
# ----------------------------------------------------------------------- #
for idx, bar in enumerate(ax.patches):
height = bar.get_height()
if height <= 0: # nothing to show
continue
# Identify causale and bar type (prev-yr / curr-yr)
causale_idx = idx % len(causale_order)
tipo_idx = idx // len(causale_order)
if causale_idx >= len(causale_order): # safety net
continue
# --- colour selection --------------------------------------------- #
label_colour = "black" # Default black for all causali bars
# --- label positioning and contrast effect ------------------------ #
bbox_props = None
if height > y_max - (y_max * 0.05): # Overflow if height > 95% of y_max
y_pos = height - offset
va = "top"
# Add a white box for contrast when inside the bar
bbox_props = dict(boxstyle="square,pad=0.2", fc="white", ec="none", alpha=0.7)
else: # above the bar
y_pos = height + offset
va = "bottom"
ax.text(
bar.get_x() + bar.get_width() / 2.0,
y_pos,
f"{height:.1f}%",
ha="center",
va=va,
fontsize=15,
fontweight="bold",
color=label_colour,
bbox=bbox_props
)
return ax
def build_causali_fermo_chart(df: pd.DataFrame, selected_year: int, selected_month: int) -> plt.Figure:
"""
Builds a seaborn bar chart comparing downtime causes as percentage of scheduled hours
between selected_year and previous_year (YTD up to selected_month).
"""
import pandas as pd
import numpy as np
import locale
# Set Italian locale for month names (fallback to default if not available)
try:
locale.setlocale(locale.LC_TIME, 'it_IT.UTF-8')
except:
try:
locale.setlocale(locale.LC_TIME, 'it_IT')
except:
pass
previous_year = selected_year - 1
# Filter data for both years up to selected_month
df_filtered = df[
((df['ANNO'] == previous_year) & (df['MESE'] <= selected_month)) |
((df['ANNO'] == selected_year) & (df['MESE'] <= selected_month))
].copy()
# Calculate total H_FER by year and cause (only rows with H_FER > 0)
h_fer_data = df_filtered[
(df_filtered['H_FER'].notna()) &
(df_filtered['CAU_F_DESCR'].notna())
].groupby(['ANNO', 'CAU_F_DESCR'])['H_FER'].sum().reset_index()
# Calculate total H_SCHED by year (all rows with H_SCHED > 0)
h_sched_data = df_filtered[
(df_filtered['H_SCHED'].notna())
].groupby('ANNO')['H_SCHED'].sum().reset_index()
if h_fer_data.empty or h_sched_data.empty:
print(f"❌ No downtime or schedule data available up to month {selected_month}")
return None
# Get top causes based on selected_year impact only (descending order)
selected_year_data = h_fer_data[h_fer_data['ANNO'] == selected_year]
selected_year_sched = h_sched_data[h_sched_data['ANNO'] == selected_year]
if not selected_year_data.empty and not selected_year_sched.empty:
h_sched_selected = selected_year_sched['H_SCHED'].iloc[0]
# Calculate percentages for selected year and sort descending
selected_year_pct = (selected_year_data.set_index('CAU_F_DESCR')['H_FER'] / h_sched_selected * 100).sort_values(ascending=False)
top_causes = selected_year_pct.head(4).index.tolist()
else:
# Fallback to total impact if selected year has no data
top_causes = (h_fer_data.groupby('CAU_F_DESCR')['H_FER']
.sum()
.sort_values(ascending=False)
.head(4)
.index.tolist())
# Calculate percentages
data_viz = []
for cause in top_causes:
for year in [previous_year, selected_year]:
# Get H_FER for this cause and year
fer_row = h_fer_data[
(h_fer_data['CAU_F_DESCR'] == cause) &
(h_fer_data['ANNO'] == year)
]
# Get total H_SCHED for this year
sched_row = h_sched_data[h_sched_data['ANNO'] == year]
if len(fer_row) > 0 and len(sched_row) > 0:
h_fer_val = fer_row['H_FER'].iloc[0]
h_sched_val = sched_row['H_SCHED'].iloc[0]
pct_value = (h_fer_val / h_sched_val) * 100
else:
pct_value = 0.0
data_viz.append({
'Causale': cause,
'Anno': str(year),
'Percentuale': pct_value
})
df_viz = pd.DataFrame(data_viz)
if df_viz.empty:
print(f"❌ No visualization data available")
return None
# Corporate color palette
COLORS = {
'previous_year': '#00AEEF',
'selected_year': '#4D4D4F'
}
# Create figure
fig, ax = plt.subplots(figsize=(14, 10.5), dpi=100, facecolor='white')
# Create the bar plot
bar_plot = sns.barplot(
data=df_viz,
x='Causale',
y='Percentuale',
hue='Anno',
order=top_causes,
hue_order=[str(previous_year), str(selected_year)],
palette=[COLORS['previous_year'], COLORS['selected_year']],
ax=ax
)
# Customize the plot
mese_names = ['', 'Gennaio', 'Febbraio', 'Marzo', 'Aprile', 'Maggio', 'Giugno',
'Luglio', 'Agosto', 'Settembre', 'Ottobre', 'Novembre', 'Dicembre']
mese_nome = mese_names[selected_month] if selected_month <= 12 else f'Mese {selected_month}'
title = f"Causali di Fermo - % su Ore Schedulate\nGennaio/{mese_nome} - {previous_year}/{selected_year}"
ax.set_title(title, fontsize=14, fontweight='bold', pad=20)
# Y-axis configuration
max_val = df_viz['Percentuale'].max()
y_limit = max(4.0, int(max_val * 1.2))
ax.set_ylim(0, y_limit)
ax.set_ylabel('Percentuale (%)', fontsize=12)
ax.set_xlabel('')
# Grid lines
ax.grid(True, axis='y', alpha=0.3, linestyle='-', linewidth=0.5)
ax.set_axisbelow(True)
# Format y-axis labels as percentages
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x:.1f}%'))
# Rotate x-axis labels for better readability
plt.xticks(rotation=0, ha='center', fontsize=15)
# Add data labels on bars
_annotate_causali_bars(ax, df_viz, previous_year, selected_year, top_causes)
# Legend configuration
legend = ax.legend(title='', loc='upper center', bbox_to_anchor=(0.5, -0.15),
ncol=2, frameon=False, fontsize=13)
# Adjust layout
plt.tight_layout()
return fig
def add_campionatura_table_slide(prs, df, selected_year, selected_month, reparto_code, template_path=None, output_dir='.'):
"""
Adds a new slide with a styled campionatura (sampling hours) comparison table to a presentation.
If `prs` is None, a new presentation is created using `create_ops_review_presentation`.
The function adds one slide for the specified department, showing a month-by-month
campionatura comparison for the selected year vs. the previous year.
Args:
prs (pptx.Presentation or None): The presentation object. If None, a new one is created.
df (pd.DataFrame): The raw 'Dati Grezzi' DataFrame.
selected_year (int): The primary year for comparison.
selected_month (int): The month to report up to.
reparto_code (str): The department code (e.g., 'ST', 'MS').
template_path (str, optional): Path to the PowerPoint template. Required if prs is None.
output_dir (str): Directory where the presentation will be saved.
Returns:
prs (pptx.Presentation): The presentation object with the new slide added.
"""
from lxml import etree
from pptx.oxml.ns import qn
from pptx.util import Inches, Pt
from pptx.dml.color import RGBColor
from pptx.enum.text import PP_ALIGN, MSO_VERTICAL_ANCHOR
import pandas as pd
import locale
# Set Italian locale for month names
try:
locale.setlocale(locale.LC_TIME, 'it_IT.UTF-8')
except locale.Error:
try:
locale.setlocale(locale.LC_TIME, 'it_IT')
except locale.Error:
print("⚠️ Italian locale not found, using default. Month names may be in English.")
if prs is None:
if template_path is None:
raise ValueError("A template_path must be provided if prs is None.")
print(f"✨ Creating new presentation using template")
_, prs = create_ops_review_presentation(template_path, selected_month, selected_year, output_dir)
else:
print(f"πŸ“‚ Using provided presentation object")
# --- Data and Metadata Preparation ---
previous_year = selected_year - 1
# Use the same department descriptions mapping
REPARTO_DESCRIPTIONS = {
'AS': 'ASSEMBLAGGIO',
'DC': 'DECORAZIONI',
'IS': 'INIEZIONE SOFFIAGGIO',
'MS': 'STAMPAGGIO SURLYN',
'PS': 'PRODUZIONE SCOVOLI',
'ST': 'STAMPAGGIO'
}
reparto_descr = REPARTO_DESCRIPTIONS.get(reparto_code, reparto_code)
# Get campionatura data using the build_df_campionatura function
try:
df_campionatura = build_df_campionatura(df, reparto_code, selected_year, selected_month)
except ValueError as e:
print(f"❌ Error getting campionatura data: {e}")
return prs
# --- Slide Creation ---
# Try different layouts to find one with a title
slide_layout = None
for layout_idx in range(len(prs.slide_layouts)):
test_slide = prs.slides.add_slide(prs.slide_layouts[layout_idx])
if test_slide.shapes.title is not None:
prs.slides._sldIdLst.remove(prs.slides._sldIdLst[-1])
slide_layout = prs.slide_layouts[layout_idx]
break
else:
prs.slides._sldIdLst.remove(prs.slides._sldIdLst[-1])
if slide_layout is None:
slide_layout = prs.slide_layouts[1]
slide = prs.slides.add_slide(slide_layout)
# Set Title
title_text = f"ORE CAMPIONATURA I.M {previous_year}/{selected_year}"
if slide.shapes.title is not None:
slide.shapes.title.text = title_text
slide.shapes.title.text_frame.paragraphs[0].font.size = Pt(32)
slide.shapes.title.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT
else:
# Create title manually
title_box = slide.shapes.add_textbox(Inches(0.3), Inches(0.3), Inches(12.3), Inches(1.0))
title_frame = title_box.text_frame
title_frame.text = title_text
title_para = title_frame.paragraphs[0]
title_para.font.size = Pt(32)
title_para.font.bold = True
title_para.alignment = PP_ALIGN.LEFT
# --- Table Creation and Positioning ---
# Calculate number of rows: header + months up to selected_month
rows = 1 + selected_month # 1 header + months
cols = 5 # Month + 4 data columns
table_shape = slide.shapes.add_table(rows, cols, Inches(0.3), Inches(1.6), Inches(12.4), Inches(7.0))
table = table_shape.table
# --- Header Styling ---
# Column headers
headers = [
f"{previous_year}\n{selected_year}",
"H CAMPIONATURA\nTEORICHE",
"H CAMPIONATURA\nREALI",
"Ξ”\nASSOLUTO",
"Ξ”\n%"
]
for i, header in enumerate(headers):
cell = table.cell(0, i)
cell.fill.solid()
if i == 0: # First column (years)
cell.fill.fore_color.rgb = RGBColor(0xF2, 0xF2, 0xF2) # Light grey BACKGROUND
else:
cell.fill.fore_color.rgb = RGBColor(0x3F, 0x3F, 0x3F) # Dark gray
tf = cell.text_frame
tf.clear()
if i == 0: # Years column - special styling
lines = header.split('\n')
# Previous year paragraph - GRAY TEXT
p1 = tf.paragraphs[0]
p1.text = lines[0]
p1.font.color.rgb = RGBColor(0x00, 0xAE, 0xEF) # BLUE
p1.font.bold = True
p1.font.size = Pt(20)
p1.alignment = PP_ALIGN.CENTER
# Current year paragraph - WHITE TEXT
p2 = tf.add_paragraph()
p2.text = lines[1]
p2.font.color.rgb = RGBColor(0x00, 0x00, 0x00) # Black
p2.font.bold = True
p2.font.size = Pt(20)
p2.alignment = PP_ALIGN.CENTER
else:
# Regular header styling
p = tf.paragraphs[0]
p.text = header
p.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF) # WHITE
p.font.bold = True
p.font.size = Pt(18)
p.alignment = PP_ALIGN.CENTER
cell.vertical_anchor = MSO_VERTICAL_ANCHOR.MIDDLE
# --- Body Data Filling ---
# Italian month names
month_names = [pd.Timestamp(f'2024-{m}-01').strftime('%B').upper() for m in range(1, 13)]
current_row = 1
for month_idx in range(selected_month):
month_name = month_names[month_idx]
month_num = month_idx + 1
# Month Name Column
cell_month = table.cell(current_row, 0)
cell_month.text = month_name
cell_month.fill.solid()
cell_month.fill.fore_color.rgb = RGBColor(0xF2, 0xF2, 0xF2) # Light grey
p = cell_month.text_frame.paragraphs[0]
p.font.bold = True
p.font.color.rgb = RGBColor(0x00, 0x00, 0x00) # Black text
p.font.size = Pt(18)
p.alignment = PP_ALIGN.CENTER
cell_month.vertical_anchor = MSO_VERTICAL_ANCHOR.MIDDLE
# Get data for this month from both years
prev_period = pd.Period(f"{previous_year}-{month_num:02d}", freq='M')
curr_period = pd.Period(f"{selected_year}-{month_num:02d}", freq='M')
# Data columns
for col_idx, column in enumerate(['H_CAMP_T', 'H_CAMP', 'DELTA_ABS', 'DELTA_PCT'], 1):
cell = table.cell(current_row, col_idx)
tf = cell.text_frame
tf.clear()
# Get values for both years
prev_val = df_campionatura.loc[prev_period, column] if prev_period in df_campionatura.index else None
curr_val = df_campionatura.loc[curr_period, column] if curr_period in df_campionatura.index else None
# Previous year value
p1 = tf.paragraphs[0]
if pd.notna(prev_val) and prev_val != 0:
if column == 'DELTA_PCT':
p1.text = f"{prev_val:.0f}%".replace('.', ',') if pd.notna(prev_val) else ""
else:
p1.text = f"{prev_val:.0f}".replace('.', ',') if pd.notna(prev_val) else ""
else:
p1.text = ""
p1.font.color.rgb = RGBColor(0x00, 0xAE, 0xEF) # Blue (0x00, 0xAE, 0xEF)
p1.font.bold = True
p1.font.size = Pt(16)
p1.alignment = PP_ALIGN.CENTER
# Current year value
p2 = tf.add_paragraph()
if pd.notna(curr_val) and curr_val != 0:
if column == 'DELTA_PCT':
p2.text = f"{curr_val:+.0f}%".replace('.', ',') if pd.notna(curr_val) else ""
# Color coding for percentage deltas
if pd.notna(curr_val):
if curr_val > 0:
p2.font.color.rgb = RGBColor(0xC0, 0x00, 0x00) # Red for positive (bad)
else:
p2.font.color.rgb = RGBColor(0x00, 0xB0, 0x50) # Green for negative (good)
else:
p2.font.color.rgb = RGBColor(0x00, 0x00, 0x00) # Black
elif column == 'DELTA_ABS':
p2.text = f"{curr_val:+.0f}".replace('.', ',') if pd.notna(curr_val) else ""
# Color coding for absolute deltas
if pd.notna(curr_val):
if curr_val > 0:
p2.font.color.rgb = RGBColor(0x00, 0x00, 0x00) # Red for positive (bad)
else:
p2.font.color.rgb = RGBColor(0x00, 0x00, 0x00) # Green for negative (good)
else:
p2.font.color.rgb = RGBColor(0x00, 0x00, 0x00) # Black
else:
# Regular data columns (H_CAMP_T, H_CAMP)
p2.text = f"{curr_val:.0f}".replace('.', ',') if pd.notna(curr_val) else ""
p2.font.color.rgb = RGBColor(0x00, 0x00, 0x00) # Grey for data values
else:
p2.text = ""
p2.font.color.rgb = RGBColor(0x00, 0x00, 0x00) # Black
p2.font.bold = True
p2.font.size = Pt(16)
p2.alignment = PP_ALIGN.CENTER
cell.fill.solid()
cell.fill.fore_color.rgb = RGBColor(0xFF, 0xFF, 0xFF) # White background
cell.vertical_anchor = MSO_VERTICAL_ANCHOR.MIDDLE
current_row += 1
# --- Table Border Styling (BLACK borders) ---
def set_table_black_borders(table):
"""Set all borders to solid 1pt black lines on every cell."""
NS_A = "http://schemas.openxmlformats.org/drawingml/2006/main"
# Remove table style to prevent overrides
tbl = table._tbl
tblPr = tbl.tblPr
tblStyle = tblPr.find(qn('a:tblStyle'))
if tblStyle is not None:
tblPr.remove(tblStyle)
# Set borders on every cell
for row in table.rows:
for cell in row.cells:
tc = cell._tc
tcPr = tc.get_or_add_tcPr()
for border_dir in ('lnL', 'lnR', 'lnT', 'lnB'):
# Remove existing borders
for elem in tcPr.findall(f'a:{border_dir}', namespaces={'a': NS_A}):
tcPr.remove(elem)
# Add 1pt black border
ln = etree.SubElement(
tcPr, f'{{{NS_A}}}{border_dir}',
{'w': '12700', 'cap': 'flat', 'cmpd': 'sng', 'algn': 'ctr'}
)
solidFill = etree.SubElement(ln, f'{{{NS_A}}}solidFill')
etree.SubElement(solidFill, f'{{{NS_A}}}srgbClr', val='000000')
etree.SubElement(ln, f'{{{NS_A}}}prstDash', val='solid')
# Apply black borders
set_table_black_borders(table)
# --- Final Table Adjustments ---
# Set column widths
col_widths = [Inches(3.5), Inches(4.5), Inches(4.5), Inches(4.5), Inches(4.5)]
for i, width in enumerate(col_widths):
table.columns[i].width = width
# Set row heights
table.rows[0].height = Inches(1.0) # Header row
for r in range(1, rows):
table.rows[r].height = Inches(0.8) # Data rows
print(f"βœ… Slide 'ORE CAMPIONATURA' for {reparto_descr} added successfully.")
# --- Unit Test Checks ---
print("\n--- Running Checks ---")
print(f"Table has {len(table.rows)} rows (expected {rows})")
print(f"Table has {len(table.columns)} columns (expected {cols})")
print(f"βœ… Campionatura slide created for department {reparto_code}")
return prs
def _annotate_attrezzaggi_bars(ax, theoretical_times, year_prev, year_curr, attr_order):
"""
Annotate the bar-chart produced by `build_attrezzaggi_chart()`.
β€’ Smart placement: puts the label above the bar unless it would overflow,
in which case it is drawn inside the bar.
β€’ Colour logic:
– Theoretical bar β†’ label always black
– Prev/Cur Yr β†’ green if ≀ theoretical, red if > theoretical
β€’ Contrast: labels drawn inside bars have a white stroke for readability.
Returns
-------
matplotlib.axes.Axes
The same axes, for possible chaining.
"""
import matplotlib.pyplot as plt
import matplotlib.patheffects as path_effects
# --- constants --------------------------------------------------------- #
GREEN = "#006100" # Green for values ≀ theoretical (good performance)
RED = "#C00000" # Red for values > theoretical (poor performance)
y_min, y_max = ax.get_ylim()
offset = max((y_max - y_min) * 0.015, 0.2) # 1.5% of range or β‰₯ 0.2 hours
# How many bars compose one attrezzaggio group?
bars_per_group = 0
if ax.containers:
bars_per_group = len(ax.containers)
if bars_per_group == 0:
return ax
# Map attr_order to theoretical times
attr_to_theoretical = {
'CS': theoretical_times['CS'],
'CV': theoretical_times['CV'],
'CC': theoretical_times['CC']
}
# ----------------------------------------------------------------------- #
for idx, bar in enumerate(ax.patches):
height = bar.get_height()
if height <= 0: # nothing to show
continue
# Identify attrezzaggio type and bar type (prev-yr / theoretical / curr-yr)
attr_idx = idx % len(attr_order)
tipo_idx = idx // len(attr_order)
if attr_idx >= len(attr_order): # safety net
continue
attr_type = attr_order[attr_idx]
theoretical_val = attr_to_theoretical[attr_type]
bar_type = ""
# Determine bar_type from hue order used in the plot
hue_order = [str(year_prev), 'Teorico', str(year_curr)]
if tipo_idx < len(hue_order):
bar_type = hue_order[tipo_idx]
# --- colour selection --------------------------------------------- #
label_colour = "black" # Default
if bar_type == "Teorico":
label_colour = "black"
elif bar_type in {str(year_prev), str(year_curr)}:
# For attrezzaggi, lower is better (green if ≀ theoretical, red if > theoretical)
label_colour = GREEN if height <= theoretical_val else RED
# --- label positioning and contrast effect ------------------------ #
bbox_props = None
if height > y_max - (y_max * 0.05): # Overflow if height > 95% of y_max
y_pos = height - offset
va = "top"
# Add a white box for contrast when inside the bar
bbox_props = dict(boxstyle="square,pad=0.2", fc="white", ec="none", alpha=0.7)
else: # above the bar
y_pos = height + offset
va = "bottom"
ax.text(
bar.get_x() + bar.get_width() / 2.0,
y_pos,
f"{height:.1f}",
ha="center",
va=va,
fontsize=13,
fontweight="bold",
color=label_colour,
bbox=bbox_props
)
return ax
def build_attrezzaggi_chart(df, selected_month, selected_year, reparto_codes):
"""
Builds a seaborn bar chart comparing selected_year YTD attrezzaggi times
with previous year and theoretical times.
Parameters:
-----------
df : pd.DataFrame
Raw data DataFrame (Dati_Grezzi)
selected_month : int
Month up to which to calculate YTD (1-12)
selected_year : int
Year for comparison
reparto_codes : str or list
Department code(s) (e.g., 'ST', ['ST', 'MS'])
Returns:
--------
matplotlib.figure.Figure
The created figure object
"""
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from datetime import datetime
import locale
# Set Italian locale for month names (fallback to default if not available)
try:
locale.setlocale(locale.LC_TIME, 'it_IT.UTF-8')
except:
try:
locale.setlocale(locale.LC_TIME, 'it_IT')
except:
pass # Use default locale
# Theoretical times for each attrezzaggio type
theoretical_times = {
'CS': 8.0, # Cambio stampo: 8 ore
'CV': 6.0, # Cambio versione: 6 ore
'CC': 4.0 # Cambio colore: 4 ore
}
# Generate YTD data using build_df_attrezzaggi
df_attrezzaggi = build_df_attrezzaggi(df, reparto_codes, selected_year, selected_month)
if df_attrezzaggi.empty:
print(f"❌ No attrezzaggi data available for {reparto_codes} up to month {selected_month} of {selected_year}")
return None
# Corporate color palette (similar to efficiency chart but adapted)
COLORS = {
'previous_year': '#00AEEF', # Corporate blue
'theoretical': '#E6E2E0', # Light taupe for theoretical
'selected_year': '#4D4D4F' # Dark grey
}
# Attrezzaggio order for x-axis
attr_order = ['CS', 'CV', 'CC']
attr_labels = ['Cambio Stampo', 'Cambio Versione', 'Cambio Colore']
# Get department description
if isinstance(reparto_codes, str):
reparto_codes = [reparto_codes]
reparto_descriptions = {
'ST': 'Stampaggio',
'IS': 'Iniezione Soffiaggio',
'AS': 'Assemblaggio',
'PS': 'Produzione Scovoli',
'MS': 'Stampaggio Surlyn',
'DC': 'Decorazioni'
}
if len(reparto_codes) == 1:
descrizione_reparto = reparto_descriptions.get(reparto_codes[0], reparto_codes[0])
else:
descrizione_reparto = '+'.join([reparto_descriptions.get(r, r) for r in sorted(reparto_codes)])
# Prepare data for visualization
year_prev = selected_year - 1
# Extract values from df_attrezzaggi
data_viz = []
for attr_type in attr_order:
# Get values for both years
try:
# Previous year
prev_year_row = df_attrezzaggi[df_attrezzaggi['Year'] == year_prev]
prev_year_val = prev_year_row[f'MEDIA_H_{attr_type}'].iloc[0] if len(prev_year_row) > 0 else 0
except (KeyError, IndexError):
prev_year_val = 0
try:
# Current year
curr_year_row = df_attrezzaggi[df_attrezzaggi['Year'] == selected_year]
curr_year_val = curr_year_row[f'MEDIA_H_{attr_type}'].iloc[0] if len(curr_year_row) > 0 else 0
except (KeyError, IndexError):
curr_year_val = 0
# Theoretical time
theoretical_val = theoretical_times[attr_type]
# Add data points for each bar
attr_label = attr_labels[attr_order.index(attr_type)]
data_viz.extend([
{'Attrezzaggio': attr_label, 'Tipo': f'{year_prev}', 'Valore': prev_year_val, 'Color': 'previous_year'},
{'Attrezzaggio': attr_label, 'Tipo': 'Teorico', 'Valore': theoretical_val, 'Color': 'theoretical'},
{'Attrezzaggio': attr_label, 'Tipo': f'{selected_year}', 'Valore': curr_year_val, 'Color': 'selected_year'}
])
df_viz = pd.DataFrame(data_viz)
# Create figure with exact dimensions (960x540 px at 100 DPI)
fig, ax = plt.subplots(figsize=(10, 10.5), dpi=100, facecolor='white')
# Create the bar plot
bar_plot = sns.barplot(
data=df_viz,
x='Attrezzaggio',
y='Valore',
hue='Tipo',
order=attr_labels,
hue_order=[f'{year_prev}', 'Teorico', f'{selected_year}'],
palette=[COLORS['previous_year'], COLORS['theoretical'], COLORS['selected_year']],
ax=ax
)
# Customize the plot
# Title
mese_names = ['', 'Gennaio', 'Febbraio', 'Marzo', 'Aprile', 'Maggio', 'Giugno',
'Luglio', 'Agosto', 'Settembre', 'Ottobre', 'Novembre', 'Dicembre']
mese_nome = mese_names[selected_month] if selected_month <= 12 else f'Mese {selected_month}'
title = f"Tempi Attrezzaggio {descrizione_reparto} Gennaio/{mese_nome} - {year_prev}/{selected_year}"
ax.set_title(title, fontsize=14, fontweight='bold', pad=20)
# Y-axis configuration
# Find max value to set appropriate y-axis limit
max_val = df_viz['Valore'].max()
y_limit = max(16, int(max_val * 1.1)) # At least 16 hours or 110% of max value
ax.set_ylim(0, y_limit)
ax.set_ylabel('Tempo (ore)', fontsize=12)
ax.set_xlabel('')
# Grid lines
ax.grid(True, axis='y', alpha=0.3, linestyle='-', linewidth=0.5)
ax.set_axisbelow(True)
# Format y-axis labels as hours
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x:.0f}h'))
# Add data labels using helper function
_annotate_attrezzaggi_bars(ax, theoretical_times, year_prev, selected_year, attr_order)
# Legend configuration
legend = ax.legend(title='', loc='upper center', bbox_to_anchor=(0.5, -0.05),
ncol=3, frameon=False, fontsize=10)
# Adjust layout to prevent clipping
plt.tight_layout()
# Set transparent background if needed
fig.patch.set_alpha(1.0) # Change to 0.0 for transparent
return fig
def add_top_offenders_slide(prs, df, selected_year, selected_month):
"""
Creates a new slide with top causali fermo chart for all departments.
Parameters:
-----------
prs : pptx.presentation.Presentation
PowerPoint presentation object
df : pandas.DataFrame
Raw data DataFrame (Dati_Grezzi)
selected_year : int
Year for comparison
selected_month : int
Month up to which to calculate YTD (1-12)
Returns:
--------
pptx.presentation.Presentation
Updated presentation object with new chart slide
"""
import tempfile
import os
# Store initial slide count for unit testing
initial_slide_count = len(prs.slides)
# Generate the chart using build_causali_fermo_chart
fig = build_causali_fermo_chart(df, selected_year, selected_month)
if fig is None:
print("❌ Could not generate causali fermo chart")
return prs
# Create custom title based on selected_year
previous_year = selected_year - 1
chart_title = f"TOP 3 OFFENDER {previous_year}-{selected_year} (YTD)"
# Remove the chart title and y-axis label to avoid duplication and clean up the chart
fig.axes[0].set_title('')
fig.axes[0].set_ylabel('') # Remove y-axis label
# Save chart as temporary image
temp_img_path = None
try:
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp_file:
temp_img_path = tmp_file.name
# Save with exact dimensions (build_causali_fermo_chart already sets figsize correctly)
fig.savefig(temp_img_path, dpi=96, bbox_inches='tight',
facecolor='white', edgecolor='none')
# Try to use layout 1 ("B"), otherwise find suitable layout
slide_layout = None
if len(prs.slide_layouts) > 1:
# Try layout index 1 ("B")
slide_layout = prs.slide_layouts[1]
else:
# Fallback: find first layout with title and content placeholders
for layout in prs.slide_layouts:
placeholders = layout.placeholders
has_title = any(p.placeholder_format.type == 1 for p in placeholders) # Title placeholder
has_content = any(p.placeholder_format.type == 7 for p in placeholders) # Content placeholder
if has_title and has_content:
slide_layout = layout
break
# If still no suitable layout found, use the first available
if slide_layout is None:
slide_layout = prs.slide_layouts[0]
# Create new slide
slide = prs.slides.add_slide(slide_layout)
# Set slide title - LEFT ALIGNED and UPPERCASE to match table slide
title_placeholder = None
for placeholder in slide.placeholders:
if placeholder.placeholder_format.type == 1: # Title placeholder
title_placeholder = placeholder
break
if title_placeholder:
title_placeholder.text = chart_title # Custom title
# Format title: 32pt, bold, LEFT aligned, black (matching table slide)
title_frame = title_placeholder.text_frame
title_paragraph = title_frame.paragraphs[0]
title_paragraph.alignment = PP_ALIGN.LEFT # Changed from CENTER to LEFT
title_run = title_paragraph.runs[0]
title_run.font.size = Pt(32)
title_run.font.bold = True
title_run.font.color.rgb = RGBColor(0, 0, 0) # Black
else:
# Create title manually if no placeholder - LEFT ALIGNED and positioned to match table
title_box = slide.shapes.add_textbox(Inches(0.3), Inches(0.3), Inches(12.3), Inches(1.0))
title_frame = title_box.text_frame
title_frame.text = chart_title # Custom title
title_para = title_frame.paragraphs[0]
title_para.font.size = Pt(32)
title_para.font.bold = True
title_para.alignment = PP_ALIGN.LEFT # LEFT ALIGN to match table
title_para.font.color.rgb = RGBColor(0, 0, 0) # Black
# Insert chart image with specified dimensions: figsize=(14, 10.5) inches
content_placeholder = None
for placeholder in slide.placeholders:
if placeholder.placeholder_format.type == 7: # Content placeholder
content_placeholder = placeholder
break
# Define target dimensions: figsize=(14, 10.5) inches
target_width = Inches(14.0) # 14 inches width
target_height = Inches(10.5) # 10.5 inches height
if content_placeholder:
# Remove the placeholder
content_placeholder._element.getparent().remove(content_placeholder._element)
# Position image - moved right for better centering
# Standard slide width is ~13.33", so we move it right to center better
slide_width = Inches(13.33) # Standard slide width
# Calculate center position and then add offset to move right
center_left = (slide_width - target_width) / 2
# Move right by 1.5 inches for better visual centering
left = center_left + Inches(1.5) if target_width < slide_width else Inches(1.8)
top = Inches(1.6) # Match table top position
slide.shapes.add_picture(temp_img_path, left, top, target_width, target_height)
# Unit test checks
final_slide_count = len(prs.slides)
print(f"βœ… Top offenders slide added")
print(f"πŸ“Š Slide count: {initial_slide_count} β†’ {final_slide_count}")
print(f"πŸ“‹ Slide title: '{chart_title}'")
print(f"🎯 Chart title and y-axis label removed from image")
# Verify image dimensions (14" x 10.5")
added_shape = slide.shapes[-1] # Last added shape should be our image
expected_width_emu = int(14.0 * 914400) # 14" in EMU
expected_height_emu = int(10.5 * 914400) # 10.5" in EMU
tolerance = 0.01 # 1% tolerance
width_ok = abs(added_shape.width - expected_width_emu) / expected_width_emu <= tolerance
height_ok = abs(added_shape.height - expected_height_emu) / expected_height_emu <= tolerance
if width_ok and height_ok:
print(f"βœ… Image dimensions verified: {added_shape.width} x {added_shape.height} EMU")
else:
print(f"⚠️ Image dimensions: {added_shape.width} x {added_shape.height} EMU (expected ~{expected_width_emu} x {expected_height_emu})")
print(f"πŸ“ Image positioned at: Left={added_shape.left/914400:.1f}\", Top={added_shape.top/914400:.1f}\"")
# Warning about size
if target_width > slide_width:
print(f"⚠️ Warning: Image width ({target_width/914400:.1f}\") exceeds standard slide width ({slide_width/914400:.1f}\")")
# Unit test assertions
assert final_slide_count == initial_slide_count + 1, f"Expected {initial_slide_count + 1} slides, got {final_slide_count}"
finally:
# Clean up temporary file
if temp_img_path and os.path.exists(temp_img_path):
os.unlink(temp_img_path)
# Close the matplotlib figure to free memory
plt.close(fig)
return prs
def add_attrezzaggi_slide(prs, df, selected_year, selected_month, reparto_codes, template_path=None, output_dir='.'):
"""
Adds a new slide with both a styled attrezzaggi comparison table and chart to a presentation.
If `prs` is None, a new presentation is created using `create_ops_review_presentation`.
The function adds one slide with both table and chart for the specified department(s),
showing YTD attrezzaggi comparison for the selected year vs. the previous year.
Args:
prs (pptx.Presentation or None): The presentation object. If None, a new one is created.
df (pd.DataFrame): The raw 'Dati Grezzi' DataFrame.
selected_year (int): The primary year for comparison.
selected_month (int): The month to report up to.
reparto_codes (str or list): The department code(s) (e.g., 'ST', ['ST', 'MS']).
template_path (str, optional): Path to the PowerPoint template. Required if prs is None.
output_dir (str): Directory where the presentation will be saved.
Returns:
prs (pptx.Presentation): The presentation object with the new slide added.
"""
from lxml import etree
from pptx.oxml.ns import qn
from pptx.util import Inches, Pt
from pptx.dml.color import RGBColor
from pptx.enum.text import PP_ALIGN, MSO_VERTICAL_ANCHOR
import pandas as pd
import locale
import tempfile
import os
import matplotlib.pyplot as plt
# Set Italian locale for month names
try:
locale.setlocale(locale.LC_TIME, 'it_IT.UTF-8')
except locale.Error:
try:
locale.setlocale(locale.LC_TIME, 'it_IT')
except locale.Error:
print("⚠️ Italian locale not found, using default. Month names may be in English.")
if prs is None:
if template_path is None:
raise ValueError("A template_path must be provided if prs is None.")
print(f"✨ Creating new presentation using template")
_, prs = create_ops_review_presentation(template_path, selected_month, selected_year, output_dir)
else:
print(f"πŸ“‚ Using provided presentation object")
# --- Data and Metadata Preparation ---
previous_year = selected_year - 1
# Handle single reparto_code input
if isinstance(reparto_codes, str):
reparto_codes = [reparto_codes]
# Use the same department descriptions mapping
REPARTO_DESCRIPTIONS = {
'AS': 'ASSEMBLAGGIO',
'DC': 'DECORAZIONI',
'IS': 'INIEZIONE SOFFIAGGIO',
'MS': 'STAMPAGGIO SURLYN',
'PS': 'PRODUZIONE SCOVOLI',
'ST': 'STAMPAGGIO'
}
# Get description for title
if len(reparto_codes) == 1:
reparto_descr = REPARTO_DESCRIPTIONS.get(reparto_codes[0], reparto_codes[0])
else:
reparto_descriptions = [REPARTO_DESCRIPTIONS.get(r, r) for r in sorted(reparto_codes)]
reparto_descr = '+'.join(reparto_descriptions)
# Get attrezzaggi data using the build_df_attrezzaggi function
try:
df_attrezzaggi = build_df_attrezzaggi(df, reparto_codes, selected_year, selected_month)
except ValueError as e:
print(f"❌ Error getting attrezzaggi data: {e}")
return prs
# Generate chart using build_attrezzaggi_chart function
try:
fig = build_attrezzaggi_chart(df, selected_month, selected_year, reparto_codes)
if fig is None:
print(f"❌ Error generating attrezzaggi chart")
return prs
except Exception as e:
print(f"❌ Error generating attrezzaggi chart: {e}")
return prs
# --- Slide Creation ---
# Try different layouts to find one with a title
slide_layout = None
for layout_idx in range(len(prs.slide_layouts)):
test_slide = prs.slides.add_slide(prs.slide_layouts[layout_idx])
if test_slide.shapes.title is not None:
prs.slides._sldIdLst.remove(prs.slides._sldIdLst[-1])
slide_layout = prs.slide_layouts[layout_idx]
break
else:
prs.slides._sldIdLst.remove(prs.slides._sldIdLst[-1])
if slide_layout is None:
slide_layout = prs.slide_layouts[1]
slide = prs.slides.add_slide(slide_layout)
# Set Title - similar to campionatura
title_text = f"ZOOM ATTREZZAGGI I.M. {previous_year}/{selected_year} - YTD"
if slide.shapes.title is not None:
slide.shapes.title.text = title_text
slide.shapes.title.text_frame.paragraphs[0].font.size = Pt(32)
slide.shapes.title.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT
else:
# Create title manually
title_box = slide.shapes.add_textbox(Inches(0.3), Inches(0.3), Inches(12.3), Inches(1.0))
title_frame = title_box.text_frame
title_frame.text = title_text
title_para = title_frame.paragraphs[0]
title_para.font.size = Pt(32)
title_para.font.bold = True
title_para.alignment = PP_ALIGN.LEFT
# --- Table Creation and Positioning (Left Side) ---
# Position table on the left side of the slide
# NEW STRUCTURE: Rows are metrics, Columns are attrezzaggio types
table_rows = 5 # Header + H teoriche ATTR + H reali ATTR + NΒ°ATTR + Media h
table_cols = 4 # Years column + CS + CV + CC
# Table positioned on left side (first ~60% of slide width)
table_shape = slide.shapes.add_table(table_rows, table_cols, Inches(0.3), Inches(1.6), Inches(7.5), Inches(5.5))
table = table_shape.table
# --- Header Row Styling ---
headers = [
f"{previous_year}\n{selected_year}",
"<8h\nCS",
"<6h\nCV",
"<4h\nCC"
]
for i, header in enumerate(headers):
cell = table.cell(0, i)
cell.fill.solid()
if i == 0: # First column (years)
cell.fill.fore_color.rgb = RGBColor(0xF2, 0xF2, 0xF2) # Light grey BACKGROUND
else:
cell.fill.fore_color.rgb = RGBColor(0x3F, 0x3F, 0x3F) # Dark gray
tf = cell.text_frame
tf.clear()
if i == 0: # Years column - special styling
lines = header.split('\n')
# Previous year paragraph - BLUE TEXT
p1 = tf.paragraphs[0]
p1.text = lines[0]
p1.font.color.rgb = RGBColor(0x00, 0xAE, 0xEF) # BLUE
p1.font.bold = True
p1.font.size = Pt(28)
p1.alignment = PP_ALIGN.CENTER
# Current year paragraph - BLACK TEXT
p2 = tf.add_paragraph()
p2.text = lines[1]
p2.font.color.rgb = RGBColor(0x00, 0x00, 0x00) # Black
p2.font.bold = True
p2.font.size = Pt(28)
p2.alignment = PP_ALIGN.CENTER
else:
# Regular header styling
p = tf.paragraphs[0]
p.text = header
p.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF) # WHITE
p.font.bold = True
p.font.size = Pt(28)
p.alignment = PP_ALIGN.CENTER
cell.vertical_anchor = MSO_VERTICAL_ANCHOR.MIDDLE
# --- Table Data Filling ---
# Row labels (metrics) and corresponding data columns
row_metrics = [
("H teoriche\nATTR.", ["H_ATT_T_CS", "H_ATT_T_CV", "H_ATT_T_CC"]),
("H reali\nATTR.", ["OREATT_CS", "OREATT_CV", "OREATT_CC"]),
("NΒ°ATTR.", ["N_ATTR_CS", "N_ATTR_CV", "N_ATTR_CC"]),
("Media h", ["MEDIA_H_CS", "MEDIA_H_CV", "MEDIA_H_CC"])
]
for row_idx, (row_label, metric_columns) in enumerate(row_metrics, 1):
# Row Label Column (metrics) - FIXED STYLING
cell_label = table.cell(row_idx, 0)
cell_label.fill.solid()
cell_label.fill.fore_color.rgb = RGBColor(0xF2, 0xF2, 0xF2) # Light grey
# Handle multi-line text properly
tf = cell_label.text_frame
tf.clear()
if '\n' in row_label:
lines = row_label.split('\n')
# First line
p1 = tf.paragraphs[0]
p1.text = lines[0]
p1.font.bold = True
p1.font.color.rgb = RGBColor(0x00, 0x00, 0x00) # Black text
p1.font.size = Pt(26)
p1.alignment = PP_ALIGN.CENTER
# Second line
p2 = tf.add_paragraph()
p2.text = lines[1]
p2.font.bold = True
p2.font.color.rgb = RGBColor(0x00, 0x00, 0x00) # Black text
p2.font.size = Pt(26)
p2.alignment = PP_ALIGN.CENTER
else:
# Single line
p = tf.paragraphs[0]
p.text = row_label
p.font.bold = True
p.font.color.rgb = RGBColor(0x00, 0x00, 0x00) # Black text
p.font.size = Pt(26)
p.alignment = PP_ALIGN.CENTER
cell_label.vertical_anchor = MSO_VERTICAL_ANCHOR.MIDDLE
# Data columns for each attrezzaggio type (CS, CV, CC)
for col_idx, column in enumerate(metric_columns, 1):
cell = table.cell(row_idx, col_idx)
tf = cell.text_frame
tf.clear()
# Get values for both years
try:
prev_year_row = df_attrezzaggi[df_attrezzaggi['Year'] == previous_year]
prev_val = prev_year_row[column].iloc[0] if len(prev_year_row) > 0 else 0
except (KeyError, IndexError):
prev_val = 0
try:
curr_year_row = df_attrezzaggi[df_attrezzaggi['Year'] == selected_year]
curr_val = curr_year_row[column].iloc[0] if len(curr_year_row) > 0 else 0
except (KeyError, IndexError):
curr_val = 0
# Previous year value with 1 decimal place
p1 = tf.paragraphs[0]
if pd.notna(prev_val) and prev_val != 0:
# Format with 1 decimal place and use comma as decimal separator
p1.text = f"{prev_val:.1f}".replace('.', ',')
else:
p1.text = ""
p1.font.color.rgb = RGBColor(0x00, 0xAE, 0xEF) # Blue
p1.font.bold = True
p1.font.size = Pt(23)
p1.alignment = PP_ALIGN.CENTER
# Current year value with 1 decimal place
p2 = tf.add_paragraph()
if pd.notna(curr_val) and curr_val != 0:
# Format with 1 decimal place and use comma as decimal separator
p2.text = f"{curr_val:.1f}".replace('.', ',')
else:
p2.text = ""
p2.font.color.rgb = RGBColor(0x00, 0x00, 0x00) # Black
p2.font.bold = True
p2.font.size = Pt(23)
p2.alignment = PP_ALIGN.CENTER
cell.fill.solid()
cell.fill.fore_color.rgb = RGBColor(0xFF, 0xFF, 0xFF) # White background
cell.vertical_anchor = MSO_VERTICAL_ANCHOR.MIDDLE
# --- Table Border Styling (BLACK borders) ---
def set_table_black_borders(table):
"""Set all borders to solid 1pt black lines on every cell."""
NS_A = "http://schemas.openxmlformats.org/drawingml/2006/main"
# Remove table style to prevent overrides
tbl = table._tbl
tblPr = tbl.tblPr
tblStyle = tblPr.find(qn('a:tblStyle'))
if tblStyle is not None:
tblPr.remove(tblStyle)
# Set borders on every cell
for row in table.rows:
for cell in row.cells:
tc = cell._tc
tcPr = tc.get_or_add_tcPr()
for border_dir in ('lnL', 'lnR', 'lnT', 'lnB'):
# Remove existing borders
for elem in tcPr.findall(f'a:{border_dir}', namespaces={'a': NS_A}):
tcPr.remove(elem)
# Add 1pt black border
ln = etree.SubElement(
tcPr, f'{{{NS_A}}}{border_dir}',
{'w': '12700', 'cap': 'flat', 'cmpd': 'sng', 'algn': 'ctr'}
)
solidFill = etree.SubElement(ln, f'{{{NS_A}}}solidFill')
etree.SubElement(solidFill, f'{{{NS_A}}}srgbClr', val='000000')
etree.SubElement(ln, f'{{{NS_A}}}prstDash', val='solid')
# Apply black borders
set_table_black_borders(table)
# Set table column widths
col_widths = [Inches(3.6), Inches(3.1), Inches(3.1), Inches(3.1)]
for i, width in enumerate(col_widths):
table.columns[i].width = width
# Set row heights
table.rows[0].height = Inches(2) # Header row
for r in range(1, table_rows):
table.rows[r].height = Inches(1.9) # Data rows
# --- Chart Addition (Right Side) ---
# Remove chart title and y-axis label to avoid duplication
fig.axes[0].set_title('')
fig.axes[0].set_ylabel('')
# Save chart as temporary image
temp_img_path = None
try:
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp_file:
temp_img_path = tmp_file.name
fig.savefig(temp_img_path, dpi=96, bbox_inches='tight',
facecolor='white', edgecolor='none')
# Position chart on the right side of the slide (remaining ~40% of slide width)
chart_left = Inches(14.0) # Start after table
chart_top = Inches(1.6) # Match table top position
chart_width = Inches(10.0) # Adjusted for right side
chart_height = Inches(10.5) # Proportional height
slide.shapes.add_picture(temp_img_path, chart_left, chart_top, chart_width, chart_height)
print(f"βœ… Chart added to slide at position: Left={chart_left/914400:.1f}\", Top={chart_top/914400:.1f}\"")
print(f"πŸ“ Chart dimensions: {chart_width/914400:.1f}\" x {chart_height/914400:.1f}\"")
except Exception as e:
print(f"❌ Error adding chart to slide: {e}")
finally:
# Clean up temporary file
if temp_img_path and os.path.exists(temp_img_path):
os.unlink(temp_img_path)
# Close the matplotlib figure to free memory
plt.close(fig)
print(f"βœ… Slide 'ZOOM ATTREZZAGGI' for {reparto_descr} added successfully.")
# --- Unit Test Checks ---
print("\n--- Running Checks ---")
print(f"Table has {len(table.rows)} rows (expected {table_rows})")
print(f"Table has {len(table.columns)} columns (expected {table_cols})")
print(f"βœ… Attrezzaggi slide created for department(s) {reparto_codes}")
return prs
# NOTE: Additional slide functions (add_ytd_overall_slide,
# add_breakdowns_overall_slide) will be added next
def add_ytd_overall_slide(prs, df, selected_month, selected_year, template_path=None, output_dir='.'):
"""
Adds a new slide with a styled YTD overall efficiency table to a presentation.
If `prs` is None, a new presentation is created using `create_ops_review_presentation`.
The function adds one slide showing YTD KPI comparison for all departments,
comparing the selected year vs. the previous year.
Args:
prs (pptx.Presentation or None): The presentation object. If None, a new one is created.
df (pd.DataFrame): The raw 'Dati Grezzi' DataFrame.
selected_month (int): The month to report up to.
selected_year (int): The primary year for comparison.
template_path (str, optional): Path to the PowerPoint template. Required if prs is None.
output_dir (str): Directory where the presentation will be saved.
Returns:
prs (pptx.Presentation): The presentation object with the new slide added.
"""
from lxml import etree
from pptx.oxml.ns import qn
from pptx.util import Inches, Pt
from pptx.dml.color import RGBColor
from pptx.enum.text import PP_ALIGN, MSO_VERTICAL_ANCHOR
if prs is None:
if template_path is None:
raise ValueError("A template_path must be provided if prs is None.")
print(f"✨ Creating new presentation using template")
_, prs = create_ops_review_presentation(template_path, selected_month, selected_year, output_dir)
else:
print(f"πŸ“‚ Using provided presentation object")
# --- Data Preparation ---
previous_year = selected_year - 1
kpis_to_show = ['EFF_REP', 'EFF_PRO', 'EFF_SC', 'EFF_E', 'OEE']
# Get YTD data for all departments
print(f"πŸ”„ Getting YTD data for all departments...")
df_ytd_all = create_df_all_reparti_ytd(df, selected_month, selected_year)
# Check if we have data
if df_ytd_all.empty:
print("❌ No YTD data available for any department")
return prs
print(f"πŸ“Š YTD data shape: {df_ytd_all.shape}")
print(f"πŸ“‹ Departments: {list(df_ytd_all.index)}")
print(f"πŸ“‹ Columns: {list(df_ytd_all.columns)}")
# --- Slide Creation ---
# Try different layouts to find one with a title, starting with index 0
slide_layout = None
for layout_idx in range(len(prs.slide_layouts)):
test_slide = prs.slides.add_slide(prs.slide_layouts[layout_idx])
if test_slide.shapes.title is not None:
# Found a layout with title, remove the test slide and use this layout
prs.slides._sldIdLst.remove(prs.slides._sldIdLst[-1])
slide_layout = prs.slide_layouts[layout_idx]
break
else:
# Remove the test slide
prs.slides._sldIdLst.remove(prs.slides._sldIdLst[-1])
# If no layout with title found, use layout 1 and create title manually
if slide_layout is None:
slide_layout = prs.slide_layouts[1]
slide = prs.slides.add_slide(slide_layout)
# Set Title - LEFT ALIGNED and positioned ABOVE the table
title_text = "YTD OVERALL"
if slide.shapes.title is not None:
# Title placeholder exists
slide.shapes.title.text = title_text
slide.shapes.title.text_frame.paragraphs[0].font.size = Pt(32)
slide.shapes.title.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT # LEFT ALIGN
else:
# Create title manually - LEFT ALIGNED and positioned ABOVE table
title_box = slide.shapes.add_textbox(Inches(0.3), Inches(0.3), Inches(12.3), Inches(1.0))
title_frame = title_box.text_frame
title_frame.text = title_text
title_para = title_frame.paragraphs[0]
title_para.font.size = Pt(32)
title_para.font.bold = True
title_para.alignment = PP_ALIGN.LEFT # LEFT ALIGN
# --- Table Creation and Positioning ---
num_departments = len(df_ytd_all)
rows, cols = num_departments + 1, 7 # 1 header row + departments, 7 columns (dept + 5 KPIs + COMMENTO)
table_shape = slide.shapes.add_table(rows, cols, Inches(0.3), Inches(1.6), Inches(12.4), Inches(7.0))
table = table_shape.table
# --- Header Styling ---
# Department column header - RED BACKGROUND with years styled differently
cell_dept_header = table.cell(0, 0)
cell_dept_header.fill.solid()
cell_dept_header.fill.fore_color.rgb = RGBColor(0xB4, 0x45, 0x57) # RED BACKGROUND like in image
tf = cell_dept_header.text_frame
tf.clear()
# Previous year paragraph - GRAY TEXT
p1 = tf.paragraphs[0]
p1.text = str(previous_year)
p1.font.color.rgb = RGBColor(0x80, 0x80, 0x80) # GRAY
p1.font.bold = True
p1.font.size = Pt(20) # BIGGER FONT
p1.alignment = PP_ALIGN.CENTER
# Current year paragraph - WHITE TEXT
p2 = tf.add_paragraph()
p2.text = str(selected_year)
p2.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF) # WHITE
p2.font.bold = True
p2.font.size = Pt(20) # BIGGER FONT
p2.alignment = PP_ALIGN.CENTER
cell_dept_header.vertical_anchor = MSO_VERTICAL_ANCHOR.MIDDLE
# KPI Headers (merged with targets)
for i, kpi in enumerate(kpis_to_show, 1):
cell_kpi = table.cell(0, i)
# Clear existing text and create two paragraphs
tf = cell_kpi.text_frame
tf.clear()
# KPI name paragraph
p1 = tf.paragraphs[0]
p1.text = kpi
p1.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF)
p1.font.bold = True
p1.font.size = Pt(20) # BIGGER FONT
p1.alignment = PP_ALIGN.CENTER
# Set background color based on column
cell_kpi.fill.solid()
if kpi == 'OEE': # OEE column should be GRAY
cell_kpi.fill.fore_color.rgb = RGBColor(0x80, 0x80, 0x80) # GRAY for OEE column
else:
cell_kpi.fill.fore_color.rgb = RGBColor(0x3F, 0x3F, 0x3F) # Dark gray for other columns
cell_kpi.vertical_anchor = MSO_VERTICAL_ANCHOR.MIDDLE
# COMMENTO Header (column 6, index 6)
cell_commento_header = table.cell(0, 6)
cell_commento_header.fill.solid()
cell_commento_header.fill.fore_color.rgb = RGBColor(0x3F, 0x3F, 0x3F) # Dark gray
tf = cell_commento_header.text_frame
tf.clear()
p = tf.paragraphs[0]
p.text = "COMMENTO"
p.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF)
p.font.bold = True
p.font.size = Pt(20)
p.alignment = PP_ALIGN.CENTER
cell_commento_header.vertical_anchor = MSO_VERTICAL_ANCHOR.MIDDLE
# --- Body Styling ---
current_row = 1
for dept_name in df_ytd_all.index:
# Department Name Column
cell_dept = table.cell(current_row, 0)
cell_dept.text = dept_name
cell_dept.fill.solid()
cell_dept.fill.fore_color.rgb = RGBColor(0xF2, 0xF2, 0xF2) # Very light grey
p = cell_dept.text_frame.paragraphs[0]
p.font.bold = True
p.font.color.rgb = RGBColor(0x00, 0x00, 0x00) # Black text
p.font.size = Pt(20) # BIGGER FONT
p.alignment = PP_ALIGN.CENTER
cell_dept.vertical_anchor = MSO_VERTICAL_ANCHOR.MIDDLE
# KPI Value Cells
for c, kpi in enumerate(kpis_to_show, 1):
cell = table.cell(current_row, c)
# Get values from the DataFrame
prev_col = f'{kpi}_{previous_year}'
curr_col = f'{kpi}_{selected_year}'
prev_val = df_ytd_all.loc[dept_name, prev_col] if prev_col in df_ytd_all.columns else None
curr_val = df_ytd_all.loc[dept_name, curr_col] if curr_col in df_ytd_all.columns else None
tf = cell.text_frame
tf.clear()
# Previous year value
p1 = tf.paragraphs[0]
p1.text = f"{prev_val:.1f}%".replace('.',',') if pd.notna(prev_val) else ""
p1.font.color.rgb = RGBColor(0x80, 0x80, 0x80) # Grey
p1.font.size = Pt(18) # BIGGER FONT
p1.alignment = PP_ALIGN.CENTER
# Current year value
p2 = tf.add_paragraph()
p2.text = f"{curr_val:.1f}%".replace('.',',') if pd.notna(curr_val) else ""
p2.font.bold = True
p2.font.size = Pt(18) # BIGGER FONT
p2.alignment = PP_ALIGN.CENTER
# Color logic for current year value based on comparison with previous year
if pd.notna(curr_val) and pd.notna(prev_val):
if curr_val > prev_val:
p2.font.color.rgb = RGBColor(0x00, 0xB0, 0x50) # Green - improvement
elif curr_val < prev_val:
p2.font.color.rgb = RGBColor(0xC0, 0x00, 0x00) # Red - decline
else:
# Equal values - neutral color
p2.font.color.rgb = RGBColor(0x00, 0x00, 0x00) # Black
elif pd.notna(curr_val):
# Only current year data available - use neutral color
p2.font.color.rgb = RGBColor(0x00, 0x00, 0x00) # Black
# Set background color for OEE column
cell.fill.solid()
if kpi == 'OEE': # OEE column cells should be GRAY
cell.fill.fore_color.rgb = RGBColor(0xE5, 0xE5, 0xE5) # Light gray for OEE data cells
else:
cell.fill.fore_color.rgb = RGBColor(0xFF, 0xFF, 0xFF) # White for other columns
cell.vertical_anchor = MSO_VERTICAL_ANCHOR.MIDDLE
# COMMENTO Column (column 6, index 6)
cell_commento = table.cell(current_row, 6)
commento_text = df_ytd_all.loc[dept_name, 'COMMENTO'] if 'COMMENTO' in df_ytd_all.columns else ""
cell_commento.text = str(commento_text) if pd.notna(commento_text) else ""
cell_commento.fill.solid()
cell_commento.fill.fore_color.rgb = RGBColor(0xFF, 0xFF, 0xFF) # White background
p = cell_commento.text_frame.paragraphs[0]
p.font.color.rgb = RGBColor(0x00, 0x00, 0x00) # Black text
p.font.size = Pt(15) # Smaller font for comments
p.alignment = PP_ALIGN.LEFT # Left align for readability
cell_commento.vertical_anchor = MSO_VERTICAL_ANCHOR.MIDDLE
current_row += 1
# --- Table Border Styling (BLACK borders) ---
def set_table_black_borders(table):
"""
Remove any table style (so PPT won't override) and then
set all borders to a solid 1 pt black line on every cell.
"""
NS_A = "http://schemas.openxmlformats.org/drawingml/2006/main"
# 1) strip out the <a:tblStyle> so our borders aren't overridden
tbl = table._tbl
tblPr = tbl.tblPr
tblStyle = tblPr.find(qn('a:tblStyle'))
if tblStyle is not None:
tblPr.remove(tblStyle)
# 2) now inject our own <a:lnL>, <a:lnR>, <a:lnT>, <a:lnB> on every cell
for row in table.rows:
for cell in row.cells:
# cell._tc is the CT_Tc element; get or create its <a:tcPr>
tc = cell._tc
tcPr = tc.get_or_add_tcPr()
for border_dir in ('lnL', 'lnR', 'lnT', 'lnB'):
# remove any existing
for elem in tcPr.findall(f'a:{border_dir}', namespaces={'a': NS_A}):
tcPr.remove(elem)
# build <a:lnX> with width=12700 EMU (1 pt), flat cap, solid compound
ln = etree.SubElement(
tcPr, f'{{{NS_A}}}{border_dir}',
{
'w': '12700', # 1pt
'cap': 'flat',
'cmpd': 'sng',
'algn': 'ctr'
}
)
# add <a:solidFill><a:srgbClr val="000000"/></a:solidFill>
solidFill = etree.SubElement(ln, f'{{{NS_A}}}solidFill')
etree.SubElement(solidFill, f'{{{NS_A}}}srgbClr', val='000000')
# ensure it's truly solid
etree.SubElement(ln, f'{{{NS_A}}}prstDash', val='solid')
# Apply BLACK borders
set_table_black_borders(table)
# --- Final Table Adjustments ---
# Set column widths - make COMMENTO column wider for readability
dept_width = 3.5 # Department column
kpi_width = 2.9 # Each KPI column
commento_width = 6.0 # COMMENTO column (wider)
table.columns[0].width = Inches(dept_width) # Department
for i in range(1, 6): # KPI columns
table.columns[i].width = Inches(kpi_width)
table.columns[6].width = Inches(commento_width) # COMMENTO
# Set row heights
for r in range(rows):
if r == 0: # Header row
table.rows[r].height = Inches(1.6)
else: # Department rows
table.rows[r].height = Inches(1.4)
print(f"βœ… Slide 'YTD OVERALL' added successfully.")
# --- Unit Test Checks (as print statements) ---
print("\n--- Running Checks ---")
print(f"Table has {len(table.rows)} rows (expected {num_departments + 1})")
print(f"Table has {len(table.columns)} columns (expected 7)")
print(f"Processed {num_departments} departments")
# Check if COMMENTO column has data
commento_check = any(df_ytd_all['COMMENTO'].notna()) if 'COMMENTO' in df_ytd_all.columns else False
print(f"COMMENTO column has data: {commento_check}")
# Check a value format
try:
val_check = table.cell(1,1).text_frame.paragraphs[1].text
if '%' in val_check and ',' in val_check:
print(f"βœ… Cell format check passed (e.g., '{val_check}')")
else:
print(f"⚠️ Cell format check failed (e.g., '{val_check}')")
except (IndexError, AttributeError):
print("⚠️ Could not check cell format.")
return prs
# =============================================================================
# 🎯 MAIN ENTRY POINT
# =============================================================================
if __name__ == "__main__":
# Example usage for testing
import sys
if len(sys.argv) < 4:
print("Usage: python generator.py <csv_path> <selected_month> <selected_year>")
print("Example: python generator.py data.csv 5 2024")
print("This will create a report for May 2024 covering all departments (ST, MS, DC)")
sys.exit(1)
csv_file = sys.argv[1]
try:
selected_month = int(sys.argv[2])
selected_year = int(sys.argv[3])
except ValueError:
print("❌ Error: selected_month and selected_year must be integers")
print("Example: python generator.py data.csv 5 2024")
sys.exit(1)
try:
output_path = make_ppt(csv_file, selected_month, selected_year)
if validate_ppt_output(output_path):
print(f"πŸŽ‰ Success! PowerPoint created at: {output_path}")
else:
print("❌ PowerPoint validation failed")
sys.exit(1)
except Exception as e:
print(f"❌ Error: {e}")
sys.exit(1)