dataanalysis / SPC_System /spc_engine.py
Ansaribinhyder's picture
Adding SPC Engine
7bc9ae5
raw
history blame
4 kB
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
file = input("Select the file to analyze or type in the file name (Should be .xlsx)")
INPUT_EXCEL = file
df = pd.read_excel(INPUT_EXCEL)
################DICTIONARY BASED ON THE PARAMETER###########################
import json
df_new = df.head(3)
##To drop the extra columns
df_new = df_new.dropna(axis=1, how='all')
df_param = df_new.drop(["T_TIME","SITE_NUM"],axis=1)
parameter = list(df_param.columns)
##Convert the parameter to dictionary
new_dict = {param:{"values":[]} for param in parameter}
for prm in parameter:
temp = dict(zip(df_new['SITE_NUM'],df_new[prm]))
new_dict[prm].update(temp)
for prm in parameter:
df = df.dropna(axis=1, how='all')
__temp = df[prm].values[4:].tolist()
new_dict[prm]['values'].extend(__temp)
######To extract the Limits##############
df_new = df.head(3)
##To drop the extra columns
df_new = df_new.dropna(axis=1, how='all')
df_param = df_new.drop(["T_TIME","SITE_NUM"],axis=1)
parameter = list(df_param.columns)
##Convert the parameter to dictionary
new_dict = {param:{"values":[]} for param in parameter}
for prm in parameter:
temp = dict(zip(df_new['SITE_NUM'],df_new[prm]))
new_dict[prm].update(temp)
for prm in parameter:
df = df.dropna(axis=1, how='all')
__temp = df[prm].values[4:].tolist()
new_dict[prm]['values'].extend(__temp)
#TODO: No need to store json. If in case if we want to maintian history then we can store them
# json_data = json.dumps(new_dict)
# print(json_data)
# with open("output.json", "w") as f:
# json.dump(new_dict, f, indent=4)
#############Plotting###############
data = new_dict
##TODO: To plot for all the paramter. But need to check the way to handle it in the front end.
for param in parameter:
values = data[param]["values"]
limit_l = data[param]["LimitL"]
limit_u = data[param]["LimitU"]
unit = data[param]["Unit"]
import numpy as np
values = np.array(values)
mean = np.mean(values)
std = np.std(values, ddof=1) # Sample standard deviation
UCL = mean + 3*std
LCL = mean - 3*std
plt.figure(figsize=(7, 5))
# --- Box Plot ---
plt.boxplot(values, vert=True, patch_artist=True)
# --- Jittered Scatter Plot (spread raw values) ---
y = values
x = np.random.normal(1, 0.04, size=len(values)) # jitter around x=1
plt.scatter(x, y, alpha=0.6)
# --- Reference Lines ---
plt.axhline(mean, color='green', linestyle='--', label='Mean')
plt.axhline(UCL, color='red', linestyle='-.', label='UCL (Mean + 3σ)')
plt.axhline(LCL, color='red', linestyle='--', label='LCL (Mean - 3σ)')
# Optional spec limits
plt.axhline(limit_l, color='orange', linestyle=':', label='Lower Spec Limit')
plt.axhline(limit_u, color='orange', linestyle=':', label='Upper Spec Limit')
# Labels & Styling
plt.title(f"Box Plot with All Data Points - {param} ({unit})")
plt.ylabel(f"Value ({unit})")
plt.grid(True)
plt.legend()
plt.tight_layout()
# plt.show()
chart_path = f"./charts/control_chart_{param}.png"
plt.savefig(chart_path, dpi=300, bbox_inches='tight')
plt.close()
####line plot
# x_axis = range(1, len(values)+1)
# plt.figure(figsize=(10,5))
# plt.plot(x_axis,values, marker='o', linestyle='-', label='Measurements')
# plt.axhline(mean, color='green', linestyle='--', label='Mean')
# plt.axhline(UCL, color='red', linestyle='-.', label='UCL (Mean + 3σ)')
# plt.axhline(LCL, color='red', linestyle='--', label='LCL (Mean - 3σ)')
# plt.axhline(limit_l, color='orange', linestyle=':', label='Lower Spec Limit')
# plt.axhline(limit_u, color='orange', linestyle=':', label='Upper Spec Limit')
# plt.xticks(x_axis)
# plt.title(f"SPC Chart - {param} ({unit})")
# plt.xlabel("Sample Index")
# plt.ylabel(f"Value ({unit})")
# plt.legend()
# plt.grid(True)
# plt.tight_layout()
# plt.show()