# dataanalysis / app.py
# Ansaribinhyder's picture
# Test
# 3c118cd
# raw
# history blame
# 3.7 kB
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import io
def _upload_path(upload):
    """Return the filesystem path for an uploaded file.

    Accepts either a plain path string or an object that exposes the path
    as ``.name`` (the attribute this module has always read).
    """
    if isinstance(upload, str):
        return upload
    return upload.name


def _read_workbook(upload):
    """Parse an uploaded workbook once and return ``(raw, data)``.

    ``raw`` is the sheet as pandas parsed it. ``data`` is ``raw`` without the
    rows labelled 0-3, with every cell coerced to a number (cells that cannot
    be parsed become NaN). Dropping by label raises ``KeyError`` when the
    sheet has fewer than four rows, which the caller reports to the user.
    """
    raw = pd.read_excel(_upload_path(upload))
    data = raw.drop([0, 1, 2, 3]).apply(pd.to_numeric, errors="coerce")
    return raw, data


def plot_excel_data(golden_data_file, manipulated_data_file):
    """
    Plot each measurement column of the golden workbook with its spec limits.

    Sheet layout as consumed by this function: the rows labelled 1 and 2
    hold the lower and upper limit of every column, and the rows labelled
    0-3 are excluded from the plotted data. A column is plotted when its
    header is a string containing "_" and is not one of the bookkeeping
    columns listed in ``ignore_cols``.

    Args:
        golden_data_file: Required upload (path string or object with a
            ``.name`` path). Supplies the columns, limits and "Old" series.
        manipulated_data_file: Optional upload of the same layout; columns
            it shares with the golden file are overlaid as the "New" series.

    Returns:
        The matplotlib figure containing one subplot per plotted column,
        laid out three per row.

    Raises:
        gr.Error: if the golden file is missing, if either file cannot be
            read or processed, or if the golden file has no plottable column.
    """
    if golden_data_file is None:
        raise gr.Error("Please upload the 'Golden Data' Excel file.")

    try:
        raw1, df1 = _read_workbook(golden_data_file)
    except Exception as e:
        raise gr.Error(f"Error processing 'Golden Data' file: {e}") from e

    ignore_cols = ["SITE_NUM", "PART_ID", "PASSFG", "SOFT_BIN", "T_TIME", "TEST_NUM"]
    # Non-string headers (e.g. numbers or dates) cannot contain "_" and would
    # make the membership test raise, so they are skipped explicitly.
    cols_to_plot = [
        col
        for col in raw1.columns
        if isinstance(col, str) and "_" in col and col not in ignore_cols
    ]
    if not cols_to_plot:
        raise gr.Error("No plottable columns found in the 'Golden Data' file.")

    # Rows 1 and 2 are guaranteed to exist here: _read_workbook already
    # required labels 0-3. errors="ignore" tolerates sheets that lack some of
    # the bookkeeping columns instead of failing with a raw KeyError.
    limit_rows = (
        raw1.iloc[1:3]
        .drop(columns=ignore_cols, errors="ignore")
        .apply(pd.to_numeric, errors="coerce")
    )
    limits = {
        col: {"LL": limit_rows.iloc[0][col], "UL": limit_rows.iloc[1][col]}
        for col in limit_rows.columns
    }

    df2 = None
    if manipulated_data_file is not None:
        try:
            _, df2 = _read_workbook(manipulated_data_file)
        except Exception as e:
            raise gr.Error(f"Error processing 'Manipulated Data' file: {e}") from e

    # Size the grid from the columns that are actually drawn, not from every
    # column in the sheet, so no empty framed axes are left behind.
    n_cols = 3
    n_rows = (len(cols_to_plot) + n_cols - 1) // n_cols
    fig, axes = plt.subplots(
        n_rows, n_cols, figsize=(n_cols * 5, n_rows * 3.5), squeeze=False
    )
    # squeeze=False always yields a 2-D array, so one flattening path works
    # for a single row as well as for many.
    flat_axes = axes.ravel()
    for spare in flat_axes[len(cols_to_plot):]:
        spare.axis("off")

    for ax, col in zip(flat_axes, cols_to_plot):
        # Golden data (Old)
        y1 = df1[col].to_numpy()
        x1 = np.arange(1, len(y1) + 1)
        ax.plot(x1, y1, marker="o", linestyle="-", color="blue", label="Old")

        # New data (if provided)
        if df2 is not None and col in df2.columns:
            y2 = df2[col].to_numpy()
            x2 = np.arange(1, len(y2) + 1)
            ax.plot(x2, y2, marker="s", linestyle="--", color="red", label="New")

        # Spec limits; a limit that is missing or not numeric is skipped
        # rather than drawn or listed in the legend.
        bounds = limits.get(col)
        if bounds is not None:
            for key, colour in (("LL", "green"), ("UL", "orange")):
                if pd.notna(bounds[key]):
                    ax.axhline(
                        bounds[key],
                        color=colour,
                        linestyle="--",
                        linewidth=2,
                        label=key,
                    )

        ax.set_title(f"{col}")
        ax.set_xlabel("Part # (sequence)")
        ax.set_ylabel("Value")
        ax.set_xticks(x1)
        ax.grid(True, linestyle="--", alpha=0.7)
        ax.legend()

    fig.tight_layout()
    # Unregister the figure from pyplot so repeated requests do not pile up
    # open figures; the returned Figure object is still usable by the caller.
    plt.close(fig)
    return fig
# Web UI: two file pickers feed plot_excel_data, whose figure is shown in a
# single plot panel. Inputs are built before the output, in the listed order.
iface = gr.Interface(
    title="Time-Series Data Comparison",
    description=(
        "Upload two Excel files to compare time-series data and visualize "
        "specification limits. The first file (Golden Data) is required "
        "and will be used to extract the limits."
    ),
    fn=plot_excel_data,
    inputs=[
        gr.File(label=caption)
        for caption in (
            "Upload IPM_Golden_Data.xlsx (Required)",
            "Upload IPM_Golden_Data_Manipulated.xlsx (Optional)",
        )
    ],
    outputs=gr.Plot(label="Comparison Plots"),
)

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()