Spaces:

AITestingWorkSpace
/

dataanalysis

Sleeping

App Files Files Community

dataanalysis / app.py

Ansaribinhyder

added new code for two tester data comparision

6930d8f 5 months ago

raw

history blame

9.42 kB

	from flask import Flask, render_template, request, send_file, redirect, url_for
	import pandas as pd
	import matplotlib.pyplot as plt
	import numpy as np
	import io
	import os

	app = Flask(__name__)

	# --- Cache updated to hold two test files ---
	data_cache = {
	"df1": None, # Golden Data
	"df2_temp": None, # Test 1 Data
	"df3_temp": None, # Test 2 Data
	"limits": {},
	"cols": [],
	"golden_loaded": False,
	"test1_loaded": False,
	"test2_loaded": False,
	"comparison_file": None
	}
	# ----------------------------------------------


	def process_golden_file(golden_file):
	"""Load Golden data and extract limits."""
	# Use pandas ExcelFile to read multiple times from the file-like object
	xls = pd.ExcelFile(golden_file)
	limits_df1 = pd.read_excel(xls, nrows=4)
	df1 = pd.read_excel(xls) # Read the entire sheet again for data

	df1 = df1.drop([0, 1, 2, 3])
	df1 = df1.apply(pd.to_numeric, errors="coerce")

	limits_df1 = limits_df1.drop([0])
	ignore_cols = ["SITE_NUM", "PART_ID", "PASSFG", "SOFT_BIN", "T_TIME", "TEST_NUM"]
	# Identify columns to plot/analyze: must contain '_' and not be in ignore_cols
	cols_to_plot = [col for col in limits_df1.columns if "_" in str(col) and col not in ignore_cols]

	# Drop ignore columns from limits df to only get limits for relevant parameters
	limits_df1_filtered = limits_df1.drop(columns=ignore_cols, errors='ignore')

	limits = {
	col: {"LL": limits_df1_filtered.iloc[0][col], "UL": limits_df1_filtered.iloc[1][col]}
	for col in limits_df1_filtered.columns
	if pd.notna(limits_df1_filtered.iloc[0][col]) or pd.notna(limits_df1_filtered.iloc[1][col])
	}

	data_cache.update({
	"df1": df1,
	"limits": limits,
	"cols": cols_to_plot,
	"golden_loaded": True
	})
	return "Golden data loaded successfully!"


	def process_test_file(test_file):
	"""Load Test data."""
	df_test = pd.read_excel(test_file)
	df_test = df_test.drop([0, 1, 2, 3])
	df_test = df_test.apply(pd.to_numeric, errors="coerce")
	return df_test


	# --- Comparison function updated for two test files ---
	def generate_comparison_excel():
	"""Generate comparison Excel (mean, std, min, max for Golden, Test 1, and Test 2)."""
	df1 = data_cache["df1"]
	df2 = data_cache["df2_temp"]
	df3 = data_cache["df3_temp"]

	ignore_cols = ["SITE_NUM", "PART_ID", "PASSFG", "SOFT_BIN", "T_TIME", "TEST_NUM"]
	# Use columns identified during Golden file processing
	common_cols = data_cache["cols"]

	summary = []
	for col in common_cols:
	g_mean, t1_mean, t2_mean = df1[col].mean(), df2.get(col, pd.Series()).mean(), df3.get(col, pd.Series()).mean()
	g_std, t1_std, t2_std = df1[col].std(), df2.get(col, pd.Series()).std(), df3.get(col, pd.Series()).std()
	g_min, t1_min, t2_min = df1[col].min(), df2.get(col, pd.Series()).min(), df3.get(col, pd.Series()).min()
	g_max, t1_max, t2_max = df1[col].max(), df2.get(col, pd.Series()).max(), df3.get(col, pd.Series()).max()

	# Calculate differences relative to Golden mean
	diff1 = t1_mean - g_mean if pd.notna(t1_mean) and pd.notna(g_mean) else np.nan
	diff2 = t2_mean - g_mean if pd.notna(t2_mean) and pd.notna(g_mean) else np.nan

	summary.append([
	col, g_mean, t1_mean, t2_mean, diff1, diff2,
	g_std, t1_std, t2_std,
	g_min, t1_min, t2_min,
	g_max, t1_max, t2_max
	])

	comp_df = pd.DataFrame(summary, columns=[
	"Parameter", "Golden_Mean", "Test1_Mean", "Test2_Mean", "Test1_Mean_Diff", "Test2_Mean_Diff",
	"Golden_Std", "Test1_Std", "Test2_Std",
	"Golden_Min", "Test1_Min", "Test2_Min",
	"Golden_Max", "Test1_Max", "Test2_Max"
	])

	path = "comparison_result.xlsx"
	comp_df.to_excel(path, index=False)
	data_cache["comparison_file"] = path
	# -------------------------------------------------------------


	# --- Plot function updated for two test files ---
	def generate_plot(col):
	"""Generate and return a plot comparing Golden vs Test 1 vs Test 2."""
	df1, df2, df3 = data_cache["df1"], data_cache.get("df2_temp"), data_cache.get("df3_temp")
	limits = data_cache["limits"]

	plt.figure(figsize=(10, 6)) # Increased size for better visibility

	# Golden Plot
	x1 = np.arange(1, len(df1[col]) + 1)
	plt.plot(x1, df1[col], 'o-', label="Golden", color='blue', alpha=0.7)

	# Test 1 Plot
	if df2 is not None and col in df2.columns:
	x2 = np.arange(1, len(df2[col]) + 1)
	plt.plot(x2, df2[col], 's--', label="Test 1", color='red', alpha=0.7)

	# Test 2 Plot
	if df3 is not None and col in df3.columns:
	x3 = np.arange(1, len(df3[col]) + 1)
	plt.plot(x3, df3[col], 'x:', label="Test 2", color='purple', alpha=0.8)

	# Limits Plot
	if col in limits:
	ll, ul = limits[col].get("LL"), limits[col].get("UL")
	if pd.notna(ll):
	plt.axhline(ll, color='green', linestyle='--', label='LL', linewidth=1)
	if pd.notna(ul):
	plt.axhline(ul, color='orange', linestyle='--', label='UL', linewidth=1)

	plt.title(f"Parameter: {col}")
	plt.xlabel("Part # (sequence)")
	plt.ylabel("Value")
	plt.legend(fontsize='small', loc='best')
	plt.grid(True, linestyle='--', alpha=0.5)

	# Set x-ticks based on the largest dataset (assuming Golden is the reference)
	max_len = len(df1[col])
	if max_len > 1:
	plt.xticks(np.arange(1, max_len + 1, max(1, max_len // 10))) # Show max 10 ticks

	plt.tight_layout()

	buf = io.BytesIO()
	plt.savefig(buf, format='png', bbox_inches='tight')
	buf.seek(0)
	plt.close()
	return buf
	# -------------------------------------------------------------


	@app.route("/", methods=["GET", "POST"])
	def index():
	if request.method == "POST":

	# 1. Upload Golden file
	if not data_cache["golden_loaded"]:
	golden_file = request.files.get("golden_file")
	if not golden_file:
	return render_template("index.html", error="Please upload the Golden file.")
	try:
	process_golden_file(golden_file)
	return redirect(url_for("index"))
	except Exception as e:
	return render_template("index.html", error=f"Error loading Golden file: {e}")

	# 2. Upload Test 1 file
	elif not data_cache["test1_loaded"]:
	test1_file = request.files.get("test1_file")
	if not test1_file:
	return render_template("index.html", error="Please upload the first Test file (Test 1).", **data_cache)
	try:
	df2 = process_test_file(test1_file)
	data_cache["df2_temp"] = df2
	data_cache["test1_loaded"] = True
	return redirect(url_for("index"))
	except Exception as e:
	return render_template("index.html", error=f"Error processing Test 1 file: {e}", **data_cache)

	# 3. Upload Test 2 file
	elif not data_cache["test2_loaded"]:
	test2_file = request.files.get("test2_file")
	if not test2_file:
	return render_template("index.html", error="Please upload the second Test file (Test 2).", **data_cache)
	try:
	df3 = process_test_file(test2_file)
	data_cache["df3_temp"] = df3
	data_cache["test2_loaded"] = True

	# Generate comparison and move to plot view after all files are loaded
	generate_comparison_excel()

	return render_template(
	"plot.html",
	cols=data_cache["cols"],
	file_ready=True
	)
	except Exception as e:
	return render_template("index.html", error=f"Error processing Test 2 file: {e}", **data_cache)

	return render_template("index.html", **data_cache)


	@app.route("/plot_image/<col>")
	def plot_image(col):
	# df2 and df3 are checked inside generate_plot
	if data_cache.get("df1") is None:
	return "No Golden data loaded."
	buf = generate_plot(col)
	return send_file(buf, mimetype="image/png")


	@app.route("/download_comparison")
	def download_comparison():
	"""Download comparison Excel file."""
	path = data_cache.get("comparison_file")
	if path and os.path.exists(path):
	return send_file(path, as_attachment=True, download_name="three_way_comparison_result.xlsx")
	return "No comparison file available. Please upload all data first."


	@app.route("/reset_golden")
	def reset_golden():
	"""Reset all data."""
	global data_cache
	if data_cache.get("comparison_file") and os.path.exists(data_cache["comparison_file"]):
	os.remove(data_cache["comparison_file"])

	data_cache = {
	"df1": None, "df2_temp": None, "df3_temp": None,
	"limits": {}, "cols": [],
	"golden_loaded": False, "test1_loaded": False, "test2_loaded": False,
	"comparison_file": None
	}
	return redirect(url_for("index"))


	if __name__ == "__main__":
	# Ensure a local directory exists for comparison file (optional but good practice)
	# if not os.path.exists("temp"):
	# os.makedirs("temp")
	app.run(host="0.0.0.0", port=7860, debug=True)