# Source: Heart-attack / batch_predict.py
# Author: mouneshpawar6388 — "Initial commit for Hugging Face Space" (commit 6396193)
"""
Batch Prediction on Heart Attack Dataset
Loads the dataset, predicts risk for EVERY row, and saves the results.
"""
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import pandas as pd
import numpy as np
import joblib
from tensorflow.keras.models import load_model
# ── Config ────────────────────────────────────────────────────────────
# All paths are resolved relative to this script's directory so the
# script works no matter what the current working directory is.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_PATH = os.path.join(BASE_DIR, "Heart Attack Data Set.csv")
MODEL_DIR = os.path.join(BASE_DIR, "saved_model")
OUTPUT_PATH = os.path.join(BASE_DIR, "heart_attack_with_predictions.csv")

# ── load Resources ────────────────────────────────────────────────────
print(f"📂 Loading dataset from: {DATA_PATH}")
df = pd.read_csv(DATA_PATH)
print(f"Start Loading model and scaler...")
# Trained Keras model plus the fitted feature scaler saved alongside it.
model_path = os.path.join(MODEL_DIR, "heart_attack_model.keras")
scaler_path = os.path.join(MODEL_DIR, "scaler.pkl")
model = load_model(model_path)
scaler = joblib.load(scaler_path)
# ── Preprocess Features ───────────────────────────────────────────────
# We need to ensure we use the exact same columns as training (excluding target)
# Auto-detect target again to drop it: take the first column whose
# normalized (stripped, lower-cased) name matches a known label name.
target_candidates = ['target', 'output', 'label', 'class', 'result']
target_col = next(
    (col for col in df.columns if col.strip().lower() in target_candidates),
    None,
)
if target_col:
    print(f"Target column detected: '{target_col}' (Dropping for prediction)")
    features = df.drop(columns=[target_col])
else:
    features = df.copy()

# Handle missing values (same logic as training): median for numeric
# columns, most frequent value for anything categorical.
numeric_cols = features.select_dtypes(include=[np.number]).columns
features[numeric_cols] = features[numeric_cols].fillna(features[numeric_cols].median())
cat_cols = features.select_dtypes(exclude=[np.number]).columns
if len(cat_cols) > 0:
    # simple fill for batch script
    features[cat_cols] = features[cat_cols].fillna(features[cat_cols].mode().iloc[0])

# Scale features with the scaler fitted at training time.
# NOTE(review): assumes column set/order matches what the scaler saw in
# training, and that any categorical columns were already numeric there.
X_scaled = scaler.transform(features)
# ── Make Predictions ──────────────────────────────────────────────────
print(f"🔮 Predicting on {len(df)} patients...")
# Single decision threshold, defined once so the binary label and the
# human-readable label can never drift apart.
RISK_THRESHOLD = 0.5
predictions = model.predict(X_scaled, verbose=1)
# Add results to dataframe
df['Predicted_Probability'] = predictions.flatten()
df['Predicted_Risk_Label'] = (df['Predicted_Probability'] > RISK_THRESHOLD).astype(int)
# Derive the text label from the binary label (not from the raw
# probability again) so the two columns are consistent by construction.
df['Risk_Level'] = df['Predicted_Risk_Label'].map({1: "High Risk", 0: "Low Risk"})
# ── Save & Show ───────────────────────────────────────────────────────
# Persist the full dataset with the three new prediction columns.
df.to_csv(OUTPUT_PATH, index=False)
print(f"\n✅ Predictions saved to: {OUTPUT_PATH}")
# Header now says 10 rows, matching the head(10) actually printed below
# (the original message claimed 5 rows).
print("\n── Sample Results (First 10 Rows) ────────────────────────────")
# Show relevant columns + predictions
cols_to_show = ['age', 'sex', 'cp', 'chol', 'target', 'Predicted_Risk_Label', 'Risk_Level', 'Predicted_Probability']
# Filter columns that actually exist
cols_to_show = [c for c in cols_to_show if c in df.columns]
print(df[cols_to_show].head(10).to_string(index=False))
print("────────────────────────────────────────────────────────────────")
# Calculate Accuracy on this full dataset (since we have labels).
# NOTE(review): this is training-set accuracy, not a held-out estimate.
if target_col:
    correct = (df[target_col] == df['Predicted_Risk_Label']).sum()
    total = len(df)
    print(f"\nOverall Accuracy on Full Dataset: {correct}/{total} ({correct/total:.2%})")