"""
Batch Prediction on Heart Attack Dataset
Loads the dataset, predicts risk for EVERY row, and saves the results.
"""
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import pandas as pd
import numpy as np
import joblib
from tensorflow.keras.models import load_model
# ── Config ────────────────────────────────────────────────────────────
# All paths are resolved relative to this script's own directory, so the
# script behaves the same regardless of the current working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_PATH = os.path.join(BASE_DIR, "Heart Attack Data Set.csv")  # input dataset (features + label)
MODEL_DIR = os.path.join(BASE_DIR, "saved_model")  # directory holding the trained model and scaler
OUTPUT_PATH = os.path.join(BASE_DIR, "heart_attack_with_predictions.csv")  # where predictions are written
# ── Load Resources ────────────────────────────────────────────────────
# Load the raw dataset plus the artifacts produced by the training run:
# the Keras model and the fitted feature scaler.
print(f"📂 Loading dataset from: {DATA_PATH}")
df = pd.read_csv(DATA_PATH)
# FIX: was an f-string with no placeholders; plain string is the idiom.
print("Start Loading model and scaler...")
model = load_model(os.path.join(MODEL_DIR, "heart_attack_model.keras"))
scaler = joblib.load(os.path.join(MODEL_DIR, "scaler.pkl"))
# ── Preprocess Features ───────────────────────────────────────────────
# We must feed the model the exact same feature columns it was trained
# on, so auto-detect the target/label column again and drop it.
# FIX: the pasted source had lost all indentation inside the loop and
# the if/else, which raises IndentationError — structure restored.
target_candidates = ['target', 'output', 'label', 'class', 'result']
target_col = None
for col in df.columns:
    # Case- and whitespace-insensitive match against common label names.
    if col.strip().lower() in target_candidates:
        target_col = col
        break
if target_col:
    print(f"Target column detected: '{target_col}' (Dropping for prediction)")
    X = df.drop(columns=[target_col])
else:
    # No label column found — treat every column as a feature.
    X = df.copy()
# Handle missing values (same logic as training).
# FIX: restored the indentation of the conditional categorical fill,
# which had been stripped in the pasted source (IndentationError).
numeric_cols = X.select_dtypes(include=[np.number]).columns
X[numeric_cols] = X[numeric_cols].fillna(X[numeric_cols].median())
cat_cols = X.select_dtypes(exclude=[np.number]).columns
if len(cat_cols) > 0:
    # Simple most-frequent-value fill is enough for this batch script.
    X[cat_cols] = X[cat_cols].fillna(X[cat_cols].mode().iloc[0])
# Scale features with the scaler fitted at training time — never refit here.
X_scaled = scaler.transform(X)
# ── Make Predictions ──────────────────────────────────────────────────
# Run the model over every row, then attach three result columns to the
# original dataframe: the raw probability, the 0/1 label, and a
# human-readable risk string derived from that label.
print(f"🔮 Predicting on {len(df)} patients...")
raw_output = model.predict(X_scaled, verbose=1)
df['Predicted_Probability'] = raw_output.flatten()
df['Predicted_Risk_Label'] = (df['Predicted_Probability'] > 0.5).astype(int)
# The text label is a direct rendering of the 0/1 label, so map it
# instead of re-applying the 0.5 threshold.
df['Risk_Level'] = df['Predicted_Risk_Label'].map(
    {1: "High Risk", 0: "Low Risk"}
)
# ── Save & Show ───────────────────────────────────────────────────────
# Persist the full dataframe (original columns + predictions), then
# print a small preview of the most relevant columns.
df.to_csv(OUTPUT_PATH, index=False)
print(f"\n✅ Predictions saved to: {OUTPUT_PATH}")
print("\n── Sample Results (First 5 Rows) ─────────────────────────────")
# Columns worth previewing — inputs, ground truth (if present), and predictions.
cols_to_show = ['age', 'sex', 'cp', 'chol', 'target', 'Predicted_Risk_Label', 'Risk_Level', 'Predicted_Probability']
# Keep only the columns that actually exist in this dataset.
cols_to_show = [c for c in cols_to_show if c in df.columns]
# FIX: the header announces "First 5 Rows" but head(10) printed ten —
# align the preview with the printed banner.
print(df[cols_to_show].head(5).to_string(index=False))
print("────────────────────────────────────────────────────────────────")
# Calculate accuracy on this full dataset (only when ground-truth labels
# were detected earlier).
# FIX: restored the indentation of the if-body, which had been stripped
# in the pasted source (IndentationError).
# NOTE(review): assumes the dataset is non-empty; an empty CSV would make
# correct/total divide by zero — acceptable for this batch script.
if target_col:
    correct = (df[target_col] == df['Predicted_Risk_Label']).sum()
    total = len(df)
    print(f"\nOverall Accuracy on Full Dataset: {correct}/{total} ({correct/total:.2%})")