File size: 3,421 Bytes
9a89b41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import torch
import numpy as np
import pandas as pd
from model import CardiovascularRNN
from sklearn.preprocessing import StandardScaler, LabelEncoder

class RiskPredictor:
    """Load a trained ``CardiovascularRNN`` and classify single patient records.

    The label encoders and the feature scaler are fitted on the CSV at
    construction time rather than loaded from disk.
    NOTE(review): this presumably has to be the same data (and preprocessing)
    the checkpoint was trained with -- confirm against the training script.
    """

    def __init__(self, model_path: str = 'cardiovascular_rnn_model.pth',
                 csv_path: str = 'cardiovascular_risk_dataset.csv') -> None:
        """Fit the preprocessing objects from ``csv_path`` and load the model.

        Args:
            model_path: Path to a ``state_dict`` saved with ``torch.save``.
            csv_path: Path to the dataset CSV. It must contain the columns
                ``Patient_ID``, ``risk_category``, ``smoking_status`` and
                ``family_history_heart_disease``.
        """
        self.df = pd.read_csv(csv_path)

        # Every column except the identifier and the target is a feature;
        # the CSV column order defines the feature order fed to the model.
        features = self.df.drop(['Patient_ID', 'risk_category'], axis=1)
        self.feature_names = features.columns.tolist()

        self.le_risk = LabelEncoder()
        self.le_risk.fit(self.df['risk_category'])

        self.le_smoking = LabelEncoder()
        self.le_smoking.fit(self.df['smoking_status'])

        self.le_family = LabelEncoder()
        self.le_family.fit(self.df['family_history_heart_disease'])

        # `features` is a fresh frame returned by drop(), so encoding it in
        # place leaves self.df untouched.
        self.scaler = StandardScaler()
        self.scaler.fit(self._encode_categoricals(features).values)

        # One scalar per step: predict_single reshapes a record to
        # (1, n_features, 1) before calling the model.
        # NOTE(review): hidden_size / num_layers presumably must match the
        # architecture stored in the checkpoint -- confirm.
        self.input_size = 1
        self.hidden_size = 64
        self.num_layers = 2
        self.num_classes = len(self.le_risk.classes_)

        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = CardiovascularRNN(
            self.input_size, self.hidden_size, self.num_layers, self.num_classes
        ).to(self.device)
        self.model.load_state_dict(torch.load(model_path, map_location=self.device))
        self.model.eval()
        print(f"Model initialized with features: {self.feature_names}")

    def _encode_categoricals(self, frame):
        """Replace the two categorical columns of ``frame`` with integer codes.

        The frame is modified in place and also returned for convenience.
        """
        frame['smoking_status'] = self.le_smoking.transform(frame['smoking_status'])
        frame['family_history_heart_disease'] = self.le_family.transform(
            frame['family_history_heart_disease']
        )
        return frame

    def predict_single(self, data_dict: dict):
        """Predict the risk category for one patient record.

        Args:
            data_dict: Mapping from feature name to raw value. It must contain
                every name in ``self.feature_names``; extra keys are ignored
                and key order does not matter.

        Returns:
            The decoded ``risk_category`` label of the highest-scoring class.

        Raises:
            KeyError: If one or more required features are missing.
        """
        missing = [name for name in self.feature_names if name not in data_dict]
        if missing:
            raise KeyError(f"Missing feature(s) in input: {missing}")

        # Select columns in training order. The explicit copy makes the frame
        # independent of the temporary it was sliced from, so the column
        # assignments below cannot trigger SettingWithCopyWarning.
        input_df = pd.DataFrame([data_dict])[self.feature_names].copy()
        self._encode_categoricals(input_df)

        input_scaled = self.scaler.transform(input_df.values)
        input_tensor = (
            torch.as_tensor(input_scaled, dtype=torch.float32)
            .reshape(1, -1, 1)
            .to(self.device)
        )

        with torch.no_grad():
            output = self.model(input_tensor)
            probs = torch.softmax(output, dim=1)
            predicted = torch.argmax(output, dim=1)

        predicted_label = self.le_risk.inverse_transform([predicted.item()])[0]

        print(f"Prediction: {predicted_label} | Probabilities: {probs.cpu().numpy()}")
        return predicted_label

def predict(model_path: str = 'cardiovascular_rnn_model.pth',
            csv_path: str = 'cardiovascular_risk_dataset.csv',
            sample_index: int = 0) -> None:
    """Run a demo prediction on one dataset row and print it next to the truth.

    Args:
        model_path: Checkpoint path forwarded to ``RiskPredictor``.
        csv_path: Dataset CSV path forwarded to ``RiskPredictor``; the sample
            row is taken from this same data.
        sample_index: Positional index of the row to use as the sample.
    """
    predictor = RiskPredictor(model_path=model_path, csv_path=csv_path)
    print("Model loaded successfully.")

    # Reuse the frame the predictor already parsed instead of reading the
    # CSV from disk a second time.
    df = predictor.df
    sample_row = df.drop(['Patient_ID', 'risk_category'], axis=1).iloc[sample_index]
    sample_dict = sample_row.to_dict()
    true_label = df['risk_category'].iloc[sample_index]

    predicted_label = predictor.predict_single(sample_dict)

    print(f"\nSample Data: {sample_dict}")
    print(f"True Risk Category: {true_label}")
    print(f"Predicted Risk Category: {predicted_label}")

# Run the demo prediction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    predict()