File size: 2,370 Bytes
6ccbbfd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
"""
src/predict.py
──────────────
Load saved model + scaler and run inference for a single student.
Returns prediction label, probabilities, and top feature importances.
"""
import os, json
import numpy as np
import joblib

# Absolute directory of this module, and the "models" directory one level up.
BASE_DIR = os.path.split(os.path.abspath(__file__))[0]
MODEL_DIR = os.path.join(BASE_DIR, os.pardir, "models")

# Integer risk code -> human-readable label.
RISK_LABELS = dict(enumerate(("Low Risk", "Medium Risk", "High Risk")))


def load_artifacts():
    """
    Load the persisted model, scaler and metadata from ``MODEL_DIR``.

    Returns
    -------
    tuple (model, scaler, meta)
        model   object deserialized from "best_model.pkl"
        scaler  object deserialized from "scaler.pkl"
        meta    parsed contents of "model_meta.json"
    """
    # SECURITY: joblib.load unpickles its input and can execute arbitrary
    # code, so these files must come from a trusted source.
    model = joblib.load(os.path.join(MODEL_DIR, "best_model.pkl"))
    scaler = joblib.load(os.path.join(MODEL_DIR, "scaler.pkl"))

    # Decode the JSON explicitly as UTF-8 instead of relying on the
    # platform's locale-dependent default encoding.
    meta_path = os.path.join(MODEL_DIR, "model_meta.json")
    with open(meta_path, encoding="utf-8") as f:
        meta = json.load(f)
    return model, scaler, meta


def predict_student(user_input: dict):
    """
    Full prediction pipeline for one student.

    Loads the saved artifacts, preprocesses ``user_input`` and scores it
    with the model's ``predict_proba``.

    Parameters
    ----------
    user_input : dict
        Raw values for one student; passed unchanged to
        ``preprocess_single_input`` (the expected keys are defined there).

    Returns
    -------
    dict with keys:
        risk_label       str   e.g. "High Risk"
        risk_code        int   0/1/2
        confidence       float probability of predicted class
        probabilities    dict  {label: prob}
        top_features     value stored under "feature_importance" in the
                         model metadata ([] when absent)
        model_name       str   metadata "best_model_name", default "Model"

    Raises
    ------
    KeyError
        If a resolved class code has no entry in ``RISK_LABELS``.
    """
    # Kept as a function-level import, as in the original.
    from preprocessing import preprocess_single_input, FEATURES

    model, scaler, meta = load_artifacts()
    X = preprocess_single_input(user_input, scaler, FEATURES)

    probs = model.predict_proba(X)[0]

    # Assuming a scikit-learn-style estimator, the columns of predict_proba
    # are ordered like ``model.classes_``, which is not necessarily 0..n-1
    # (e.g. when a risk class was absent from the training data). Map each
    # column to its real class code when those codes are known risk codes;
    # otherwise keep the positional interpretation.
    classes = getattr(model, "classes_", None)
    if (
        classes is not None
        and len(classes) == len(probs)
        and all(c in RISK_LABELS for c in classes)
    ):
        codes = [int(c) for c in classes]
    else:
        codes = list(range(len(probs)))

    best = int(np.argmax(probs))
    risk_code = codes[best]
    risk_label = RISK_LABELS[risk_code]
    confidence = float(probs[best])

    probabilities = {
        RISK_LABELS[code]: float(p) for code, p in zip(codes, probs)
    }

    # Feature importance from saved metadata
    top_features = meta.get("feature_importance", [])

    return {
        "risk_label":    risk_label,
        "risk_code":     risk_code,
        "confidence":    confidence,
        "probabilities": probabilities,
        "top_features":  top_features,
        "model_name":    meta.get("best_model_name", "Model"),
    }


if __name__ == "__main__":
    # Manual smoke test: score one hard-coded student and print a summary.
    sample = dict(
        study_time=2,
        absences=12,
        failures=1,
        G1=9.0,
        G2=8.5,
        internet=1,
        higher_edu=1,
        activities=0,
        romantic=1,
        family_support="low",
        gender="M",
    )
    result = predict_student(sample)

    # Printed one line at a time so earlier lines still appear if a later
    # one fails to format.
    print(f"Prediction : {result['risk_label']}")
    print(f"Confidence : {result['confidence']:.1%}")
    print(f"Top factors: {result['top_features'][:3]}")