File size: 3,627 Bytes
b077775
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
from sklearn.linear_model import LogisticRegression
import pandas as pd


def compute_credit_score(
        coef_dict,
        num_credit_card, changed_credit_limit, delay_from_due_date, interest_rate,
        outstanding_debt, credit_mix_good, credit_mix_standard
):
    """Return the weighted sum of seven credit features.

    Each feature value is multiplied by the coefficient stored in
    ``coef_dict`` under the matching key and the products are added together.
    No intercept term is added, and any other keys in ``coef_dict`` are
    ignored.

    Args:
        coef_dict (dict): Maps feature names to coefficients. Must contain
            'Num_Credit_Card', 'Changed_Credit_Limit', 'Delay_from_due_date',
            'Interest_Rate', 'Outstanding_Debt', 'Credit_Mix_Good' and
            'Credit_Mix_Standard'.
        num_credit_card (float): Value weighted by 'Num_Credit_Card'.
        changed_credit_limit (float): Value weighted by 'Changed_Credit_Limit'.
        delay_from_due_date (float): Value weighted by 'Delay_from_due_date'.
        interest_rate (float): Value weighted by 'Interest_Rate'.
        outstanding_debt (float): Value weighted by 'Outstanding_Debt'.
        credit_mix_good (float): Value weighted by 'Credit_Mix_Good'.
        credit_mix_standard (float): Value weighted by 'Credit_Mix_Standard'.
    Returns:
        float: The sum of coefficient * value over the seven features.
    Raises:
        KeyError: If ``coef_dict`` lacks one of the seven keys.
    """
    weighted_inputs = (
        ('Num_Credit_Card', num_credit_card),
        ('Changed_Credit_Limit', changed_credit_limit),
        ('Delay_from_due_date', delay_from_due_date),
        ('Interest_Rate', interest_rate),
        ('Outstanding_Debt', outstanding_debt),
        ('Credit_Mix_Good', credit_mix_good),
        ('Credit_Mix_Standard', credit_mix_standard),
    )

    # Accumulate strictly left to right, seeded with the first product, so the
    # floating-point result is the same as a chained `a + b + c + ...`.
    total = None
    for key, value in weighted_inputs:
        term = coef_dict[key] * value
        total = term if total is None else total + term
    return total


def real_data_credit_model(X_train, y_train, X_test):
    """Fit a class-balanced logistic regression and score the test features.

    The model is fitted on ``X_train`` / ``y_train`` (intended for the real,
    non-synthetic training data). Its coefficients are paired with
    ``X_train.columns`` and used as weights for a linear credit score over
    seven feature columns of ``X_test``. The model intercept is not part of
    the score.

    Args:
        X_train (pd.DataFrame): The training features. Its column order is
            used to name the fitted coefficients.
        y_train (pd.Series): The training target variable.
        X_test (pd.DataFrame): The testing features. Must contain the columns
            'Num_Credit_Card', 'Changed_Credit_Limit', 'Delay_from_due_date',
            'Interest_Rate', 'Outstanding_Debt', 'Credit_Mix_Good' and
            'Credit_Mix_Standard'.
    Returns:
        tuple: ``(score, classification)``. ``score`` is a pd.Series with one
        value per row of ``X_test`` in row order, indexed 0..n-1 regardless of
        the index of ``X_test``. ``classification`` is the result of
        ``model.predict(X_test)``.
    Raises:
        KeyError: If one of the seven feature columns is missing from
            ``X_train.columns`` or from ``X_test``.
    """
    model = LogisticRegression(
        max_iter=1000,
        class_weight='balanced'
    )
    model.fit(X_train, y_train)

    # NOTE(review): only the first row of coef_ is used. scikit-learn stores a
    # single row for a binary target; a multiclass target would have one row
    # per class -- confirm the target is binary.
    coefficients = model.coef_[0]
    coef_dict = dict(zip(X_train.columns, coefficients))

    # compute_credit_score is plain arithmetic, so passing whole columns
    # evaluates the same products and sums element-wise for every row at once
    # instead of building a Series per row with iterrows().
    score = compute_credit_score(
        coef_dict,
        X_test['Num_Credit_Card'], X_test['Changed_Credit_Limit'], X_test['Delay_from_due_date'],
        X_test['Interest_Rate'], X_test['Outstanding_Debt'], X_test['Credit_Mix_Good'],
        X_test['Credit_Mix_Standard']
    # Callers previously received a Series with a fresh 0..n-1 index; keep it.
    ).reset_index(drop=True)
    classification = model.predict(X_test)

    return score, classification


def synthetic_data_credit_model(X_train, y_train, X_test):
    """Fit a logistic regression on synthetic data and score the test features.

    The model is fitted on ``X_train`` / ``y_train`` (intended for the
    synthetic training data) with default class weights. Its coefficients are
    paired with ``X_train.columns`` and used as weights for a linear credit
    score over seven feature columns of ``X_test``. The model intercept is not
    part of the score.

    Args:
        X_train (pd.DataFrame): The synthetic training features. Its column
            order is used to name the fitted coefficients.
        y_train (pd.Series): The synthetic training target variable.
        X_test (pd.DataFrame): The real testing features. Must contain the
            columns 'Num_Credit_Card', 'Changed_Credit_Limit',
            'Delay_from_due_date', 'Interest_Rate', 'Outstanding_Debt',
            'Credit_Mix_Good' and 'Credit_Mix_Standard'.
    Returns:
        tuple: ``(score, classification)``. ``score`` is a pd.Series with one
        value per row of ``X_test`` in row order, indexed 0..n-1 regardless of
        the index of ``X_test``. ``classification`` is the result of
        ``model.predict(X_test)``.
    Raises:
        KeyError: If one of the seven feature columns is missing from
            ``X_train.columns`` or from ``X_test``.
    """
    # NOTE(review): no class_weight is set here, so scikit-learn's default
    # (unweighted) applies -- confirm this is intended for synthetic data.
    model = LogisticRegression(
        max_iter=1_000,
    )
    model.fit(X_train, y_train)

    # NOTE(review): only the first row of coef_ is used. scikit-learn stores a
    # single row for a binary target; a multiclass target would have one row
    # per class -- confirm the target is binary.
    coefficients = model.coef_[0]
    coef_dict = dict(zip(X_train.columns, coefficients))

    # compute_credit_score is plain arithmetic, so passing whole columns
    # evaluates the same products and sums element-wise for every row at once
    # instead of building a Series per row with iterrows().
    score = compute_credit_score(
        coef_dict,
        X_test['Num_Credit_Card'], X_test['Changed_Credit_Limit'], X_test['Delay_from_due_date'],
        X_test['Interest_Rate'], X_test['Outstanding_Debt'], X_test['Credit_Mix_Good'],
        X_test['Credit_Mix_Standard']
    # Callers previously received a Series with a fresh 0..n-1 index; keep it.
    ).reset_index(drop=True)
    classification = model.predict(X_test)

    return score, classification