"""Project-wide constants and feature configuration for credit risk."""
from dataclasses import dataclass
from pathlib import Path
# Treated as the project root: two directory levels above the folder that
# contains this module.
PROJECT_ROOT = Path(__file__).resolve().parents[2]

# Filesystem locations, all anchored at PROJECT_ROOT.
DATA_RAW_PATH = PROJECT_ROOT.joinpath("data", "raw", "german_credit.csv")
DATA_PROCESSED_DIR = PROJECT_ROOT.joinpath("data", "processed")
MODEL_DIR = PROJECT_ROOT.joinpath("model")
REPORTS_DIR = PROJECT_ROOT.joinpath("reports")

# Column name of the prediction target.
TARGET_COLUMN = "Creditability"
# NOTE(review): presumably the UI placeholder meaning "no option chosen";
# confirm against the app code that consumes it.
NOT_SELECTED_LABEL = "Not selected"
@dataclass(frozen=True)
class FeatureOption:
    """One selectable UI choice paired with its one-hot encoded column."""

    # Text shown for this choice.
    label: str
    # Name of the one-hot encoded destination column for this choice.
    column: str
@dataclass(frozen=True)
class FeatureGroup:
    """A named group of options, used by the app and preprocessing logic."""

    # Name of the group.
    name: str
    # Name of the source column this group refers to.
    source_column: str
    # The choices belonging to this group, in declaration order.
    options: tuple[FeatureOption, ...]

    @property
    def labels(self) -> list[str]:
        """Return the label of every option, in declaration order."""
        return [choice.label for choice in self.options]

    def column_from_label(self, label: str | None) -> str | None:
        """Return the column of the first option whose label equals *label*.

        Returns ``None`` when *label* is ``None`` or matches no option.
        """
        if label is None:
            return None
        matching_columns = (
            choice.column for choice in self.options if choice.label == label
        )
        # The first match wins; the default covers an unknown label.
        return next(matching_columns, None)
# This tuple defines both the app controls and the final model input schema.
FEATURE_GROUPS: tuple[FeatureGroup, ...] = (
    FeatureGroup(
        name="Account Balance",
        source_column="Account Balance",
        options=(
            FeatureOption(label="No account", column="Account Balance_1"),
            FeatureOption(label="No balance", column="Account Balance_2"),
            FeatureOption(label="Some balance", column="Account Balance_3"),
        ),
    ),
    FeatureGroup(
        name="Payment Status of Previous Credit",
        source_column="Payment Status of Previous Credit",
        options=(
            FeatureOption(
                label="Some problems",
                column="Payment Status of Previous Credit_1",
            ),
            FeatureOption(
                label="No problems in this bank",
                column="Payment Status of Previous Credit_3",
            ),
        ),
    ),
    FeatureGroup(
        name="Purpose",
        source_column="Purpose",
        options=(
            FeatureOption(label="New car", column="Purpose_1"),
            FeatureOption(label="Other", column="Purpose_4"),
        ),
    ),
    FeatureGroup(
        name="Value Savings/Stocks",
        source_column="Value Savings/Stocks",
        options=(
            FeatureOption(label="No savings", column="Value Savings/Stocks_1"),
            FeatureOption(
                label="DM between [100, 1000]",
                column="Value Savings/Stocks_3",
            ),
            FeatureOption(label="DM >= 1000", column="Value Savings/Stocks_5"),
        ),
    ),
    FeatureGroup(
        name="Length of Current Employment",
        source_column="Length of current employment",
        options=(
            FeatureOption(
                label="Below 1 year (or unemployed)",
                column="Length of current employment_1",
            ),
            FeatureOption(
                label="Between 4 and 7 years",
                column="Length of current employment_4",
            ),
        ),
    ),
    FeatureGroup(
        name="Instalment Per Cent",
        source_column="Instalment per cent",
        options=(
            FeatureOption(label="Smaller than 20%", column="Instalment per cent_4"),
        ),
    ),
    FeatureGroup(
        name="Guarantors",
        source_column="Guarantors",
        options=(
            FeatureOption(label="No guarantors", column="Guarantors_1"),
        ),
    ),
    FeatureGroup(
        name="Duration in Current Address",
        source_column="Duration in Current address",
        options=(
            FeatureOption(
                label="Less than a year",
                column="Duration in Current address_1",
            ),
            FeatureOption(
                label="Between 1 and 4 years",
                column="Duration in Current address_2",
            ),
        ),
    ),
    FeatureGroup(
        name="Most Valuable Available Asset",
        source_column="Most valuable available asset",
        options=(
            FeatureOption(
                label="Not available / no assets",
                column="Most valuable available asset_1",
            ),
            FeatureOption(
                label="Ownership of house or land",
                column="Most valuable available asset_4",
            ),
        ),
    ),
    FeatureGroup(
        name="Concurrent Credits",
        source_column="Concurrent Credits",
        options=(
            FeatureOption(
                label="No further running credits",
                column="Concurrent Credits_3",
            ),
        ),
    ),
    FeatureGroup(
        name="Type of Apartment",
        source_column="Type of apartment",
        options=(
            FeatureOption(label="Free apartment", column="Type of apartment_1"),
        ),
    ),
    FeatureGroup(
        name="Number of Credits at this Bank",
        source_column="No of Credits at this Bank",
        options=(
            FeatureOption(label="One credit", column="No of Credits at this Bank_1"),
        ),
    ),
    FeatureGroup(
        name="Occupation",
        source_column="Occupation",
        options=(
            FeatureOption(
                label="Unemployed or unskilled with no permanent",
                column="Occupation_1",
            ),
        ),
    ),
)
# Flattened from FEATURE_GROUPS in declaration order (groups first, then each
# group's options) so training and inference share one deterministic input order.
SELECTED_FEATURES: list[str] = [
    choice.column
    for feature_group in FEATURE_GROUPS
    for choice in feature_group.options
]
|