| """Project-wide constants and feature configuration for credit risk.""" |
|
|
| from dataclasses import dataclass |
| from pathlib import Path |
|
|
# Filesystem layout. PROJECT_ROOT is the third ancestor of this file's resolved
# path (parents[0] is the containing directory), so this module is expected to
# sit two directory levels below the project root.
PROJECT_ROOT = Path(__file__).resolve().parents[2]
DATA_RAW_PATH = PROJECT_ROOT / "data" / "raw" / "german_credit.csv"
DATA_PROCESSED_DIR = PROJECT_ROOT / "data" / "processed"
MODEL_DIR = PROJECT_ROOT / "model"
REPORTS_DIR = PROJECT_ROOT / "reports"

# NOTE(review): presumably the name of the label column in the raw CSV -- confirm.
TARGET_COLUMN = "Creditability"
# NOTE(review): looks like the placeholder text for "no option chosen"; its
# consumers are not visible in this module -- confirm.
NOT_SELECTED_LABEL = "Not selected"
|
|
|
|
@dataclass(frozen=True)
class FeatureOption:
    """Single UI option and its one-hot encoded destination column.

    The dataclass is frozen, so instances are immutable after construction,
    compare by value and are hashable.
    """

    # Text identifying the option; matched against by label lookups.
    label: str
    # Name of the encoded column this option maps to.
    column: str
|
|
|
|
@dataclass(frozen=True)
class FeatureGroup:
    """Feature group used in the app and preprocessing logic.

    Bundles a display name, the name of the source column, and the options
    offered for that feature. The dataclass is frozen, so instances are
    immutable after construction.
    """

    name: str
    source_column: str
    options: tuple[FeatureOption, ...]

    @property
    def labels(self) -> list[str]:
        """Return the label of every option, in the order they are stored."""
        return [option.label for option in self.options]

    def column_from_label(self, label: str | None) -> str | None:
        """Return the column of the first option whose label equals ``label``.

        Args:
            label: Option label to look up, or ``None``.

        Returns:
            The matching option's ``column``, or ``None`` when ``label`` is
            ``None`` or no option carries that label.
        """
        if label is None:
            return None
        for option in self.options:
            if option.label == label:
                return option.column
        return None
|
|
|
|
| |
# Declarative catalogue of the feature groups. Within each group, every option
# column is spelled "<source_column>_<code>"; `name` may differ from
# `source_column` in wording or capitalisation.
FEATURE_GROUPS: tuple[FeatureGroup, ...] = (
    FeatureGroup(
        name="Account Balance",
        source_column="Account Balance",
        options=(
            FeatureOption("No account", "Account Balance_1"),
            FeatureOption("No balance", "Account Balance_2"),
            FeatureOption("Some balance", "Account Balance_3"),
        ),
    ),
    FeatureGroup(
        name="Payment Status of Previous Credit",
        source_column="Payment Status of Previous Credit",
        options=(
            FeatureOption("Some problems", "Payment Status of Previous Credit_1"),
            FeatureOption(
                "No problems in this bank", "Payment Status of Previous Credit_3"
            ),
        ),
    ),
    FeatureGroup(
        name="Purpose",
        source_column="Purpose",
        options=(
            FeatureOption("New car", "Purpose_1"),
            FeatureOption("Other", "Purpose_4"),
        ),
    ),
    FeatureGroup(
        name="Value Savings/Stocks",
        source_column="Value Savings/Stocks",
        options=(
            FeatureOption("No savings", "Value Savings/Stocks_1"),
            FeatureOption("DM between [100, 1000]", "Value Savings/Stocks_3"),
            FeatureOption("DM >= 1000", "Value Savings/Stocks_5"),
        ),
    ),
    FeatureGroup(
        name="Length of Current Employment",
        source_column="Length of current employment",
        options=(
            FeatureOption(
                "Below 1 year (or unemployed)", "Length of current employment_1"
            ),
            FeatureOption("Between 4 and 7 years", "Length of current employment_4"),
        ),
    ),
    FeatureGroup(
        name="Instalment Per Cent",
        source_column="Instalment per cent",
        options=(FeatureOption("Smaller than 20%", "Instalment per cent_4"),),
    ),
    FeatureGroup(
        name="Guarantors",
        source_column="Guarantors",
        options=(FeatureOption("No guarantors", "Guarantors_1"),),
    ),
    FeatureGroup(
        name="Duration in Current Address",
        source_column="Duration in Current address",
        options=(
            FeatureOption("Less than a year", "Duration in Current address_1"),
            FeatureOption("Between 1 and 4 years", "Duration in Current address_2"),
        ),
    ),
    FeatureGroup(
        name="Most Valuable Available Asset",
        source_column="Most valuable available asset",
        options=(
            FeatureOption(
                "Not available / no assets", "Most valuable available asset_1"
            ),
            FeatureOption(
                "Ownership of house or land", "Most valuable available asset_4"
            ),
        ),
    ),
    FeatureGroup(
        name="Concurrent Credits",
        source_column="Concurrent Credits",
        options=(FeatureOption("No further running credits", "Concurrent Credits_3"),),
    ),
    FeatureGroup(
        name="Type of Apartment",
        source_column="Type of apartment",
        options=(FeatureOption("Free apartment", "Type of apartment_1"),),
    ),
    FeatureGroup(
        name="Number of Credits at this Bank",
        source_column="No of Credits at this Bank",
        options=(FeatureOption("One credit", "No of Credits at this Bank_1"),),
    ),
    FeatureGroup(
        name="Occupation",
        source_column="Occupation",
        options=(
            FeatureOption("Unemployed or unskilled with no permanent", "Occupation_1"),
        ),
    ),
)
|
|
|
|
| |
# Flat list of every option column, ordered by group and then by option,
# exactly as declared in FEATURE_GROUPS.
SELECTED_FEATURES: list[str] = [
    option.column for group in FEATURE_GROUPS for option in group.options
]
|
|
|
|