# marcilioduarte's picture
# Restore full project refactor with scripts and clean tracked artifacts
# 75b9644
"""Project-wide constants and feature configuration for credit risk."""
from dataclasses import dataclass
from pathlib import Path
# Filesystem layout. PROJECT_ROOT is the directory two levels above the
# folder that contains this file; every other path is derived from it.
PROJECT_ROOT = Path(__file__).resolve().parents[2]
DATA_RAW_PATH = PROJECT_ROOT.joinpath("data", "raw", "german_credit.csv")
DATA_PROCESSED_DIR = PROJECT_ROOT.joinpath("data", "processed")
MODEL_DIR = PROJECT_ROOT.joinpath("model")
REPORTS_DIR = PROJECT_ROOT.joinpath("reports")

# Column and label names shared across the project.
# NOTE(review): TARGET_COLUMN is presumably the label column of the raw
# dataset, and NOT_SELECTED_LABEL the placeholder for "no option chosen" in
# the app; confirm against their callers.
TARGET_COLUMN = "Creditability"
NOT_SELECTED_LABEL = "Not selected"
@dataclass(frozen=True)
class FeatureOption:
    """Single UI option and its one-hot encoded destination column.

    Instances are immutable (``frozen=True``), so they are hashable and safe
    to share in module-level configuration.
    """

    # Human-readable text of the option.
    label: str
    # Name of the one-hot encoded column this option maps to.
    column: str
@dataclass(frozen=True)
class FeatureGroup:
    """Feature group used in the app and preprocessing logic.

    A group bundles the selectable options that belong to one source column.
    Instances are immutable (``frozen=True``).
    """

    # Group name. NOTE(review): presumably the caption shown in the app;
    # confirm against the UI code.
    name: str
    # Column name the group's options refer to. NOTE(review): presumably the
    # raw dataset column; confirm against the preprocessing code.
    source_column: str
    # Options of this group, in declaration order.
    options: tuple[FeatureOption, ...]

    @property
    def labels(self) -> list[str]:
        """Return the labels of all options, in declaration order."""
        return [option.label for option in self.options]

    def column_from_label(self, label: str | None) -> str | None:
        """Return the encoded column of the option whose label is *label*.

        Args:
            label: Option label to look up, or ``None``.

        Returns:
            The ``column`` of the first option whose label equals *label*, or
            ``None`` when *label* is ``None`` or matches no option.
        """
        if label is None:
            return None
        # First match wins; fall back to None when nothing matches.
        return next(
            (option.column for option in self.options if option.label == label),
            None,
        )
# This tuple defines both the app controls and the final model input schema.
# Group order and option order are significant: derived structures iterate
# over it in declaration order.
FEATURE_GROUPS: tuple[FeatureGroup, ...] = (
    FeatureGroup(
        name="Account Balance",
        source_column="Account Balance",
        options=(
            FeatureOption(label="No account", column="Account Balance_1"),
            FeatureOption(label="No balance", column="Account Balance_2"),
            FeatureOption(label="Some balance", column="Account Balance_3"),
        ),
    ),
    FeatureGroup(
        name="Payment Status of Previous Credit",
        source_column="Payment Status of Previous Credit",
        options=(
            FeatureOption(
                label="Some problems",
                column="Payment Status of Previous Credit_1",
            ),
            FeatureOption(
                label="No problems in this bank",
                column="Payment Status of Previous Credit_3",
            ),
        ),
    ),
    FeatureGroup(
        name="Purpose",
        source_column="Purpose",
        options=(
            FeatureOption(label="New car", column="Purpose_1"),
            FeatureOption(label="Other", column="Purpose_4"),
        ),
    ),
    FeatureGroup(
        name="Value Savings/Stocks",
        source_column="Value Savings/Stocks",
        options=(
            FeatureOption(label="No savings", column="Value Savings/Stocks_1"),
            FeatureOption(
                label="DM between [100, 1000]",
                column="Value Savings/Stocks_3",
            ),
            FeatureOption(label="DM >= 1000", column="Value Savings/Stocks_5"),
        ),
    ),
    FeatureGroup(
        name="Length of Current Employment",
        source_column="Length of current employment",
        options=(
            FeatureOption(
                label="Below 1 year (or unemployed)",
                column="Length of current employment_1",
            ),
            FeatureOption(
                label="Between 4 and 7 years",
                column="Length of current employment_4",
            ),
        ),
    ),
    FeatureGroup(
        name="Instalment Per Cent",
        source_column="Instalment per cent",
        options=(
            FeatureOption(label="Smaller than 20%", column="Instalment per cent_4"),
        ),
    ),
    FeatureGroup(
        name="Guarantors",
        source_column="Guarantors",
        options=(
            FeatureOption(label="No guarantors", column="Guarantors_1"),
        ),
    ),
    FeatureGroup(
        name="Duration in Current Address",
        source_column="Duration in Current address",
        options=(
            FeatureOption(
                label="Less than a year",
                column="Duration in Current address_1",
            ),
            FeatureOption(
                label="Between 1 and 4 years",
                column="Duration in Current address_2",
            ),
        ),
    ),
    FeatureGroup(
        name="Most Valuable Available Asset",
        source_column="Most valuable available asset",
        options=(
            FeatureOption(
                label="Not available / no assets",
                column="Most valuable available asset_1",
            ),
            FeatureOption(
                label="Ownership of house or land",
                column="Most valuable available asset_4",
            ),
        ),
    ),
    FeatureGroup(
        name="Concurrent Credits",
        source_column="Concurrent Credits",
        options=(
            FeatureOption(
                label="No further running credits",
                column="Concurrent Credits_3",
            ),
        ),
    ),
    FeatureGroup(
        name="Type of Apartment",
        source_column="Type of apartment",
        options=(
            FeatureOption(label="Free apartment", column="Type of apartment_1"),
        ),
    ),
    FeatureGroup(
        name="Number of Credits at this Bank",
        source_column="No of Credits at this Bank",
        options=(
            FeatureOption(label="One credit", column="No of Credits at this Bank_1"),
        ),
    ),
    FeatureGroup(
        name="Occupation",
        source_column="Occupation",
        options=(
            FeatureOption(
                label="Unemployed or unskilled with no permanent",
                column="Occupation_1",
            ),
        ),
    ),
)
# Encoded column names flattened from FEATURE_GROUPS in declaration order
# (groups first, then options within each group). Materialized once as a list
# so training and inference share the same deterministic input order.
SELECTED_FEATURES: list[str] = [
    opt.column
    for grp in FEATURE_GROUPS
    for opt in grp.options
]