Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
def get_model_expected_features():
    """Return the ordered list of feature names the trained model expects.

    The list is assembled group by group (transaction, card, address,
    distance, email, C/D/M/V series, identity, device, product) and a new
    list object is built on every call.
    """
    # Basic transaction fields come first.
    transaction_cols = ['TransactionAmt', 'TransactionDT']
    # card1 .. card6
    card_cols = [f'card{n}' for n in range(1, 7)]
    address_cols = ['addr1', 'addr2']
    distance_cols = ['dist1', 'dist2']
    email_cols = ['P_emaildomain', 'R_emaildomain']
    # Count features C1 .. C14
    count_cols = [f'C{n}' for n in range(1, 15)]
    # Time delta features D1 .. D15
    delta_cols = [f'D{n}' for n in range(1, 16)]
    # Match features M1 .. M9
    match_cols = [f'M{n}' for n in range(1, 10)]
    # Vesta features V1 .. V339 (full range)
    vesta_cols = [f'V{n}' for n in range(1, 340)]
    # Identity features id_01 .. id_38 (zero-padded to two digits)
    identity_cols = [f'id_{n:02d}' for n in range(1, 39)]
    device_cols = ['DeviceType', 'DeviceInfo']
    # Additional feature that might be present
    product_cols = ['ProductCD']

    return [
        *transaction_cols,
        *card_cols,
        *address_cols,
        *distance_cols,
        *email_cols,
        *count_cols,
        *delta_cols,
        *match_cols,
        *vesta_cols,
        *identity_cols,
        *device_cols,
        *product_cols,
    ]
def fill_missing_features(transaction_data):
    """Return a copy of ``transaction_data`` with every expected feature present.

    Values supplied by the caller are kept as-is. Each feature listed by
    ``get_model_expected_features()`` that is absent is filled from the
    defaults table below; a feature with no entry in that table falls back
    to ``0.0``. The input mapping itself is not modified.
    """
    # Count features (C1-C14): 0.0 unless listed as defaulting to 1.0.
    fallback = {f'C{n}': 0.0 for n in range(1, 15)}
    for n in (1, 2, 6, 9, 11, 12, 13, 14):
        fallback[f'C{n}'] = 1.0

    # Time delta features (D1-D15): 0.0, except D5.
    for n in range(1, 16):
        fallback[f'D{n}'] = 0.0
    fallback['D5'] = 20.0  # Common non-zero value

    # Match features (M1-M9): 'F' by default, 'T' for the first three,
    # and M4 uses its own label.
    for n in range(1, 10):
        fallback[f'M{n}'] = 'F'
    for n in (1, 2, 3):
        fallback[f'M{n}'] = 'T'
    fallback['M4'] = 'M0'  # Special case

    # Vesta features (V1-V339) default to 1.0.
    for n in range(1, 340):
        fallback[f'V{n}'] = 1.0

    # Identity features (id_01 to id_38) default to 0.0.
    for n in range(1, 39):
        fallback[f'id_{n:02d}'] = 0.0

    # Numeric card / address / distance defaults.
    fallback['card1'] = 13553
    fallback['card2'] = 150.0
    fallback['card3'] = 150.0
    fallback['card5'] = 142.0
    fallback['addr1'] = 325.0
    fallback['addr2'] = 87.0
    fallback['dist1'] = 19.0
    fallback['dist2'] = 19.0

    # Categorical defaults.
    fallback['card4'] = 'visa'
    fallback['card6'] = 'credit'
    fallback['P_emaildomain'] = 'gmail.com'
    fallback['R_emaildomain'] = 'gmail.com'
    fallback['DeviceType'] = 'desktop'
    fallback['DeviceInfo'] = 'Windows'
    fallback['ProductCD'] = 'W'

    # Default timestamp.
    fallback['TransactionDT'] = 86400

    # Start from the caller's data, then add whatever is still missing.
    filled = dict(transaction_data)
    for name in get_model_expected_features():
        filled.setdefault(name, fallback.get(name, 0.0))
    return filled
def create_simple_transaction(amount, card_type="visa", email_domain="gmail.com", hour=12):
    """Build a full transaction record from a handful of inputs.

    ``amount`` is converted with ``float``; ``hour`` is multiplied by 3600
    to produce ``TransactionDT``; ``email_domain`` is used for both the
    ``P_emaildomain`` and ``R_emaildomain`` fields. All remaining features
    are supplied by ``fill_missing_features``.
    """
    amount_value = float(amount)
    transaction_dt = hour * 3600  # hour expressed in seconds

    seed = dict(
        TransactionAmt=amount_value,
        TransactionDT=transaction_dt,
        card4=card_type,
        P_emaildomain=email_domain,
        R_emaildomain=email_domain,
    )
    return fill_missing_features(seed)
def validate_features(df, expected_features):
    """Check a DataFrame's columns against the expected feature names.

    Args:
        df: Object exposing an iterable ``columns`` attribute
            (e.g. a pandas DataFrame).
        expected_features: Iterable of feature names that should be present.

    Returns:
        A dict with three keys:
          * ``'missing'``: expected names absent from ``df.columns``, in the
            order they appear in ``expected_features``.
          * ``'extra'``: column names not in ``expected_features``, in column
            order.
          * ``'is_valid'``: ``True`` when nothing is missing (extra columns
            do not affect validity).

        Both lists are de-duplicated and deterministically ordered, so
        repeated runs produce identical reports.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order; plain
    # set iteration order for strings varies between interpreter runs.
    expected_ordered = list(dict.fromkeys(expected_features))
    present_ordered = list(dict.fromkeys(df.columns))

    expected_lookup = set(expected_ordered)
    present_lookup = set(present_ordered)

    missing = [name for name in expected_ordered if name not in present_lookup]
    extra = [name for name in present_ordered if name not in expected_lookup]

    return {
        'missing': missing,
        'extra': extra,
        'is_valid': not missing,
    }