""" Make Advisory Predictions with Explainability ============================================= GOVERNANCE CONSTRAINTS: - Advisory system only (NO autonomous decisions) - Human-in-the-loop is MANDATORY - All outputs are NON-BINDING suggestions - Full explainability required (confidence, feature importance, rule signals) Purpose: Generate advisory predictions with complete transparency """ import numpy as np import joblib import json import yaml from datetime import datetime # FROZEN DECISION BOUNDARIES - DO NOT MODIFY (from decision_spec.yaml) DECISION_BOUNDARIES = { 'damage_thresholds': { 'low': 5000, 'medium': 15000, 'high': 50000 }, 'risk_weights': { 'low': 1.0, 'medium': 1.5, 'high': 2.0 }, 'injury_multiplier': 1.8, 'severity_thresholds': { 'low': 5, 'medium': 15 } } def load_model_artifacts(): """ Load trained model and encoders. """ model = joblib.load('model.pkl') encoders = joblib.load('encoders.pkl') with open('model_metadata.json', 'r') as f: metadata = json.load(f) return model, encoders, metadata def generate_rule_signals(claim_type, damage_amount, injury_involved, risk_factor): """ Generate human-readable rule signals based on frozen decision boundaries. This provides transparent explanation of which rules are triggered. """ signals = [] # Damage threshold signals if damage_amount < DECISION_BOUNDARIES['damage_thresholds']['low']: signals.append(f"✓ Low damage (<${DECISION_BOUNDARIES['damage_thresholds']['low']:,}): ${damage_amount:,.2f}") elif damage_amount < DECISION_BOUNDARIES['damage_thresholds']['medium']: signals.append(f"⚠ Medium damage (${DECISION_BOUNDARIES['damage_thresholds']['low']:,}-${DECISION_BOUNDARIES['damage_thresholds']['medium']:,}): ${damage_amount:,.2f}") elif damage_amount < DECISION_BOUNDARIES['damage_thresholds']['high']: signals.append(f"⚠⚠ High damage (${DECISION_BOUNDARIES['damage_thresholds']['medium']:,}-${DECISION_BOUNDARIES['damage_thresholds']['high']:,}): ${damage_amount:,.2f}") else: signals.append(f"⚠⚠⚠ Very high damage (≥${DECISION_BOUNDARIES['damage_thresholds']['high']:,}): ${damage_amount:,.2f}") # Injury signal if injury_involved: signals.append(f"⚠ Injury involved (multiplier: {DECISION_BOUNDARIES['injury_multiplier']}x)") else: signals.append(f"✓ No injury involved") # Risk factor signal risk_weight = DECISION_BOUNDARIES['risk_weights'][risk_factor.lower()] if risk_factor.lower() == 'high': signals.append(f"⚠⚠ High risk factor (weight: {risk_weight}x)") elif risk_factor.lower() == 'medium': signals.append(f"⚠ Medium risk factor (weight: {risk_weight}x)") else: signals.append(f"✓ Low risk factor (weight: {risk_weight}x)") # Claim type signal if claim_type == "Liability": signals.append(f"⚠ Liability claim (additional multiplier applied)") else: signals.append(f"Claim type: {claim_type}") return signals def calculate_uncertainty(prediction_proba): """ Calculate prediction uncertainty using entropy. Returns: dict with uncertainty level and metrics """ # Calculate entropy epsilon = 1e-10 entropy = -np.sum(prediction_proba * np.log(prediction_proba + epsilon)) max_entropy = np.log(len(prediction_proba)) normalized_entropy = entropy / max_entropy # Determine uncertainty level if normalized_entropy < 0.3: level = "Low" interpretation = "Model is confident in this prediction" elif normalized_entropy < 0.6: level = "Medium" interpretation = "Model has moderate uncertainty - extra human scrutiny recommended" else: level = "High" interpretation = "Model is uncertain - REQUIRES careful human review" return { 'level': level, 'entropy': float(entropy), 'normalized_entropy': float(normalized_entropy), 'interpretation': interpretation, 'confidence_distribution': { 'Low': float(prediction_proba[0]), 'Medium': float(prediction_proba[1]) if len(prediction_proba) > 1 else 0.0, 'High': float(prediction_proba[2]) if len(prediction_proba) > 2 else 0.0 } } def get_feature_importance_for_prediction(model, feature_values): """ Get feature importance specific to this prediction. Uses the model's global feature importance as a proxy. For tree-based models, this represents which features were most influential. """ feature_names = ['claim_type', 'damage_amount', 'injury_involved', 'risk_factor'] global_importance = model.feature_importances_ # Create importance dictionary importance_dict = {} for name, importance, value in zip(feature_names, global_importance, feature_values): importance_dict[name] = { 'importance_score': float(importance), 'value': value, 'relative_percentage': float(importance / np.sum(global_importance) * 100) } # Sort by importance sorted_features = sorted(importance_dict.items(), key=lambda x: x[1]['importance_score'], reverse=True) return dict(sorted_features) def predict_claim(claim_type, damage_amount, injury_involved, risk_factor): """ Make advisory prediction for insurance claim. Args: claim_type: str - "Auto", "Property", "Health", or "Liability" damage_amount: float - Damage amount in USD injury_involved: bool - Whether injury is involved risk_factor: str - "low", "medium", or "high" Returns: dict with complete advisory prediction and explainability """ # Load model artifacts model, encoders, metadata = load_model_artifacts() # Validate inputs valid_claim_types = ['Auto', 'Property', 'Health', 'Liability'] valid_risk_factors = ['low', 'medium', 'high'] if claim_type not in valid_claim_types: raise ValueError(f"Invalid claim_type. Must be one of: {valid_claim_types}") if risk_factor not in valid_risk_factors: raise ValueError(f"Invalid risk_factor. Must be one of: {valid_risk_factors}") if damage_amount < 0: raise ValueError("damage_amount must be non-negative") # Encode inputs claim_type_encoded = encoders['claim_type'].transform([claim_type])[0] risk_factor_encoded = encoders['risk_factor'].transform([risk_factor])[0] injury_involved_encoded = int(injury_involved) # Create feature vector features = np.array([[ claim_type_encoded, damage_amount, injury_involved_encoded, risk_factor_encoded ]]) # Make prediction prediction = model.predict(features)[0] prediction_proba = model.predict_proba(features)[0] # Get severity label severity = encoders['target'].inverse_transform([prediction])[0] confidence = float(np.max(prediction_proba)) # Generate explainability artifacts rule_signals = generate_rule_signals(claim_type, damage_amount, injury_involved, risk_factor) uncertainty = calculate_uncertainty(prediction_proba) feature_importance = get_feature_importance_for_prediction( model, [claim_type, damage_amount, injury_involved, risk_factor] ) # Compile advisory output advisory_output = { # GOVERNANCE: All outputs clearly marked as ADVISORY 'governance_status': '⚠ ADVISORY ONLY - HUMAN CONFIRMATION REQUIRED', 'decision_authority': 'HUMAN (not machine)', 'binding': False, 'requires_human_review': True, # Model suggestion (NON-BINDING) 'model_suggestion': f"{severity} Severity (Advisory)", 'severity_level': severity, 'confidence_score': confidence, # Input summary 'input_summary': { 'claim_type': claim_type, 'damage_amount': f"${damage_amount:,.2f}", 'injury_involved': 'Yes' if injury_involved else 'No', 'risk_factor': risk_factor }, # Explainability 'rule_signals': rule_signals, 'feature_importance': feature_importance, 'uncertainty_assessment': uncertainty, # Prediction metadata 'prediction_metadata': { 'model_type': metadata['model_type'], 'model_architecture': metadata['model_architecture'], 'prediction_timestamp': datetime.now().isoformat(), 'dataset_source': metadata['dataset'] }, # Governance reminders 'governance_reminders': [ '⚠ This is an ADVISORY suggestion only', '⚠ Human decision-maker has FULL AUTHORITY to accept or override', '⚠ Human must independently evaluate the claim', '⚠ Human must document rationale for final decision', '⚠ All decisions must be logged in audit trail' ], # Decision boundaries reference 'decision_boundaries_reference': DECISION_BOUNDARIES } return advisory_output def format_advisory_output(output): """ Format advisory output for human-readable display. """ print("\n" + "="*70) print("INSURANCE CLAIM ADVISORY PREDICTION") print("="*70) print(f"\n{output['governance_status']}") print(f"Decision Authority: {output['decision_authority']}") print(f"Binding: {output['binding']}") print(f"\n{'='*70}") print("INPUT SUMMARY") print(f"{'='*70}") for key, value in output['input_summary'].items(): print(f" {key.replace('_', ' ').title()}: {value}") print(f"\n{'='*70}") print("MODEL ADVISORY SUGGESTION (Non-Binding)") print(f"{'='*70}") print(f" Suggested Severity: {output['model_suggestion']}") print(f" Model Confidence: {output['confidence_score']:.4f} ({output['confidence_score']*100:.2f}%)") print(f"\n{'='*70}") print("RULE SIGNALS (Transparent Decision Factors)") print(f"{'='*70}") for signal in output['rule_signals']: print(f" {signal}") print(f"\n{'='*70}") print("FEATURE IMPORTANCE (What Influenced This Suggestion)") print(f"{'='*70}") for feature, details in output['feature_importance'].items(): print(f" {feature}: {details['relative_percentage']:.1f}% importance") print(f"\n{'='*70}") print("UNCERTAINTY ASSESSMENT") print(f"{'='*70}") uncertainty = output['uncertainty_assessment'] print(f" Uncertainty Level: {uncertainty['level']}") print(f" Normalized Entropy: {uncertainty['normalized_entropy']:.4f}") print(f" Interpretation: {uncertainty['interpretation']}") print(f"\n Confidence Distribution:") for severity, prob in uncertainty['confidence_distribution'].items(): print(f" {severity}: {prob:.4f} ({prob*100:.2f}%)") print(f"\n{'='*70}") print("GOVERNANCE REMINDERS") print(f"{'='*70}") for reminder in output['governance_reminders']: print(f" {reminder}") print(f"\n{'='*70}\n") def main(): """ Example usage with sample claims. """ print("\n" + "="*70) print("ADVISORY PREDICTION SYSTEM - DEMONSTRATION") print("="*70) print("Model Type: Classical ML (XGBoost)") print("Governance: Human-in-the-Loop Required") print("="*70 + "\n") # Example 1: Low severity claim print("\n" + "="*70) print("EXAMPLE 1: Low Damage Auto Claim") print("="*70) output1 = predict_claim( claim_type="Auto", damage_amount=2500.0, injury_involved=False, risk_factor="low" ) format_advisory_output(output1) # Example 2: High severity claim print("\n" + "="*70) print("EXAMPLE 2: High Damage Liability Claim with Injury") print("="*70) output2 = predict_claim( claim_type="Liability", damage_amount=75000.0, injury_involved=True, risk_factor="high" ) format_advisory_output(output2) # Example 3: Medium severity claim print("\n" + "="*70) print("EXAMPLE 3: Medium Damage Property Claim") print("="*70) output3 = predict_claim( claim_type="Property", damage_amount=12000.0, injury_involved=False, risk_factor="medium" ) format_advisory_output(output3) print("\n" + "="*70) print("DEMONSTRATION COMPLETE") print("="*70) print("\nTo use this module in your code:") print(" from predict import predict_claim") print(" result = predict_claim('Auto', 5000.0, False, 'low')") print("="*70 + "\n") if __name__ == "__main__": main()