hicxai-condition-2 / src /xai_methods.py
Suvh
Update to v1.1-chatty-luna (2025-12-07)
070061f
import shap
import numpy as np
import dice_ml
from anchor import anchor_tabular
import matplotlib.pyplot as plt
import os
from constraints import *
# Mode selection: 'full' requires dtreeviz; 'lite' skips it (good for Streamlit)
_MODE = os.getenv('HICXAI_MODE', 'lite').strip().lower()
# User-friendly feature name mappings (for international users)
FEATURE_DISPLAY_NAMES = {
# Workclass (employment type)
'workclass_Private': 'Private sector',
'workclass_Self-emp-not-inc': 'Self-employed',
'workclass_Self-emp-inc': 'Self-employed (business owner)',
'workclass_Federal-gov': 'Federal government',
'workclass_Local-gov': 'Local government',
'workclass_State-gov': 'State government',
'workclass_Without-pay': 'Unpaid work',
'workclass_Never-worked': 'Never worked',
# Education
'education_Preschool': 'Preschool',
'education_1st-4th': 'Elementary (1-4 years)',
'education_5th-6th': 'Elementary (5-6 years)',
'education_7th-8th': 'Middle school (7-8 years)',
'education_9th': 'High school (9th year)',
'education_10th': 'High school (10th year)',
'education_11th': 'High school (11th year)',
'education_12th': 'High school (12th year)',
'education_HS-grad': 'High school graduate',
'education_Some-college': 'Some college',
'education_Assoc-voc': 'Vocational degree',
'education_Assoc-acdm': 'Associate degree',
'education_Bachelors': 'Bachelor\'s degree',
'education_Masters': 'Master\'s degree',
'education_Prof-school': 'Professional degree',
'education_Doctorate': 'Doctorate',
'education_num': 'Education level',
# Marital status
'marital_status_Married-civ-spouse': 'Married',
'marital_status_Married-spouse-absent': 'Married (separated)',
'marital_status_Married-AF-spouse': 'Married (military)',
'marital_status_Never-married': 'Never married',
'marital_status_Divorced': 'Divorced',
'marital_status_Separated': 'Separated',
'marital_status_Widowed': 'Widowed',
# Occupation
'occupation_Tech-support': 'Technical support',
'occupation_Craft-repair': 'Skilled trades',
'occupation_Other-service': 'Service worker',
'occupation_Sales': 'Sales',
'occupation_Exec-managerial': 'Executive/Manager',
'occupation_Prof-specialty': 'Professional',
'occupation_Handlers-cleaners': 'Handler/Cleaner',
'occupation_Machine-op-inspct': 'Machine operator',
'occupation_Adm-clerical': 'Administrative',
'occupation_Farming-fishing': 'Farming/Fishing',
'occupation_Transport-moving': 'Transportation',
'occupation_Priv-house-serv': 'Household service',
'occupation_Protective-serv': 'Protective services',
'occupation_Armed-Forces': 'Military',
# Relationship
'relationship_Husband': 'Husband',
'relationship_Wife': 'Wife',
'relationship_Own-child': 'Child',
'relationship_Not-in-family': 'Not in family',
'relationship_Other-relative': 'Other relative',
'relationship_Unmarried': 'Unmarried partner',
# Race/Ethnicity
'race_White': 'White',
'race_Black': 'Black',
'race_Asian-Pac-Islander': 'Asian/Pacific Islander',
'race_Amer-Indian-Eskimo': 'Indigenous American',
'race_Other': 'Other',
# Sex
'sex_Male': 'Male',
'sex_Female': 'Female',
# Native Country
'native_country_United-States': 'United States',
'native_country_Cambodia': 'Cambodia',
'native_country_Canada': 'Canada',
'native_country_China': 'China',
'native_country_Columbia': 'Colombia',
'native_country_Cuba': 'Cuba',
'native_country_Dominican-Republic': 'Dominican Republic',
'native_country_Ecuador': 'Ecuador',
'native_country_El-Salvador': 'El Salvador',
'native_country_England': 'England',
'native_country_France': 'France',
'native_country_Germany': 'Germany',
'native_country_Greece': 'Greece',
'native_country_Guatemala': 'Guatemala',
'native_country_Haiti': 'Haiti',
'native_country_Holand-Netherlands': 'Netherlands',
'native_country_Honduras': 'Honduras',
'native_country_Hong': 'Hong Kong',
'native_country_Hungary': 'Hungary',
'native_country_India': 'India',
'native_country_Iran': 'Iran',
'native_country_Ireland': 'Ireland',
'native_country_Italy': 'Italy',
'native_country_Jamaica': 'Jamaica',
'native_country_Japan': 'Japan',
'native_country_Laos': 'Laos',
'native_country_Mexico': 'Mexico',
'native_country_Nicaragua': 'Nicaragua',
'native_country_Outlying-US(Guam-USVI-etc)': 'US Territory (Guam, Virgin Islands)',
'native_country_Peru': 'Peru',
'native_country_Philippines': 'Philippines',
'native_country_Poland': 'Poland',
'native_country_Portugal': 'Portugal',
'native_country_Puerto-Rico': 'Puerto Rico',
'native_country_Scotland': 'Scotland',
'native_country_South': 'South Korea',
'native_country_Taiwan': 'Taiwan',
'native_country_Thailand': 'Thailand',
'native_country_Trinadad&Tobago': 'Trinidad & Tobago',
'native_country_Vietnam': 'Vietnam',
'native_country_Yugoslavia': 'Former Yugoslavia',
# Numerical features
'age': 'Age',
'fnlwgt': 'Census weight',
'capital_gain': 'Capital gains',
'capital_loss': 'Capital losses',
'hours_per_week': 'Work hours per week',
}
def get_friendly_feature_name(feature_name):
"""Convert technical feature name to user-friendly display name"""
return FEATURE_DISPLAY_NAMES.get(feature_name, feature_name.replace('_', ' ').title())
# Visualization deps
try:
import dtreeviz # noqa: F401
import graphviz # noqa: F401
_DTREEVIZ_AVAILABLE = True
except Exception:
_DTREEVIZ_AVAILABLE = False
if _MODE == 'full':
raise ImportError(
"dtreeviz/graphviz are required in FULL mode. Install with conda: 'conda install -c conda-forge graphviz python-graphviz' and pip: 'pip install dtreeviz'"
)
def explain_with_shap(agent, question_id=None):
"""SHAP explanation using actual SHAP values from the model"""
try:
from ab_config import config
import pandas as pd
predicted_class = getattr(agent, 'predicted_class', 'unknown')
current_instance = agent.current_instance
# Get LOCAL SHAP values in probability space
# This shows how much each feature contributed to THIS user's prediction
# Note: agent.data['X_display'] contains RAW data; model was trained on PREPROCESSED data
# Get feature names from the trained model
if hasattr(agent.clf_display, 'feature_names_in_'):
feature_names = agent.clf_display.feature_names_in_.tolist()
else:
# Fallback: use raw feature names (will likely fail if model is trained on encoded data)
feature_names = agent.data['X_display'].columns.tolist()
shap_values_computed = None
instance_df = None
shap_contributions = {} # Feature -> contribution in probability space (percentage points)
base_value = None
pred_prob = None
# Compute SHAP in probability space (FAST - no hanging with TreeExplainer)
try:
# Prepare instance data
# current_instance should already be preprocessed (with one-hot encoded columns)
if current_instance is not None:
if hasattr(current_instance, 'to_frame'):
instance_df = current_instance.to_frame().T
elif hasattr(current_instance, 'to_dict'):
instance_df = pd.DataFrame([current_instance.to_dict()])
elif isinstance(current_instance, dict):
instance_df = pd.DataFrame([current_instance])
else:
instance_df = pd.DataFrame([current_instance])
# Ensure column order matches training data
# Add missing columns with 0 (for one-hot encoded features not present)
for col in feature_names:
if col not in instance_df.columns:
instance_df[col] = 0
instance_df = instance_df[feature_names]
# Initialize TreeExplainer (returns probability space for RandomForest)
explainer = shap.TreeExplainer(agent.clf_display)
# Compute local SHAP values for this instance
shap_values = explainer.shap_values(instance_df)
base_value_raw = explainer.expected_value
# Get predicted probability
pred_prob = float(agent.clf_display.predict_proba(instance_df)[0, 1])
# Extract SHAP contributions (percentage points) for positive class
# TreeExplainer returns probabilities directly for tree-based models
if isinstance(shap_values, list):
# Binary classification: [negative_class_shap, positive_class_shap]
shap_vals_array = shap_values[1][0]
base_value = float(base_value_raw[1])
else:
# Shape: (n_samples, n_features, n_classes) or (n_features, n_classes)
if len(shap_values.shape) == 3:
shap_vals_array = shap_values[0, :, 1]
base_value = float(base_value_raw[1])
else:
shap_vals_array = shap_values[:, 1]
base_value = float(base_value_raw[1])
# Store contributions in dictionary
for idx, feature in enumerate(feature_names):
shap_contributions[feature] = float(shap_vals_array[idx])
shap_values_computed = shap_vals_array
# Sanity check: contributions should sum approximately to prediction
approx_prob = base_value + sum(shap_contributions.values())
if abs(approx_prob - pred_prob) > 0.05:
print(f"Warning: SHAP additivity check: {approx_prob:.3f} vs {pred_prob:.3f}")
except Exception as e:
print(f"SHAP computation failed: {e}")
# Fallback to feature importances
if hasattr(agent.clf_display, 'feature_importances_'):
importances = agent.clf_display.feature_importances_
for idx, feature in enumerate(feature_names):
if importances[idx] > 0.001:
shap_contributions[feature] = float(importances[idx])
# Get prediction probability for fallback
if instance_df is not None:
pred_prob = float(agent.clf_display.predict_proba(instance_df)[0, 1])
base_value = 0.5 # Reasonable baseline
# Build natural language explanation with actual user values
feature_impacts = []
positive_factors = []
negative_factors = []
# Convert Series to dict if needed for easier access
instance_dict = None
if current_instance is not None:
if hasattr(current_instance, 'to_dict'):
instance_dict = current_instance.to_dict()
elif isinstance(current_instance, dict):
instance_dict = current_instance
else:
# Fallback: try to convert to dict
try:
instance_dict = dict(current_instance)
except:
instance_dict = {}
# For categorical features that are one-hot encoded, we need to find the original value
# by checking which encoded column has value 1
def get_categorical_value(feature_base):
"""Extract original categorical value from one-hot encoded columns"""
if not instance_dict:
return None
# Look for columns like 'workclass_Private', 'workclass_Self-emp-not-inc'
matching_cols = [col for col in instance_dict.keys() if col.startswith(f"{feature_base}_")]
for col in matching_cols:
if instance_dict.get(col) == 1 or instance_dict.get(col) == 1.0:
# Extract the value after the underscore
return col.split(f"{feature_base}_", 1)[1] if "_" in col else None
return None
# Check if we have any SHAP contribution data
if not shap_contributions:
return {
'type': 'error',
'explanation': "Unable to compute SHAP contributions. The model may not have sufficient data.",
'error': 'No SHAP values computed'
}
# Sort by absolute contribution (most impactful features)
sorted_features = sorted(shap_contributions.items(), key=lambda x: abs(x[1]), reverse=True)
# Prioritize capital_gain if user has significant gains (moves it to top of list)
capital_gain_val = instance_dict.get('capital_gain', 0) if instance_dict else 0
if capital_gain_val > 5000: # Significant capital gains
# Find capital_gain in sorted features and move to front
capital_idx = next((i for i, (f, _) in enumerate(sorted_features) if f == 'capital_gain'), None)
if capital_idx is not None and capital_idx > 0:
capital_item = sorted_features.pop(capital_idx)
sorted_features.insert(0, capital_item)
for feature, impact in sorted_features[:15]: # Check more features to get valid ones
# Skip technical features first (before any processing)
if feature in ['fnlwgt', 'education_num']: # fnlwgt is census weight, education_num is redundant
continue
# Check if this is a one-hot encoded feature (e.g., workclass_Private)
categorical_prefixes = ['workclass_', 'education_', 'marital_status_', 'occupation_',
'relationship_', 'race_', 'sex_', 'native_country_']
is_onehot = any(feature.startswith(prefix) for prefix in categorical_prefixes)
if is_onehot:
# Extract base feature and value (e.g., 'workclass_Private' -> base='workclass', value='Private')
for prefix in categorical_prefixes:
if feature.startswith(prefix):
feature_base = prefix.rstrip('_')
actual_value = feature.replace(prefix, '')
break
else:
# Regular numeric feature
actual_value = instance_dict.get(feature, None) if instance_dict else None
feature_base = feature
# Skip if value is missing
if actual_value is None or str(actual_value).strip() == '':
continue
# Create natural language description using GLOBAL FEATURE_DISPLAY_NAMES
friendly_feature = get_friendly_feature_name(feature if not is_onehot else feature_base)
# Format value with appropriate units/formatting
if feature_base == 'age':
formatted_value = f"{actual_value} years old"
elif feature_base == 'hours_per_week':
formatted_value = f"{actual_value} hours per week"
elif feature_base == 'capital_gain' or feature_base == 'capital_loss':
formatted_value = f"${actual_value:,}" if isinstance(actual_value, (int, float)) else str(actual_value)
else:
formatted_value = str(actual_value)
factor_desc = f"Your {friendly_feature.lower()} ({formatted_value})"
if impact > 0:
positive_factors.append(factor_desc)
feature_impacts.append(f"{feature} increases the prediction probability by {impact:.3f}")
else:
negative_factors.append(factor_desc)
feature_impacts.append(f"{feature} decreases the prediction probability by {abs(impact):.3f}")
# Stop once we have enough features (8-10 total)
if len(positive_factors) + len(negative_factors) >= 10:
break
# Generate explanation with REASONING based on approval/denial
# Extract key values for reasoning
def fmt_money(x):
return f"${x:,.0f}" if isinstance(x, (int, float)) else "N/A"
cg = instance_dict.get('capital_gain') if instance_dict else None
cl = instance_dict.get('capital_loss') if instance_dict else None
age = instance_dict.get('age') if instance_dict else None
hrs = instance_dict.get('hours_per_week') if instance_dict else None
edu = instance_dict.get('education') if instance_dict else None
# Determine if approved - check the actual loan decision, not model prediction
# The model predicts income level (>50K or <=50K), but loan approval is a separate business decision
if hasattr(agent, 'loan_approved') and agent.loan_approved is not None:
approved = agent.loan_approved
elif predicted_class in ['>50K', '1']:
# If >50K income, likely approved
approved = True
else:
# If <=50K income, likely denied
approved = False
# Build explanation with REASONING
# KEY INSIGHT: All features except capital_loss are positively correlated with approval
# They might not be "enough" but they don't hurt - only capital_loss can truly hurt
# Collect top features with their values
top_feature_list = []
for feature, impact in sorted_features[:8]:
# Get actual value
if feature in instance_dict:
value = instance_dict[feature]
else:
# Handle one-hot encoded
for prefix in ['workclass_', 'education_', 'marital_status_', 'occupation_', 'relationship_', 'race_', 'sex_', 'native_country_']:
if feature.startswith(prefix):
value = feature.replace(prefix, '')
break
else:
value = None
if value is not None:
top_feature_list.append((feature, value, impact))
# Approval threshold
tau = 0.50
gap_to_threshold = max(0.0, tau - pred_prob) if pred_prob is not None else 0.0
# ===== DATA-DRIVEN APPROACH: Extract structured data for LLM =====
# Separate positive and negative contributions
positive_contribs = [(f, v, delta) for f, v, delta in top_feature_list if delta > 0]
negative_contribs = [(f, v, delta) for f, v, delta in top_feature_list if delta < 0]
# Build structured data dictionary
structured_data = {
'decision': 'approved' if approved else 'denied',
'base_probability': f"{base_value*100:.1f}%" if base_value is not None else "N/A",
'predicted_probability': f"{pred_prob*100:.1f}%" if pred_prob is not None else "N/A",
'threshold': f"{tau*100:.0f}%",
'gap_to_threshold': f"{gap_to_threshold*100:.1f} pts" if gap_to_threshold > 0 else "0.0 pts",
'total_adjustment': f"{(pred_prob - base_value)*100:+.1f} pts" if (pred_prob is not None and base_value is not None) else "N/A",
'positive_factors': [],
'negative_factors': []
}
# Format positive contributors
for feature, value, delta in positive_contribs[:5]:
friendly_name = get_friendly_feature_name(feature)
factor_entry = {
'feature': friendly_name,
'impact': f"+{delta*100:.1f} pts",
'impact_numeric': delta * 100
}
if 'capital_gain' in feature or 'capital_loss' in feature:
factor_entry['value'] = fmt_money(value)
elif 'hours' in feature:
factor_entry['value'] = f"{value} hours/week"
elif 'age' in feature:
factor_entry['value'] = f"{value} years"
else:
factor_entry['value'] = str(value)
structured_data['positive_factors'].append(factor_entry)
# Format negative contributors
for feature, value, delta in negative_contribs[:5]:
friendly_name = get_friendly_feature_name(feature)
factor_entry = {
'feature': friendly_name,
'impact': f"{delta*100:.1f} pts",
'impact_numeric': delta * 100
}
if 'capital_gain' in feature or 'capital_loss' in feature:
factor_entry['value'] = fmt_money(value)
elif 'hours' in feature:
factor_entry['value'] = f"{value} hours/week"
elif 'age' in feature:
factor_entry['value'] = f"{value} years"
else:
factor_entry['value'] = str(value)
structured_data['negative_factors'].append(factor_entry)
# Generate explanation from data using LLM (respects anthropomorphism condition)
explanation = None
try:
from natural_conversation import generate_from_data
print(f"🤖 DEBUG: Generating SHAP explanation from data (anthropomorphic={config.show_anthropomorphic})...")
explanation = generate_from_data(
data=structured_data,
explanation_type='shap',
high_anthropomorphism=config.show_anthropomorphic
)
if explanation and len(explanation) > 50:
print(f"✅ DEBUG: Generated explanation ({len(explanation)} chars)")
else:
print(f"⚠️ DEBUG: LLM generation failed or too short")
explanation = None
except Exception as e:
print(f"❌ DEBUG: LLM generation failed: {e}")
explanation = None
# Fallback templates if LLM fails (preserves experimental conditions)
if not explanation:
print("⚠️ DEBUG: Using fallback template")
if config.show_anthropomorphic:
# High anthropomorphism fallback
if approved:
explanation = f"Thanks for waiting — your application was approved! 🎉\n\n"
explanation += f"Starting from {structured_data['base_probability']}, key factors helped:\n"
for factor in structured_data['positive_factors'][:4]:
explanation += f"• {factor['feature']} ({factor['value']}): **{factor['impact']}**\n"
explanation += f"\nFinal score: **{structured_data['predicted_probability']}** (threshold: {structured_data['threshold']}) ✨"
else:
explanation = f"I'm sorry this wasn't the news you were hoping for. 😔\n\n"
explanation += f"Starting from {structured_data['base_probability']}, here's what happened:\n\n"
if structured_data['positive_factors']:
explanation += "**What helped:**\n"
for factor in structured_data['positive_factors'][:3]:
explanation += f"• {factor['feature']} ({factor['value']}): **{factor['impact']}**\n"
if structured_data['negative_factors']:
explanation += "\n**What held back:**\n"
for factor in structured_data['negative_factors'][:2]:
explanation += f"• {factor['feature']} ({factor['value']}): **{factor['impact']}**\n"
explanation += f"\nFinal score: **{structured_data['predicted_probability']}** (needed: {structured_data['threshold']}, gap: {structured_data['gap_to_threshold']}) 💙"
else:
# Low anthropomorphism fallback
if approved:
explanation = "**Feature Impact Analysis**\n\n"
explanation += f"**Baseline Probability:** {structured_data['base_probability']}\n\n"
explanation += "**Key Contributing Factors:**\n"
for factor in structured_data['positive_factors'][:5]:
explanation += f"• **{factor['feature']}:** {factor['impact']} (value: {factor['value']})\n"
explanation += f"\n**Decision Summary:**\n"
explanation += f"Factors increased probability by {structured_data['total_adjustment']} to **{structured_data['predicted_probability']}**, "
explanation += f"exceeding the **{structured_data['threshold']}** approval threshold."
else:
explanation = "**Feature Impact Analysis**\n\n"
explanation += f"**Baseline Probability:** {structured_data['base_probability']}\n\n"
if structured_data['positive_factors']:
explanation += "**Positive Factors** (increased approval probability):\n"
for factor in structured_data['positive_factors'][:5]:
explanation += f"• **{factor['feature']}:** {factor['impact']} (value: {factor['value']})\n"
explanation += "\n"
if structured_data['negative_factors']:
explanation += "**Negative Factors** (decreased approval probability):\n"
for factor in structured_data['negative_factors'][:5]:
explanation += f"• **{factor['feature']}:** {factor['impact']} (value: {factor['value']})\n"
explanation += "\n"
explanation += "**Decision Summary:**\n"
explanation += f"Profile factors adjusted probability by {structured_data['total_adjustment']} to **{structured_data['predicted_probability']}**. "
explanation += f"Approval threshold: **{structured_data['threshold']}**, shortfall: **{structured_data['gap_to_threshold']}**."
result = {
'type': 'shap',
'explanation': explanation,
'feature_impacts': feature_impacts,
'prediction_class': predicted_class,
'method': 'local_shap_probability_space',
'shap_contributions': shap_contributions,
'base_value': base_value,
'predicted_probability': pred_prob,
'threshold': tau,
'gap_to_threshold': gap_to_threshold
}
# Include SHAP values if they were successfully computed (needed for visualizations)
if shap_values_computed is not None:
result['shap_values'] = shap_values_computed
result['instance_df'] = instance_df
return result
except Exception as e:
return {
'type': 'error',
'explanation': f"Feature importance analysis unavailable: {str(e)}",
'error': str(e)
}
def explain_with_shap_advanced(agent, instance_df):
"""Generate SHAP force plot and summary plot for the given instance."""
try:
explainer = shap.Explainer(agent.clf_display, agent.data['X_display'])
shap_values = explainer(instance_df)
# SHAP JS visualization (force plot)
shap.initjs()
force_plot = shap.force_plot(explainer.expected_value, shap_values.values[0], instance_df.iloc[0], matplotlib=True, show=False)
# SHAP summary plot
plt.figure()
shap.summary_plot(shap_values.values, instance_df, show=False)
summary_fig = plt.gcf()
plt.close()
return {
'type': 'shap_advanced',
'force_plot': force_plot,
'summary_fig': summary_fig,
'explanation': 'SHAP force plot and summary plot generated.'
}
except Exception as e:
return {
'type': 'error',
'explanation': f"Could not generate SHAP advanced visualizations: {str(e)}",
'error': str(e)
}
def explain_with_dice(agent, target_class=None, features='all'):
"""DiCE counterfactuals using actual DiCE library to generate counterfactuals"""
try:
from ab_config import config
import pandas as pd
current_pred = getattr(agent, 'predicted_class', 'unknown')
target_class = target_class or ('<=50K' if current_pred == '>50K' else '>50K')
current_instance = agent.current_instance
changes = []
# Try to use actual DiCE library
try:
# Prepare data for DiCE
X_train = agent.data['X_display']
y_train = agent.data['y_display']
# Create dataset for DiCE
train_df = pd.concat([X_train, y_train], axis=1)
# Define continuous and categorical features
continuous_features = ['age', 'hours_per_week', 'capital_gain', 'capital_loss', 'education_num']
categorical_features = [col for col in X_train.columns if col not in continuous_features]
# Create DiCE data object
d = dice_ml.Data(
dataframe=train_df,
continuous_features=continuous_features,
outcome_name='income'
)
# Create DiCE model
m = dice_ml.Model(model=agent.clf_display, backend='sklearn')
# Create DiCE explainer
exp = dice_ml.Dice(d, m, method='random')
# Get current instance as dataframe
if isinstance(current_instance, dict):
query_instance = pd.DataFrame([current_instance])
else:
query_instance = pd.DataFrame([current_instance])
# Ensure all features are present
for col in X_train.columns:
if col not in query_instance.columns:
query_instance[col] = 0
query_instance = query_instance[X_train.columns]
# Generate counterfactuals
target_value = 1 if '>50K' in target_class else 0
dice_exp = exp.generate_counterfactuals(
query_instance,
total_CFs=3,
desired_class=target_value
)
# Extract changes from counterfactuals using natural language
cf_df = dice_exp.cf_examples_list[0].final_cfs_df
# Check if counterfactuals were generated (handle DataFrame properly)
has_cf = cf_df is not None and isinstance(cf_df, pd.DataFrame) and len(cf_df) > 0
if has_cf:
# Compare with original instance and format naturally
for col in query_instance.columns:
# Extract scalar values properly
orig_val = query_instance[col].values[0]
cf_val = cf_df[col].values[0] if hasattr(cf_df[col], 'values') else cf_df[col].iloc[0]
# Convert to comparable types and check difference
try:
# Handle numeric comparison
if isinstance(orig_val, (int, float, np.number)) and isinstance(cf_val, (int, float, np.number)):
is_different = float(orig_val) != float(cf_val)
else:
# Handle string/categorical comparison
is_different = str(orig_val) != str(cf_val)
except Exception:
is_different = False
if is_different:
# Format with natural language using GLOBAL FEATURE_DISPLAY_NAMES
friendly_name = get_friendly_feature_name(col)
# Format values with appropriate units
if col == 'age':
from_val = f"{orig_val} years old"
to_val = f"{cf_val} years old"
elif col == 'hours_per_week':
from_val = f"{orig_val} hours per week"
to_val = f"{cf_val} hours per week"
elif 'capital' in col:
from_val = f"${orig_val:,}" if isinstance(orig_val, (int, float)) else str(orig_val)
to_val = f"${cf_val:,}" if isinstance(cf_val, (int, float)) else str(cf_val)
else:
from_val = str(orig_val)
to_val = str(cf_val)
changes.append(f"Your {friendly_name.lower()} (changing from {from_val} to {to_val})")
except Exception as dice_error:
# Fallback to rule-based analysis if DiCE fails
pass
# If DiCE didn't generate changes or failed, use intelligent rule-based system with natural language
if not changes and current_instance is not None:
# Convert Series to dict if needed
if hasattr(current_instance, 'to_dict'):
current_instance = current_instance.to_dict()
# Check education level
current_education = str(current_instance.get('education', '')).lower()
current_education_num = current_instance.get('education_num', 0)
if current_education_num < 13: # Less than Bachelor's
if 'hs-grad' in current_education or 'high school' in current_education:
changes.append("Your education level (completing a Bachelor's degree)")
elif current_education_num < 9:
changes.append("Your education level (completing High School and pursuing higher education)")
else:
changes.append("Your education level (pursuing a Bachelor's or higher degree)")
# Check occupation
current_occupation = str(current_instance.get('occupation', '')).lower()
if current_occupation and 'exec' not in current_occupation and 'prof' not in current_occupation and 'managerial' not in current_occupation:
changes.append(f"Your occupation (moving from {current_occupation} to a professional or managerial role)")
elif not current_occupation:
changes.append("Your occupation (moving to a professional or managerial role)")
elif not current_occupation:
changes.append("Your occupation (moving to a professional or managerial role)")
# Check working hours
current_hours = current_instance.get('hours_per_week', 0)
if current_hours < 40:
changes.append(f"Your work schedule (increasing from {current_hours} to 40+ hours per week)")
elif current_hours < 50:
changes.append(f"Your work schedule (increasing from {current_hours} to 50+ hours per week)")
# Check marital status
current_marital = str(current_instance.get('marital_status', '')).lower()
if current_marital and 'married' not in current_marital:
changes.append(f"Your marital status (currently {current_marital})")
elif not current_marital:
changes.append("Your marital status (married status associated with better outcomes)")
# Check capital gain
current_capital_gain = current_instance.get('capital_gain', 0)
if current_capital_gain < 5000:
changes.append(f"Your capital gains (increasing from ${current_capital_gain} to $5,000 or more)")
# Check age
current_age = current_instance.get('age', 0)
if current_age < 35:
changes.append(f"Your age (being {current_age} years old)")
# Fallback if no changes generated
if not changes:
changes = [
"Your education level (pursuing a Bachelor's or Master's degree)",
"Your occupation (moving into a professional or managerial role)",
"Your work schedule (working full-time, 40+ hours per week)"
]
# ===== DATA-DRIVEN APPROACH: Extract structured data for LLM =====
structured_data = {
'decision': current_pred,
'target_class': target_class,
'num_changes': len(changes),
'suggested_changes': changes[:5],
'is_denied': 'not' in str(current_pred).lower() or 'denied' in str(current_pred).lower() or '<' in str(current_pred)
}
# Generate explanation from data using LLM (respects anthropomorphism condition)
explanation = None
try:
from natural_conversation import generate_from_data
print(f"🤖 DEBUG (DiCE): Generating explanation from data (anthropomorphic={config.show_anthropomorphic})...")
explanation = generate_from_data(
data=structured_data,
explanation_type='dice',
high_anthropomorphism=config.show_anthropomorphic
)
if explanation and len(explanation) > 50:
print(f"✅ DEBUG: Generated counterfactual explanation ({len(explanation)} chars)")
else:
print(f"⚠️ DEBUG: LLM generation failed or too short")
explanation = None
except Exception as e:
print(f"❌ DEBUG: LLM generation failed: {e}")
explanation = None
# Fallback templates if LLM fails (preserves experimental conditions)
if not explanation:
print("⚠️ DEBUG: Using fallback template")
if config.show_anthropomorphic:
# High anthropomorphism fallback
if structured_data['is_denied']:
explanation = "💡 **What could help your application?**\n\n"
explanation += "Here are changes that could make a difference:\n\n"
for i, change in enumerate(changes[:5], 1):
explanation += f"**{i}.** {change}\n"
explanation += "\n✨ These factors show up in successful applications. Try the What-If Lab to explore more! 👍"
else:
explanation = "🔄 **What might change the outcome?**\n\n"
explanation += "Here's what could affect the decision:\n\n"
for i, change in enumerate(changes[:5], 1):
explanation += f"**{i}.** {change}\n"
explanation += "\n💭 Check out the What-If Lab to test scenarios! ✨"
else:
# Low anthropomorphism fallback
if structured_data['is_denied']:
explanation = "**Recommended Profile Modifications**\n\n"
for i, change in enumerate(changes[:5], 1):
explanation += f"**{i}.** {change}\n"
explanation += "\nAnalysis based on approved application patterns. Refer to What-If Lab for interactive testing."
else:
explanation = "**Profile Impact Analysis**\n\n"
for i, change in enumerate(changes[:5], 1):
explanation += f"**{i}.** {change}\n"
explanation += "\nData-driven insights from comparative analysis. Refer to What-If Lab for exploration."
# Ensure current_instance is a dict for return values # Ensure current_instance is a dict for return values
instance_dict = current_instance
if hasattr(current_instance, 'to_dict'):
instance_dict = current_instance.to_dict()
return {
'type': 'dice',
'explanation': explanation,
'target_class': target_class,
'changes': changes,
'method': 'counterfactual_analysis',
'current_values': {
'education_num': instance_dict.get('education_num', 0) if instance_dict else 0,
'hours_per_week': instance_dict.get('hours_per_week', 0) if instance_dict else 0,
'capital_gain': instance_dict.get('capital_gain', 0) if instance_dict else 0,
'age': instance_dict.get('age', 0) if instance_dict else 0
}
}
except Exception as e:
return {
'type': 'error',
'explanation': f"Counterfactual analysis unavailable: {str(e)}",
'error': str(e)
}
def explain_with_anchor(agent):
"""Anchor explanations using actual data patterns from the model"""
try:
from ab_config import config
import pandas as pd
current_pred = getattr(agent, 'predicted_class', 'unknown')
current_instance = agent.current_instance
# Extract actual rules from current instance
rules_friendly = []
rules_technical = []
if current_instance is not None and len(current_instance) > 0:
# Convert Series to dict for safe .get() access
if hasattr(current_instance, 'to_dict'):
instance_dict = current_instance.to_dict()
elif isinstance(current_instance, dict):
instance_dict = current_instance
else:
instance_dict = dict(current_instance) if current_instance is not None else {}
# Age rule
age = instance_dict.get('age', 0)
if age > 35:
friendly = get_friendly_feature_name('age')
rules_friendly.append(f"Your {friendly.lower()} ({age} years old)")
rules_technical.append(f"age > 35 (value: {age})")
elif age < 25:
friendly = get_friendly_feature_name('age')
rules_friendly.append(f"Your {friendly.lower()} ({age} years old)")
rules_technical.append(f"age < 25 (value: {age})")
# Education rule
education_num = instance_dict.get('education_num', 0)
education = instance_dict.get('education', 'Unknown')
if education_num >= 13:
friendly = get_friendly_feature_name('education_num')
rules_friendly.append(f"Your {friendly.lower()} ({education})")
rules_technical.append(f"education_num >= 13 (Bachelor's or higher)")
elif education_num < 9:
friendly = get_friendly_feature_name('education_num')
rules_friendly.append(f"Your {friendly.lower()} ({education})")
rules_technical.append(f"education_num < 9 (less than HS)")
# Hours rule
hours = instance_dict.get('hours_per_week', 0)
if hours >= 40:
friendly = get_friendly_feature_name('hours_per_week')
rules_friendly.append(f"Your {friendly.lower()} ({hours} hours per week)")
rules_technical.append(f"hours_per_week >= 40 (value: {hours})")
elif hours < 30:
friendly = get_friendly_feature_name('hours_per_week')
rules_friendly.append(f"Your {friendly.lower()} ({hours} hours per week)")
rules_technical.append(f"hours_per_week < 30 (value: {hours})")
# Marital status rule
marital = instance_dict.get('marital_status', '')
if 'Married' in marital:
friendly = get_friendly_feature_name('marital_status')
rules_friendly.append(f"Your {friendly.lower()} ({marital})")
rules_technical.append(f"marital_status = '{marital}'")
# Capital gain rule
capital_gain = instance_dict.get('capital_gain', 0)
if capital_gain > 5000:
friendly = get_friendly_feature_name('capital_gain')
rules_friendly.append(f"Your {friendly.lower()} (${capital_gain:,})")
rules_technical.append(f"capital_gain > 5000 (value: {capital_gain})")
elif capital_gain > 0:
friendly = get_friendly_feature_name('capital_gain')
rules_friendly.append(f"Your {friendly.lower()} (${capital_gain:,})")
rules_technical.append(f"capital_gain > 0 (value: {capital_gain})")
# Occupation rule
occupation = instance_dict.get('occupation', '')
if occupation:
if any(x in occupation for x in ['Exec', 'Prof', 'Managerial']):
friendly = get_friendly_feature_name('occupation')
rules_friendly.append(f"Your {friendly.lower()} ({occupation})")
rules_technical.append(f"occupation = '{occupation}' (professional)")
# Estimate precision and coverage based on feature importance
precision = 0.85 + (len(rules_friendly) * 0.02) # More rules = higher precision
coverage = max(0.10, min(0.25, 0.05 * len(rules_friendly)))
# Generate explanation with language differentiation
if config.show_anthropomorphic:
# High anthropomorphism
explanation = "📋 **Key factors in your decision:**\n\n"
explanation += "The decision was primarily influenced by:\n"
for i, rule in enumerate(rules_friendly[:5], 1):
explanation += f"{i}. {rule}\n"
explanation += f"\n💡 This pattern is accurate about {precision:.0%} of the time and applies to roughly {coverage:.0%} of similar applications."
else:
# Low anthropomorphism
explanation = "**Decision rule analysis:**\n\n"
explanation += "Primary decision factors:\n"
for i, rule in enumerate(rules_technical[:5], 1):
explanation += f"{i}. {rule}\n"
explanation += f"\nRule precision: {precision:.2f}, Coverage: {coverage:.2f}"
return {
'type': 'anchor',
'explanation': explanation,
'rules': rules_technical,
'rules_friendly': rules_friendly,
'precision': precision,
'coverage': coverage,
'method': 'rule_based_analysis'
}
except Exception as e:
return {
'type': 'error',
'explanation': f"Rule analysis unavailable: {str(e)}",
'error': str(e)
}
def explain_with_dtreeviz(agent, instance_df):
"""Generate dtreeviz visualization for the trained decision tree."""
try:
from sklearn.tree import DecisionTreeClassifier
# If RandomForest, use one tree for visualization
if hasattr(agent.clf_display, 'estimators_'):
tree = agent.clf_display.estimators_[0]
else:
tree = agent.clf_display
viz = dtreeviz.dtreeviz(
tree,
agent.data['X_display'],
agent.data['y_display'],
target_name='income',
feature_names=agent.data['features'],
class_names=agent.data['classes']
)
return {
'type': 'dtreeviz',
'graph': viz,
'explanation': 'Decision tree visualization generated.'
}
except Exception as e:
return {
'type': 'error',
'explanation': f"Could not generate dtreeviz visualization: {str(e)}",
'error': str(e)
}
def route_to_xai_method(agent, intent_result):
"""Route user question to appropriate XAI method based on intent AND experimental condition"""
from ab_config import config
if isinstance(intent_result, dict) and 'intent' in intent_result:
method = intent_result['intent']
# Normalize common aliases
if method in {"rule", "rules", "rule_based", "rule-based", "local_explanation"}:
method = 'anchor'
# Check experimental condition - only provide explanations that are enabled
if method == 'shap':
if config.explanation == "feature_importance": # Both condition 5 and 6
return explain_with_shap(agent, intent_result.get('label'))
else:
return {
'type': 'unavailable',
'explanation': "Feature importance explanations are not available in this version.",
'method': 'shap_disabled'
}
elif method == 'dice':
if config.show_counterfactual: # counterfactual condition
return explain_with_dice(agent)
else:
return {
'type': 'unavailable',
'explanation': "Counterfactual explanations are not available in this version.",
'method': 'dice_disabled'
}
elif method == 'anchor':
# Anchor is available in all conditions as baseline
return explain_with_anchor(agent)
else:
return {
'type': 'general',
'explanation': f"I understand you're asking about: {intent_result.get('matched_question', 'the model')}. Let me provide a general explanation.",
'method': 'general'
}
else:
return {
'type': 'error',
'explanation': "I'm not sure how to explain that. Could you rephrase your question?",
'suggestions': intent_result[2] if len(intent_result) > 2 else []
}