# NOTE: removed extraction artifacts ("Spaces:" / "Sleeping") that were not part of the source.
"""Module imports for the Agent XAI assistant.

Fix: the original imported ``json`` twice (once near the top and again
after the project imports); the duplicate is removed and imports are
grouped stdlib / third-party / local per PEP 8.
"""
import json
import logging
import os
import pickle
import random
import re

import pandas as pd
import shap
import sklearn

from answer import Answers
from constraints import *
from nlu import NLU

# Optional natural-conversation enhancer: fall back to a no-op passthrough
# when the module is absent so the agent still runs without it.
try:
    from natural_conversation import enhance_response
    NATURAL_CONVERSATION_AVAILABLE = True
except ImportError:
    NATURAL_CONVERSATION_AVAILABLE = False

    def enhance_response(response, context=None, response_type="explanation"):
        """No-op fallback: return the response unchanged."""
        return response
class Agent:
    """Conversational XAI agent for the UCI Adult income dataset.

    Loads the dataset, trains (or loads from disk) a RandomForest
    classifier, collects required user features through dialogue, and
    routes user questions to XAI explanation methods via an NLU model.

    Fixes relative to the original:
      * ``handle_user_input`` was defined twice; only the second
        definition was ever active, so the dead first copy was removed.
      * The pickle model file is now opened with a context manager
        (the original leaked the file handle).
      * The duplicated train-and-persist code in ``train_model`` is
        factored into ``_train_and_persist`` with a single, consistent
        ``n_estimators`` (the two branches disagreed: 200 vs 100).
      * Numeric range validation no longer raises (and then
        misreports) when a feature's max bound is missing.
    """

    def __init__(self, nlu_model=None):
        """Initialize state, the NLU backend, the dataset and the model.

        Args:
            nlu_model: optional pre-built NLU instance. When omitted, an
                NLU is configured from ``nlu_config.json`` next to this
                file if it exists, otherwise with library defaults.
        """
        # Core prediction state.
        self.dataset = "adult"
        self.current_instance = None   # dict feature -> value for the active instance
        self.clf = None                # trained classifier
        self.predicted_class = None    # prediction for current_instance
        self.mode = None
        self.data = {"X": None, "y": None, "features": None, "classes": None}

        # NLU setup: prefer an injected model, else the JSON config, else defaults.
        config_path = os.path.join(os.path.dirname(__file__), 'nlu_config.json')
        if nlu_model is not None:
            self.nlu_model = nlu_model
        elif os.path.exists(config_path):
            with open(config_path, 'r') as f:
                nlu_config = json.load(f)
            self.nlu_model = NLU(
                model_type=nlu_config.get('model_type', 'sentence_transformers'),
                model_path=nlu_config.get('model_path'),
            )
        else:
            self.nlu_model = NLU()

        # UI/state helpers consumed by the Answers class.
        self.list_node = []
        self.clf_display = None
        self.l_exist_classes = None
        self.l_exist_features = None
        self.l_instances = None
        self.df_display_instance = None
        self.current_feature = None
        self.preprocessor = None

        # Features the user must supply before explanations are generated.
        self.required_features = [
            'age', 'workclass', 'education', 'education_num', 'marital_status',
            'occupation', 'relationship', 'race', 'sex', 'capital_gain',
            'capital_loss', 'hours_per_week', 'native_country'
        ]
        self.user_features = {}

        # Load data and train model (sets self.clf and self.clf_display).
        self.load_adult_dataset()
        self.train_model()

    def load_adult_dataset(self):
        """Load the Adult CSV and its metadata JSON into ``self.data``.

        Populates ``X_display`` (raw features), ``y_display`` (income
        labels), ``info`` (dataset metadata), class names and feature
        name lists. Paths are resolved relative to this file.
        """
        data_path = os.path.join(os.path.dirname(__file__), '..', 'data', 'adult.data')
        info_path = os.path.join(os.path.dirname(__file__), '..', 'dataset_info', 'adult.json')
        columns = [
            'age', 'workclass', 'fnlwgt', 'education', 'education_num', 'marital_status',
            'occupation', 'relationship', 'race', 'sex', 'capital_gain', 'capital_loss',
            'hours_per_week', 'native_country', 'income'
        ]
        self.data['X_display'] = pd.read_csv(data_path, names=columns, skipinitialspace=True)
        self.data['y_display'] = self.data['X_display']['income']
        self.data['X_display'].drop(['income'], axis=1, inplace=True)
        with open(info_path, 'r') as f:
            self.data['info'] = json.load(f)
        self.data['classes'] = ['<=50K', '>50K']
        self.data['features'] = self.data['X_display'].columns.tolist()
        self.data['feature_names'] = self.data['features']
        self.data['map'] = {}

    def train_model(self):
        """Load a cached RandomForest from disk, or train and persist one.

        Sets ``self.clf`` and ``self.clf_display``. A corrupt/unreadable
        pickle triggers a retrain instead of crashing.
        """
        # Ensure the model directory exists before reading/writing.
        model_dir = os.path.join(os.path.dirname(__file__), '..', 'models')
        os.makedirs(model_dir, exist_ok=True)
        model_path = os.path.join(model_dir, 'RandomForest.pkl')
        if os.path.exists(model_path):
            try:
                # SECURITY: pickle.load executes arbitrary code from the
                # file — only load model files you trust.
                # (Original used pickle.load(open(...)) and leaked the handle.)
                with open(model_path, 'rb') as f:
                    self.clf = pickle.load(f)
                self.clf_display = self.clf
                return
            except Exception as e:
                print(f"⚠️ Failed to load existing model ({e}). Retraining...")
        self._train_and_persist(model_path)

    def _train_and_persist(self, model_path):
        """Train a RandomForest on the preprocessed data and pickle it.

        Factored out of ``train_model`` (the original duplicated this
        code in both branches with inconsistent n_estimators: 200 vs
        100; unified at 100, the fresh-training path's value).
        """
        from preprocessing import preprocess_adult
        from sklearn.ensemble import RandomForestClassifier

        df = pd.concat([self.data['X_display'], self.data['y_display']], axis=1)
        df_clean = preprocess_adult(df)
        X = df_clean.drop('income', axis=1)
        y = df_clean['income']
        self.clf = RandomForestClassifier(n_estimators=100, random_state=42)
        self.clf.fit(X, y)
        # Persist the trained model for faster subsequent runs.
        with open(model_path, 'wb') as f:
            pickle.dump(self.clf, f)
        self.clf_display = self.clf

    def handle_user_input(self, user_input, instance_df=None):
        """Process one user message: extract features, validate, route to XAI.

        NOTE: the original file defined ``handle_user_input`` twice; only
        this (second) definition was active, so the dead first copy —
        which also ran the natural-conversation enhancer — was removed
        without changing runtime behavior.

        Args:
            user_input: raw user message text.
            instance_df: optional DataFrame row for advanced visualizations.

        Returns:
            A response string: a clarification prompt for the next missing
            feature, a validation error, an explanation, or a
            visualization result from :meth:`get_visualization`.
        """
        # Step 1: feature extraction — match "synonym[:=] value" patterns.
        feature_synonyms = {
            'age': ['age', 'years old'],
            'workclass': ['workclass', 'work type', 'job type'],
            'education': ['education', 'degree'],
            'education_num': ['education num', 'education number', 'years of education'],
            'marital_status': ['marital status', 'married', 'single', 'relationship status'],
            'occupation': ['occupation', 'job', 'profession'],
            'relationship': ['relationship'],
            'race': ['race', 'ethnicity'],
            'sex': ['sex', 'gender'],
            'capital_gain': ['capital gain', 'gain'],
            'capital_loss': ['capital loss', 'loss'],
            'hours_per_week': ['hours per week', 'weekly hours', 'work hours'],
            'native_country': ['native country', 'country', 'nationality']
        }
        for feature, synonyms in feature_synonyms.items():
            for syn in synonyms:
                pattern = rf"{syn}[:=]?\s*([\w\-\+]+)"
                match = re.search(pattern, user_input, re.IGNORECASE)
                if match:
                    self.user_features[feature] = match.group(1)

        # Ask for missing features one at a time, in required order.
        from constraints import CLARIFY_FEATURE_MSG
        missing = [f for f in self.required_features if f not in self.user_features]
        if missing:
            return CLARIFY_FEATURE_MSG.format(feature=missing[0].replace('_', ' '))

        # Step 2: validate collected values against dataset metadata.
        from constraints import REPEAT_NUM_FEATURES, REPEAT_CAT_FEATURES
        info = self.data.get('info', {})
        for feature in self.required_features:
            value = self.user_features.get(feature)
            if value is None:
                continue
            # Numeric validation: value must parse and fall in range.
            if feature in info.get('num_features', []):
                try:
                    val = float(value)
                except Exception:
                    del self.user_features[feature]
                    return REPEAT_NUM_FEATURES.format("valid number")
                minv, maxv = info.get('feature_ranges', {}).get(feature, (None, None))
                # Guard both bounds: the original compared against maxv even
                # when it was None, raising TypeError which the broad except
                # then misreported as a parse error.
                if minv is not None and maxv is not None and not (minv <= val <= maxv):
                    del self.user_features[feature]
                    return REPEAT_NUM_FEATURES.format(f"{minv}-{maxv}")
            # Categorical validation: value must be one of the known labels.
            if feature in info.get('cat_features', []):
                valid = info.get('feature_values', {}).get(feature, [])
                if valid and value not in valid:
                    del self.user_features[feature]
                    return REPEAT_CAT_FEATURES.format(", ".join(valid))

        # Step 3: intent classification and XAI routing via the NLU model.
        intent_result, confidence, suggestions = self.nlu_model.classify_intent(user_input)
        from constraints import SUGGEST_SIMILAR_QUESTIONS_MSG, REPHRASE_QUESTION_MSG
        from xai_methods import route_to_xai_method
        if isinstance(intent_result, dict) and 'intent' in intent_result:
            # Ensure there is an instance to explain.
            if self.current_instance is None:
                self.select_random_instance()
            # Advanced visualization intents are delegated to Answers.
            if intent_result['intent'] in ['shap_advanced', 'dtreeviz']:
                return self.get_visualization(intent_result['intent'], instance_df)
            # Standard explanation routing.
            explanation_result = route_to_xai_method(self, intent_result)
            return explanation_result.get('explanation', 'Sorry, I could not generate an explanation.')
        elif intent_result == 'unknown' and suggestions:
            suggestions_str = "\n".join([f"{idx}. {q}" for idx, q in enumerate(suggestions, 1)])
            return SUGGEST_SIMILAR_QUESTIONS_MSG.format(suggestions=suggestions_str)
        else:
            return REPHRASE_QUESTION_MSG

    def _generate_basic_explanation(self, intent_result):
        """Generate a plain-text fallback explanation for the current instance.

        Used when the dedicated XAI routing module is unavailable; lists
        a few salient features of ``self.current_instance``.
        """
        if self.current_instance is None or self.predicted_class is None:
            return "I need a specific instance to explain. Please make sure a prediction has been made."
        # Basic explanation based on the current instance.
        explanation = f"Based on your profile, the decision was: {self.predicted_class}\n\n"
        explanation += "Key factors in this decision include:\n"
        # Highlight a hand-picked subset of features, when present.
        key_features = ['age', 'education', 'hours_per_week', 'occupation', 'marital_status']
        for feature in key_features:
            if feature in self.current_instance:
                value = self.current_instance[feature]
                explanation += f"• {feature.replace('_', ' ').title()}: {value}\n"
        explanation += "\nThis is a simplified explanation. For more detailed analysis, specific XAI methods would provide deeper insights."
        return explanation

    def select_random_instance(self):
        """Pick a random dataset row as the current instance and predict it.

        Sets ``df_display_instance`` (single-row DataFrame),
        ``current_instance`` (dict) and ``predicted_class``.

        NOTE(review): the prediction is made on the raw display row, but
        the model is trained on preprocessed features — confirm the
        classifier accepts raw rows, otherwise this call will fail.
        """
        if self.data.get('X_display') is not None and len(self.data['X_display']) > 0:
            random_idx = random.randint(0, len(self.data['X_display']) - 1)
            self.df_display_instance = self.data['X_display'].iloc[[random_idx]]
            self.current_instance = self.df_display_instance.iloc[0].to_dict()
            # Make a prediction for this instance when a model is available.
            if self.clf_display is not None:
                self.predicted_class = self.clf_display.predict(self.df_display_instance)[0]

    def get_visualization(self, viz_type, instance_df=None):
        """Route advanced visualization requests to the Answers class.

        Args:
            viz_type: 'shap_advanced' or 'dtreeviz'.
            instance_df: DataFrame for the instance to visualize.

        Returns:
            Whatever ``Answers.answer`` produces for the requested type.
        """
        answers = Answers(
            list_node=self.list_node,
            clf=self.clf,
            clf_display=self.clf_display,
            current_instance=self.current_instance,
            question=None,
            l_exist_classes=self.l_exist_classes,
            l_exist_features=self.l_exist_features,
            l_instances=self.l_instances,
            data=self.data,
            df_display_instance=self.df_display_instance,
            predicted_class=self.predicted_class,
            preprocessor=self.preprocessor
        )
        return answers.answer(viz_type, instance_df=instance_df)