# bayesian-network / llm_assistant.py
# (Hugging Face upload metadata: uploaded by Wen1201, "Upload 3 files", commit 0ee744a, verified)
from openai import OpenAI
import json
import numpy as np
class LLMAssistant:
    """
    LLM question-answering assistant.

    Wraps the OpenAI chat-completions API to help users understand
    Bayesian network analysis results: generating summaries, explaining
    metrics, interpreting the learned structure, and performing
    text-driven risk predictions.
    """

    # Single place to change the chat model used by every request.
    _MODEL = "gpt-4o-mini"

    def __init__(self, api_key, session_id):
        """
        Initialize the LLM assistant.

        Args:
            api_key: OpenAI API key.
            session_id: Unique session identifier.
        """
        self.client = OpenAI(api_key=api_key)
        self.session_id = session_id
        # Alternating user/assistant turns. System messages are prepended
        # per-request in get_response() and never stored here.
        self.conversation_history = []
        # System prompt sent with every request (runtime string — verbatim).
        self.system_prompt = """You are an expert data scientist specializing in Bayesian Networks and machine learning.
Your role is to help users understand their Bayesian Network analysis results.
You should:
1. Explain complex statistical concepts in simple terms
2. Provide insights about model performance metrics
3. Suggest improvements when asked
4. Explain the structure and relationships in the Bayesian Network
5. Help interpret conditional probability tables (CPTs)
6. Discuss limitations and assumptions of the model
7. Perform personalized risk predictions from patient descriptions**
8. Provide empathetic, evidence-based interpretations of risk levels**
When performing predictions:
- Extract relevant medical features from natural language descriptions
- Clearly communicate risk levels (High/Moderate/Low) with probabilities
- Explain key risk factors in understandable terms
- Always emphasize limitations and the need for professional medical consultation
Always be clear, concise, and educational. Use examples when helpful.
Format your responses with proper markdown for better readability."""

    def get_response(self, user_message, analysis_results):
        """
        Get an AI response to *user_message*, grounded in the analysis results.

        Args:
            user_message: The user's message.
            analysis_results: Analysis-results dictionary (see _prepare_context).

        Returns:
            str: The assistant's reply, or an error message on API failure.
        """
        context = self._prepare_context(analysis_results)
        # Record the user turn before the call so it is part of the request.
        self.conversation_history.append({
            "role": "user",
            "content": user_message
        })
        # System prompt + fresh analysis context + full conversation so far.
        messages = [
            {"role": "system", "content": self.system_prompt},
            {"role": "system", "content": f"Analysis Context:\n{context}"}
        ] + self.conversation_history
        try:
            response = self.client.chat.completions.create(
                model=self._MODEL,
                messages=messages,
                temperature=0.7,
                max_tokens=1500
            )
            assistant_message = response.choices[0].message.content
        except Exception as e:
            # Roll back the user turn so the history stays a consistent
            # alternating user/assistant sequence after a failed call
            # (the original left an unanswered user turn behind).
            self.conversation_history.pop()
            return f"❌ Error: {str(e)}\n\nPlease check your API key and try again."
        self.conversation_history.append({
            "role": "assistant",
            "content": assistant_message
        })
        return assistant_message

    def _prepare_context(self, results):
        """Build a markdown context string summarizing the analysis results.

        Args:
            results: Dict with keys 'parameters', 'train_metrics',
                'test_metrics', 'scores', and 'model' (an object exposing
                nodes() and edges()). Falsy input yields a placeholder string.

        Returns:
            str: Markdown summary used as a system message.
        """
        if not results:
            return "No analysis results available yet."
        params = results['parameters']
        train_metrics = results['train_metrics']
        test_metrics = results['test_metrics']
        scores = results['scores']
        # NOTE(review): metric values appear to be pre-scaled percentages
        # (formatted with '%' suffix but no *100) — confirm against producer.
        context = f"""
## Model Configuration
- Algorithm: {params['algorithm']}
- Estimator: {params['estimator']}
- Number of Features: {params['n_features']}
- Categorical: {len(params['cat_features'])}
- Continuous: {len(params['con_features'])}
- Target Variable: {params['target_variable']}
- Test Set Proportion: {params['test_fraction']:.0%}
## Training Set Performance
- Accuracy: {train_metrics['accuracy']:.2f}%
- Precision: {train_metrics['precision']:.2f}%
- Recall: {train_metrics['recall']:.2f}%
- F1-Score: {train_metrics['f1']:.2f}%
- AUC: {train_metrics['auc']:.4f}
- G-mean: {train_metrics['g_mean']:.2f}%
- P-mean: {train_metrics['p_mean']:.2f}%
- Specificity: {train_metrics['specificity']:.2f}%
## Test Set Performance
- Accuracy: {test_metrics['accuracy']:.2f}%
- Precision: {test_metrics['precision']:.2f}%
- Recall: {test_metrics['recall']:.2f}%
- F1-Score: {test_metrics['f1']:.2f}%
- AUC: {test_metrics['auc']:.4f}
- G-mean: {test_metrics['g_mean']:.2f}%
- P-mean: {test_metrics['p_mean']:.2f}%
- Specificity: {test_metrics['specificity']:.2f}%
## Model Scores
- Log-Likelihood: {scores['log_likelihood']:.2f}
- BIC Score: {scores['bic']:.2f}
- K2 Score: {scores['k2']:.2f}
- BDeu Score: {scores['bdeu']:.2f}
- BDs Score: {scores['bds']:.2f}
## Network Structure
- Total Nodes: {len(results['model'].nodes())}
- Total Edges: {len(results['model'].edges())}
- Network Edges: {list(results['model'].edges())[:10]}... (showing first 10)
"""
        return context

    def generate_summary(self, analysis_results):
        """
        Auto-generate a comprehensive summary of the analysis results.

        Args:
            analysis_results: Analysis-results dictionary.

        Returns:
            str: Summary text.
        """
        summary_prompt = """Based on the analysis results provided in the context, please generate a comprehensive summary that includes:
1. **Model Overview**: Brief description of the model type and configuration
2. **Performance Analysis**:
- Overall model performance on both training and test sets
- Comparison between training and test performance (overfitting/underfitting)
- Key strengths and weaknesses
3. **Network Structure Insights**: What the learned structure tells us about variable relationships
4. **Recommendations**: Specific suggestions for improvement
5. **Limitations**: Important caveats and limitations to consider
Format the summary in clear markdown with appropriate sections and bullet points."""
        return self.get_response(summary_prompt, analysis_results)

    def explain_metric(self, metric_name, analysis_results):
        """
        Explain a specific performance metric in context.

        Args:
            metric_name: Name of the metric to explain.
            analysis_results: Analysis-results dictionary.

        Returns:
            str: Explanation text.
        """
        explain_prompt = f"""Please explain the following metric in the context of this analysis:
Metric: {metric_name}
Include:
1. What this metric measures
2. The value obtained in this analysis (training and test)
3. How to interpret this value
4. What it tells us about model performance
5. How it relates to other metrics in the analysis"""
        return self.get_response(explain_prompt, analysis_results)

    def suggest_improvements(self, analysis_results):
        """
        Provide actionable improvement recommendations.

        Args:
            analysis_results: Analysis-results dictionary.

        Returns:
            str: Recommendations text.
        """
        improve_prompt = """Based on the current model performance and configuration, please provide specific, actionable recommendations for improvement.
Consider:
1. Feature engineering opportunities
2. Algorithm selection
3. Hyperparameter tuning
4. Data quality issues
5. Model complexity trade-offs
Prioritize recommendations by potential impact."""
        return self.get_response(improve_prompt, analysis_results)

    def explain_network_structure(self, analysis_results):
        """
        Explain the learned network structure.

        Args:
            analysis_results: Analysis-results dictionary.

        Returns:
            str: Structure explanation text.
        """
        structure_prompt = """Please explain the learned Bayesian Network structure:
1. What are the key relationships (edges) discovered?
2. What do these relationships tell us about the domain?
3. Are there any surprising or interesting patterns?
4. How does the structure relate to the target variable?
5. What are the implications for prediction and inference?"""
        return self.get_response(structure_prompt, analysis_results)

    def compare_algorithms(self, analysis_results):
        """
        Compare the current algorithm against the alternatives.

        Args:
            analysis_results: Analysis-results dictionary.

        Returns:
            str: Comparison text.
        """
        compare_prompt = f"""The current model uses the {analysis_results['parameters']['algorithm']} algorithm.
Please:
1. Explain the characteristics of this algorithm
2. Compare it with other available algorithms (NB, TAN, CL, HC, PC)
3. Discuss when this algorithm is most appropriate
4. Suggest if a different algorithm might be better for this dataset
5. Explain the trade-offs involved"""
        return self.get_response(compare_prompt, analysis_results)

    @staticmethod
    def _parse_json_payload(text):
        """Parse a JSON object from an LLM reply, tolerating markdown fences.

        LLMs frequently wrap JSON in ``` or ```json fences even when told
        not to; strip them before parsing.

        Raises:
            json.JSONDecodeError: If the cleaned text is still not valid JSON.
        """
        cleaned = text.strip()
        if cleaned.startswith("```"):
            cleaned = cleaned.strip("`").strip()
            # Drop an optional language tag left over from a ```json fence.
            if cleaned.startswith("json"):
                cleaned = cleaned[4:]
        return json.loads(cleaned)

    def predict_from_text(self, user_description, analyzer, target_variable, feature_list):
        """
        Extract features from a free-text description and run a prediction.

        Args:
            user_description: Natural-language patient description.
            analyzer: BayesianNetworkAnalyzer instance (must expose
                predict_single_instance(evidence, target)).
            target_variable: Name of the target variable.
            feature_list: Features the model uses; extraction is limited to these.

        Returns:
            str: LLM-generated interpretation of the prediction.

        Raises:
            json.JSONDecodeError: If the extraction response is not valid JSON.
        """
        # Step 1: use the LLM to extract structured features from the text.
        extraction_prompt = f"""
You are a medical data analyst. Extract the following patient features from the description:
Required features: {', '.join(feature_list)}
User description: "{user_description}"
Please extract the values in JSON format. If a feature is not mentioned, use "unknown".
Return ONLY the JSON object, no other text.
Example format:
{{
"age": 65,
"size": 25,
"grade": 2,
"nodes": 1,
...
}}
"""
        response = self.client.chat.completions.create(
            model=self._MODEL,
            messages=[
                {"role": "system", "content": "You are a precise medical data extractor. Return only valid JSON."},
                {"role": "user", "content": extraction_prompt}
            ],
            temperature=0.1,
            # Constrain the model to emit a syntactically valid JSON object,
            # so the parse below cannot fail on prose or markdown wrappers.
            response_format={"type": "json_object"}
        )
        extracted_features = self._parse_json_payload(response.choices[0].message.content)
        # Step 2: keep only known model features with concrete values; drop
        # "unknown" placeholders, the target itself, and any hallucinated keys
        # the extractor invented (those would break the analyzer's inference).
        evidence_dict = {k: v for k, v in extracted_features.items()
                         if v != "unknown" and k != target_variable
                         and k in feature_list}
        # Step 3: run the Bayesian network prediction on the evidence.
        prediction = analyzer.predict_single_instance(evidence_dict, target_variable)
        # Step 4: have the LLM turn the numeric result into a readable reply.
        interpretation_prompt = f"""
Based on the Bayesian Network model analysis:
Patient features: {evidence_dict}
Predicted death probability: {prediction['probability']:.2%}
Risk level: {prediction['risk_level']}
Please provide a clear, empathetic explanation including:
1. A summary of the patient's key risk factors
2. The predicted risk level and what it means
3. Important considerations and limitations
4. Recommendations for next steps
Be professional but accessible. Use markdown formatting.
"""
        final_response = self.client.chat.completions.create(
            model=self._MODEL,
            messages=[
                {"role": "system", "content": "You are a compassionate medical AI assistant."},
                {"role": "user", "content": interpretation_prompt}
            ],
            temperature=0.7
        )
        return final_response.choices[0].message.content

    def reset_conversation(self):
        """Clear the conversation history for this session."""
        self.conversation_history = []