Spaces:
Sleeping
Sleeping
| from openai import OpenAI | |
| import json | |
| import numpy as np | |
class LLMAssistant:
    """
    LLM Q&A assistant.

    Helps users understand Bayesian Network analysis results by wrapping the
    OpenAI chat-completions API with an analysis-aware, per-session
    conversation. Also supports extracting patient features from free text and
    turning a model prediction into a lay-readable explanation.
    """

    # Single place to change the chat model used by every request.
    MODEL = "gpt-4o-mini"

    def __init__(self, api_key, session_id):
        """
        Initialize the assistant.

        Args:
            api_key: OpenAI API key.
            session_id: Unique session identifier.
        """
        self.client = OpenAI(api_key=api_key)
        self.session_id = session_id
        # Alternating user/assistant turns; system messages are rebuilt per call.
        self.conversation_history = []
        # System prompt (kept verbatim; it is runtime-visible text).
        self.system_prompt = """You are an expert data scientist specializing in Bayesian Networks and machine learning.
Your role is to help users understand their Bayesian Network analysis results.
You should:
1. Explain complex statistical concepts in simple terms
2. Provide insights about model performance metrics
3. Suggest improvements when asked
4. Explain the structure and relationships in the Bayesian Network
5. Help interpret conditional probability tables (CPTs)
6. Discuss limitations and assumptions of the model
7. Perform personalized risk predictions from patient descriptions**
8. Provide empathetic, evidence-based interpretations of risk levels**
When performing predictions:
- Extract relevant medical features from natural language descriptions
- Clearly communicate risk levels (High/Moderate/Low) with probabilities
- Explain key risk factors in understandable terms
- Always emphasize limitations and the need for professional medical consultation
Always be clear, concise, and educational. Use examples when helpful.
Format your responses with proper markdown for better readability."""

    def get_response(self, user_message, analysis_results):
        """
        Get an AI reply to *user_message*, grounded in *analysis_results*.

        Args:
            user_message: The user's message.
            analysis_results: Analysis-results dict (may be empty/None).

        Returns:
            str: The assistant's reply, or an error string on API failure.
        """
        context = self._prepare_context(analysis_results)
        self.conversation_history.append({
            "role": "user",
            "content": user_message
        })
        # System prompt + fresh analysis context + the running conversation.
        messages = [
            {"role": "system", "content": self.system_prompt},
            {"role": "system", "content": f"Analysis Context:\n{context}"}
        ] + self.conversation_history
        try:
            response = self.client.chat.completions.create(
                model=self.MODEL,
                messages=messages,
                temperature=0.7,
                max_tokens=1500
            )
            assistant_message = response.choices[0].message.content
            self.conversation_history.append({
                "role": "assistant",
                "content": assistant_message
            })
            return assistant_message
        except Exception as e:
            # Fix: remove the unanswered user turn so a failed call does not
            # leave a dangling message that pollutes every later request.
            self.conversation_history.pop()
            return f"❌ Error: {str(e)}\n\nPlease check your API key and try again."

    def _prepare_context(self, results):
        """Build a markdown context summary of the analysis results for the LLM."""
        if not results:
            return "No analysis results available yet."
        # Pull out the pieces the template below needs.
        params = results['parameters']
        train_metrics = results['train_metrics']
        test_metrics = results['test_metrics']
        scores = results['scores']
        context = f"""
## Model Configuration
- Algorithm: {params['algorithm']}
- Estimator: {params['estimator']}
- Number of Features: {params['n_features']}
- Categorical: {len(params['cat_features'])}
- Continuous: {len(params['con_features'])}
- Target Variable: {params['target_variable']}
- Test Set Proportion: {params['test_fraction']:.0%}
## Training Set Performance
- Accuracy: {train_metrics['accuracy']:.2f}%
- Precision: {train_metrics['precision']:.2f}%
- Recall: {train_metrics['recall']:.2f}%
- F1-Score: {train_metrics['f1']:.2f}%
- AUC: {train_metrics['auc']:.4f}
- G-mean: {train_metrics['g_mean']:.2f}%
- P-mean: {train_metrics['p_mean']:.2f}%
- Specificity: {train_metrics['specificity']:.2f}%
## Test Set Performance
- Accuracy: {test_metrics['accuracy']:.2f}%
- Precision: {test_metrics['precision']:.2f}%
- Recall: {test_metrics['recall']:.2f}%
- F1-Score: {test_metrics['f1']:.2f}%
- AUC: {test_metrics['auc']:.4f}
- G-mean: {test_metrics['g_mean']:.2f}%
- P-mean: {test_metrics['p_mean']:.2f}%
- Specificity: {test_metrics['specificity']:.2f}%
## Model Scores
- Log-Likelihood: {scores['log_likelihood']:.2f}
- BIC Score: {scores['bic']:.2f}
- K2 Score: {scores['k2']:.2f}
- BDeu Score: {scores['bdeu']:.2f}
- BDs Score: {scores['bds']:.2f}
## Network Structure
- Total Nodes: {len(results['model'].nodes())}
- Total Edges: {len(results['model'].edges())}
- Network Edges: {list(results['model'].edges())[:10]}... (showing first 10)
"""
        return context

    @staticmethod
    def _extract_json(text):
        """
        Parse a JSON object out of an LLM reply.

        Tolerates Markdown code fences (``` or ```json) around the payload,
        which chat models frequently emit even when asked for bare JSON.

        Raises:
            json.JSONDecodeError: if no valid JSON remains after unfencing.
        """
        cleaned = text.strip()
        if cleaned.startswith("```"):
            # Drop the opening fence line (which may carry a language tag).
            if "\n" in cleaned:
                cleaned = cleaned.split("\n", 1)[1]
            if cleaned.rstrip().endswith("```"):
                cleaned = cleaned.rstrip()[:-3]
        return json.loads(cleaned)

    def generate_summary(self, analysis_results):
        """
        Auto-generate a comprehensive summary of the analysis results.

        Args:
            analysis_results: Analysis-results dict.

        Returns:
            str: Summary text.
        """
        summary_prompt = """Based on the analysis results provided in the context, please generate a comprehensive summary that includes:
1. **Model Overview**: Brief description of the model type and configuration
2. **Performance Analysis**:
- Overall model performance on both training and test sets
- Comparison between training and test performance (overfitting/underfitting)
- Key strengths and weaknesses
3. **Network Structure Insights**: What the learned structure tells us about variable relationships
4. **Recommendations**: Specific suggestions for improvement
5. **Limitations**: Important caveats and limitations to consider
Format the summary in clear markdown with appropriate sections and bullet points."""
        return self.get_response(summary_prompt, analysis_results)

    def explain_metric(self, metric_name, analysis_results):
        """
        Explain a specific performance metric in the context of this analysis.

        Args:
            metric_name: Name of the metric to explain.
            analysis_results: Analysis-results dict.

        Returns:
            str: Metric explanation.
        """
        explain_prompt = f"""Please explain the following metric in the context of this analysis:
Metric: {metric_name}
Include:
1. What this metric measures
2. The value obtained in this analysis (training and test)
3. How to interpret this value
4. What it tells us about model performance
5. How it relates to other metrics in the analysis"""
        return self.get_response(explain_prompt, analysis_results)

    def suggest_improvements(self, analysis_results):
        """
        Ask the LLM for actionable model-improvement recommendations.

        Args:
            analysis_results: Analysis-results dict.

        Returns:
            str: Improvement suggestions.
        """
        improve_prompt = """Based on the current model performance and configuration, please provide specific, actionable recommendations for improvement.
Consider:
1. Feature engineering opportunities
2. Algorithm selection
3. Hyperparameter tuning
4. Data quality issues
5. Model complexity trade-offs
Prioritize recommendations by potential impact."""
        return self.get_response(improve_prompt, analysis_results)

    def explain_network_structure(self, analysis_results):
        """
        Explain the learned Bayesian Network structure.

        Args:
            analysis_results: Analysis-results dict.

        Returns:
            str: Structure explanation.
        """
        structure_prompt = """Please explain the learned Bayesian Network structure:
1. What are the key relationships (edges) discovered?
2. What do these relationships tell us about the domain?
3. Are there any surprising or interesting patterns?
4. How does the structure relate to the target variable?
5. What are the implications for prediction and inference?"""
        return self.get_response(structure_prompt, analysis_results)

    def compare_algorithms(self, analysis_results):
        """
        Compare the current structure-learning algorithm with the alternatives.

        Args:
            analysis_results: Analysis-results dict.

        Returns:
            str: Algorithm comparison.
        """
        compare_prompt = f"""The current model uses the {analysis_results['parameters']['algorithm']} algorithm.
Please:
1. Explain the characteristics of this algorithm
2. Compare it with other available algorithms (NB, TAN, CL, HC, PC)
3. Discuss when this algorithm is most appropriate
4. Suggest if a different algorithm might be better for this dataset
5. Explain the trade-offs involved"""
        return self.get_response(compare_prompt, analysis_results)

    def predict_from_text(self, user_description, analyzer, target_variable, feature_list):
        """
        Extract features from a free-text description and run a prediction.

        Args:
            user_description: Natural-language patient description.
            analyzer: BayesianNetworkAnalyzer instance; must provide
                ``predict_single_instance(evidence, target)`` returning a dict
                with ``probability`` and ``risk_level``.
            target_variable: Name of the target variable.
            feature_list: Features the model expects.

        Returns:
            str: AI response containing the prediction, or an error message
            if feature extraction fails.
        """
        # Step 1: have the LLM turn free text into structured features.
        extraction_prompt = f"""
You are a medical data analyst. Extract the following patient features from the description:
Required features: {', '.join(feature_list)}
User description: "{user_description}"
Please extract the values in JSON format. If a feature is not mentioned, use "unknown".
Return ONLY the JSON object, no other text.
Example format:
{{
"age": 65,
"size": 25,
"grade": 2,
"nodes": 1,
...
}}
"""
        response = self.client.chat.completions.create(
            model=self.MODEL,
            messages=[
                {"role": "system", "content": "You are a precise medical data extractor. Return only valid JSON."},
                {"role": "user", "content": extraction_prompt}
            ],
            temperature=0.1,
            # Fix: constrain the reply to a JSON object so parsing below is reliable.
            response_format={"type": "json_object"}
        )
        # Fix: tolerate code fences and report parse failures gracefully
        # instead of raising an unhandled JSONDecodeError into the UI.
        try:
            extracted_features = self._extract_json(response.choices[0].message.content)
        except json.JSONDecodeError as e:
            return f"❌ Error: could not parse the extracted features ({e}). Please rephrase the description and try again."
        # Step 2: keep only known evidence, and never the target itself.
        evidence_dict = {k: v for k, v in extracted_features.items()
                         if v != "unknown" and k != target_variable}
        # Step 3: run the Bayesian Network inference.
        prediction = analyzer.predict_single_instance(evidence_dict, target_variable)
        # Step 4: have the LLM phrase the result for a lay reader.
        interpretation_prompt = f"""
Based on the Bayesian Network model analysis:
Patient features: {evidence_dict}
Predicted death probability: {prediction['probability']:.2%}
Risk level: {prediction['risk_level']}
Please provide a clear, empathetic explanation including:
1. A summary of the patient's key risk factors
2. The predicted risk level and what it means
3. Important considerations and limitations
4. Recommendations for next steps
Be professional but accessible. Use markdown formatting.
"""
        final_response = self.client.chat.completions.create(
            model=self.MODEL,
            messages=[
                {"role": "system", "content": "You are a compassionate medical AI assistant."},
                {"role": "user", "content": interpretation_prompt}
            ],
            temperature=0.7
        )
        return final_response.choices[0].message.content

    def reset_conversation(self):
        """Clear the conversation history (the analysis context is rebuilt per call)."""
        self.conversation_history = []