# meta_analysis/meta_analysis_llm_assistant.py
# Provenance (file-viewer residue kept as a comment so the module stays importable):
#   uploaded by Donlagon007 - commit a0bc3fc "Upload 8 files" (verified) - 18.6 kB
import json
import re
import io
from PIL import Image
class MetaAnalysisLLMAssistant:
    """Question-answering assistant for Bayesian meta-analysis results.

    Wraps one of three chat back ends (Google Gemini, Anthropic Claude or
    OpenAI GPT-4o), keeps the running conversation in memory and injects a
    bilingual system prompt plus a text rendering of the current analysis
    results into every request.

    Attributes set by ``__init__``:
        api_provider: Provider string passed by the caller.
        session_id: Identifier stored for the caller; this class never reads it.
        conversation_history: List of ``{"role", "content"}`` dicts.
        model: Gemini ``GenerativeModel`` instance, or ``None`` for the others.
        client: Anthropic / OpenAI client, or ``None`` for Gemini.
        model_name: Name of the model used by the selected provider.
        system_prompt: System prompt sent with every request.
    """

    # Sampling settings shared by every provider call.
    TEMPERATURE = 0.7
    MAX_OUTPUT_TOKENS = 4000

    # Model identifiers used for each provider.
    GEMINI_MODEL_NAME = 'gemini-2.0-flash'
    CLAUDE_MODEL_NAME = "claude-sonnet-4-20250514"
    OPENAI_MODEL_NAME = "gpt-4o"

    # Bilingual system prompt; copied into ``self.system_prompt`` per instance.
    SYSTEM_PROMPT = """You are an expert Bayesian statistician specializing in hierarchical models and meta-analysis, particularly in the context of Pokémon battle statistics across multiple gyms (research sites).
**IMPORTANT - Language Instruction:**
- Always respond in the SAME language as the user's question
- If user asks in Traditional Chinese (繁體中文), respond in Traditional Chinese
- If user asks in English, respond in English
- Maintain language consistency throughout the conversation
你是一位精通貝氏階層模型和後設分析的統計專家,特別專注於寶可夢在多個道館(研究地點)的對戰統計分析。
Your role is to help users understand Bayesian meta-analysis results comparing win rates between different Pokémon types across multiple gyms (research units).
你的角色是幫助使用者理解貝氏後設分析結果,了解不同寶可夢屬性在多個道館中的勝率比較。
You should:
1. Explain Bayesian meta-analysis concepts in simple, accessible terms
2. Interpret posterior distributions, HDI (Highest Density Interval), and credible intervals
3. Explain hierarchical structure and why it's useful for combining multiple studies
4. Help users understand heterogeneity (sigma) between different gyms
5. Discuss the practical significance of type advantages across gyms
6. Provide insights about which gyms show the strongest effects
7. Suggest battle strategies based on the meta-analysis findings
8. Clarify differences between Bayesian and frequentist meta-analysis
9. Explain MCMC diagnostics (R-hat, ESS) when relevant
10. Interpret predictive distributions for new gyms
你應該:
1. 用簡單易懂的方式解釋貝氏後設分析概念
2. 詮釋後驗分佈、HDI(最高密度區間)和可信區間
3. 解釋階層結構及其在整合多個研究中的優勢
4. 幫助使用者理解不同道館間的異質性(sigma)
5. 討論不同屬性優勢在各道館的實際意義
6. 提供哪些道館顯示最強效應的見解
7. 根據後設分析發現提出對戰策略建議
8. 說明貝氏與頻率論後設分析的差異
9. 適時解釋 MCMC 診斷指標(R-hat、ESS)
10. 解釋對新道館的預測分佈
Key concepts to explain when relevant:
- **Bayesian Meta-Analysis**: Combines evidence from multiple gyms, borrowing strength across studies
- **Hierarchical Model**: Overall effect (d) and study-specific effects (delta[i])
- **Prior & Posterior**: How data from multiple gyms updates beliefs
- **HDI (Highest Density Interval)**: 95% most credible values
- **d (overall effect)**: Average log odds ratio across all gyms
- **sigma (between-study heterogeneity)**: How much different gyms vary in type advantage
- **delta[i] (study-specific effects)**: Each gym's individual effect
- **delta_new (predictive effect)**: Prediction for a new, unobserved gym
- **Odds Ratio**: exp(d) - how much more likely one type is to win
- **MCMC**: Markov Chain Monte Carlo sampling method
- **Convergence**: R-hat < 1.1, good ESS (effective sample size)
- **Burn-in/Warmup**: Initial samples discarded before convergence
重要概念解釋(當相關時):
- **貝氏後設分析**:整合多個道館的證據,跨研究借用資訊
- **階層模型**:整體效應(d)和研究特定效應(delta[i])
- **先驗與後驗**:來自多個道館的資料如何更新信念
- **HDI(最高密度區間)**:95% 最可信的數值範圍
- **d(整體效應)**:跨所有道館的平均對數勝算比
- **sigma(研究間異質性)**:不同道館的屬性優勢差異程度
- **delta[i](研究特定效應)**:每個道館的個別效應
- **delta_new(預測效應)**:對新的、未觀測道館的預測
- **勝算比**:exp(d) - 一種屬性相對另一種獲勝的可能性倍數
- **MCMC**:馬可夫鏈蒙地卡羅抽樣方法
- **收斂性**:R-hat < 1.1,良好的 ESS(有效樣本數)
- **Burn-in/Warmup**:收斂前捨棄的初始樣本
When discussing Pokémon type matchups across gyms:
- Connect statistical findings to type advantage mechanics
- Explain why certain gyms might show different patterns
- Discuss variation across gyms and their causes (e.g., gym-specific strategies, local meta)
- Identify which gyms show unusual results
- Consider implications for competitive play
討論寶可夢屬性在各道館的對抗時:
- 將統計發現連結到屬性相剋機制
- 解釋為何特定道館可能顯示不同模式
- 討論跨道館的變異及其可能原因(例如道館特定策略、當地環境)
- 識別哪些道館顯示異常結果
- 考慮對競技對戰的影響
Always be clear, educational, and engaging. Use examples when helpful.
Format responses with proper markdown for better readability.
請務必清晰、具教育性、引人入勝。適時使用範例說明。使用適當的 Markdown 格式以提升可讀性。"""

    def __init__(self, api_key, session_id, api_provider="Google Gemini"):
        """Create the assistant and the client for the chosen provider.

        Provider SDKs are imported lazily so only the selected one has to be
        installed.

        Args:
            api_key: API key for the selected provider.
            session_id: Unique session identifier (stored, not otherwise used
                by this class).
            api_provider: ``"Google Gemini"`` or ``"Anthropic Claude"``; any
                other value falls back to OpenAI GPT-4o.
        """
        self.api_provider = api_provider
        self.session_id = session_id
        self.conversation_history = []

        if api_provider == "Google Gemini":
            import google.generativeai as genai
            genai.configure(api_key=api_key)
            self.model_name = self.GEMINI_MODEL_NAME
            self.model = genai.GenerativeModel(self.model_name)
            self.client = None
        elif api_provider == "Anthropic Claude":
            import anthropic
            self.client = anthropic.Anthropic(api_key=api_key)
            self.model_name = self.CLAUDE_MODEL_NAME
            self.model = None
        else:
            # Every other provider string is treated as OpenAI GPT-4o.
            from openai import OpenAI
            self.client = OpenAI(api_key=api_key)
            self.model_name = self.OPENAI_MODEL_NAME
            self.model = None

        self.system_prompt = self.SYSTEM_PROMPT

    def get_response(self, user_message, analysis_results=None):
        """Send one user message to the provider and return the reply.

        On success the user turn and the assistant turn are both appended to
        ``conversation_history``. On failure the user turn is rolled back, so
        a failed request does not leave an unanswered message that would be
        re-sent with every later call.

        Args:
            user_message: The user's message text.
            analysis_results: Optional analysis-results dict; when truthy it
                is rendered with ``_prepare_context`` and sent as context.

        Returns:
            The assistant's reply, or an ``"API 呼叫失敗: ..."`` string when
            the provider call raised.
        """
        # Rendered before touching the history: a malformed results dict
        # raises here and leaves the conversation unchanged.
        context = self._prepare_context(analysis_results) if analysis_results else ""

        self.conversation_history.append({
            "role": "user",
            "content": user_message,
        })

        try:
            if self.api_provider == "Google Gemini":
                assistant_message = self._ask_gemini(context, user_message)
            elif self.api_provider == "Anthropic Claude":
                assistant_message = self._ask_claude(context)
            else:
                assistant_message = self._ask_openai(context)
        except Exception as e:
            # Boundary handler: callers expect an error string, not a raise.
            # Drop the turn that never got an answer.
            self.conversation_history.pop()
            return f"API 呼叫失敗: {str(e)}"

        self.conversation_history.append({
            "role": "assistant",
            "content": assistant_message,
        })
        return assistant_message

    def _system_with_context(self, context):
        """Return the system prompt, followed by the context section if any."""
        if not context:
            return self.system_prompt
        return f"{self.system_prompt}\n\n## Current Meta-Analysis Context:\n{context}"

    def _history_payload(self):
        """Return a fresh list of role/content dicts for chat-style APIs."""
        return [
            {"role": msg["role"], "content": msg["content"]}
            for msg in self.conversation_history
        ]

    def _build_gemini_prompt(self, context, user_message):
        """Flatten system prompt, context and history into one prompt string.

        Expects the current user message to already be the last history
        entry; it is skipped in the transcript and appended explicitly.
        """
        parts = [
            self._system_with_context(context),
            "\n\n## Conversation History:\n",
        ]
        for msg in self.conversation_history[:-1]:
            role = "User" if msg["role"] == "user" else "Assistant"
            parts.append(f"\n{role}: {msg['content']}\n")
        parts.append(f"\nUser: {user_message}\n\nAssistant:")
        return "".join(parts)

    def _ask_gemini(self, context, user_message):
        """Call Gemini with the flattened prompt and return the reply text."""
        import google.generativeai as genai
        response = self.model.generate_content(
            self._build_gemini_prompt(context, user_message),
            generation_config=genai.types.GenerationConfig(
                temperature=self.TEMPERATURE,
                max_output_tokens=self.MAX_OUTPUT_TOKENS,
            ),
        )
        return response.text

    def _ask_claude(self, context):
        """Call the Anthropic Messages API and return the reply text."""
        response = self.client.messages.create(
            model=self.model_name,
            max_tokens=self.MAX_OUTPUT_TOKENS,
            temperature=self.TEMPERATURE,
            system=self._system_with_context(context),
            messages=self._history_payload(),
        )
        return response.content[0].text

    def _ask_openai(self, context):
        """Call the OpenAI Chat Completions API and return the reply text."""
        response = self.client.chat.completions.create(
            model=self.model_name,
            max_tokens=self.MAX_OUTPUT_TOKENS,
            temperature=self.TEMPERATURE,
            messages=[
                {"role": "system", "content": self._system_with_context(context)},
                *self._history_payload(),
            ],
        )
        return response.choices[0].message.content

    @staticmethod
    def _format_rhat(value):
        """Render an R-hat value (or ``N/A``) followed by a pass/fail mark."""
        shown = f"{value:.4f}" if value is not None else 'N/A'
        mark = '✓' if value and value < 1.1 else '✗'
        return f"{shown} {mark}"

    @staticmethod
    def _format_ess(value):
        """Render an effective sample size as an integer, or ``N/A``."""
        return int(value) if value is not None else 'N/A'

    def _prepare_context(self, results):
        """Render the analysis results as a bilingual Markdown context block.

        Args:
            results: Dict with ``treatment_type``, ``control_type``,
                ``n_studies`` and the nested ``overall``, ``predictive`` and
                ``diagnostics`` dicts read below.

        Returns:
            The Markdown text, or a fixed "no results" message when
            ``results`` is falsy.

        Raises:
            KeyError: If an expected key is missing from ``results``.
        """
        if not results:
            return "目前尚無分析結果。No analysis results available yet."

        overall = results['overall']
        pred = results['predictive']
        diag = results['diagnostics']

        convergence = '✓ Converged 已收斂' if diag['converged'] else '✗ Not Converged 未收斂'

        if overall['or_mean'] > 1:
            key_finding = f"On average across all gyms, {results['treatment_type']} type is {overall['or_mean']:.2f} times more likely to win compared to {results['control_type']} type (95% HDI: [{overall['or_hdi_low']:.2f}, {overall['or_hdi_high']:.2f}]). 平均而言,跨所有道館,{results['treatment_type']} 屬性獲勝的可能性是 {results['control_type']} 屬性的 {overall['or_mean']:.2f} 倍(95% HDI: [{overall['or_hdi_low']:.2f}, {overall['or_hdi_high']:.2f}])。"
        else:
            key_finding = "Interestingly, the data suggests no clear advantage or even a slight disadvantage across gyms. 有趣的是,資料顯示跨道館並無明顯優勢,甚至可能略有劣勢。"

        # Heterogeneity wording is chosen from the posterior mean of sigma.
        if overall['sigma_mean'] > 0.5:
            heterogeneity = 'high heterogeneity - results vary substantially across different gyms 高異質性 - 不同道館的結果差異很大'
        elif overall['sigma_mean'] > 0.3:
            heterogeneity = 'moderate heterogeneity - some variation across gyms 中等異質性 - 道館間有一定變異'
        else:
            heterogeneity = 'low heterogeneity - results are relatively consistent across gyms 低異質性 - 道館間結果相對一致'

        context = f"""
## Current Bayesian Meta-Analysis | 目前的貝氏後設分析
### Study Information | 研究資訊
- **Treatment Type | 實驗組**: {results['treatment_type']}
- **Control Type | 對照組**: {results['control_type']}
- **Number of Gyms/Studies | 道館/研究數量**: {results['n_studies']}
### Overall Effect | 整體效應
- **d (Log Odds Ratio) | d(對數勝算比)**:
- Mean | 平均: {overall['d_mean']:.4f}
- SD | 標準差: {overall['d_sd']:.4f}
- 95% HDI: [{overall['d_hdi_low']:.4f}, {overall['d_hdi_high']:.4f}]
- **Odds Ratio | 勝算比**:
- Mean | 平均: {overall['or_mean']:.4f}
- SD | 標準差: {overall['or_sd']:.4f}
- 95% HDI: [{overall['or_hdi_low']:.4f}, {overall['or_hdi_high']:.4f}]
- **sigma (Between-study Heterogeneity) | sigma(研究間異質性)**:
- Mean | 平均: {overall['sigma_mean']:.4f}
- SD | 標準差: {overall['sigma_sd']:.4f}
- 95% HDI: [{overall['sigma_hdi_low']:.4f}, {overall['sigma_hdi_high']:.4f}]
### Predictive Effect for New Gym | 預測新道館效應
- **delta_new (Predictive Effect) | delta_new(預測效應)**:
- Mean | 平均: {pred['delta_new_mean']:.4f}
- SD | 標準差: {pred['delta_new_sd']:.4f}
- 95% HDI: [{pred['delta_new_hdi_low']:.4f}, {pred['delta_new_hdi_high']:.4f}]
- **Predictive OR | 預測勝算比**:
- Mean | 平均: {pred['or_new_mean']:.4f}
- 95% HDI: [{pred['or_new_hdi_low']:.4f}, {pred['or_new_hdi_high']:.4f}]
- **Uncertainty Increase | 不確定性增加**: {pred['uncertainty_ratio']:.2f}x
### Model Diagnostics | 模型診斷
- **R-hat (d)**: {self._format_rhat(diag['rhat_d'])}
- **R-hat (sigma)**: {self._format_rhat(diag['rhat_sigma'])}
- **ESS (d)**: {self._format_ess(diag['ess_d'])}
- **ESS (sigma)**: {self._format_ess(diag['ess_sigma'])}
- **Convergence | 收斂狀態**: {convergence}
### Key Finding | 關鍵發現
{key_finding}
The variation between gyms (sigma = {overall['sigma_mean']:.3f}) indicates {heterogeneity}.
"""
        return context

    def generate_summary(self, analysis_results):
        """Ask the model for a full summary report of the analysis results."""
        summary_prompt = """請根據提供的貝氏後設分析結果生成一份完整的總結報告,包含:
1. **研究目的**:簡述這個後設分析在研究什麼
2. **整體發現**:
- 跨所有道館的整體效應是什麼?
- d 和勝算比告訴我們什麼?
- HDI 的意義是什麼?
3. **道館間差異**:
- sigma 告訴我們什麼?
- 不同道館的結果一致嗎?
4. **預測新道館**:
- 如果開設新道館,預期結果如何?
- 不確定性有多大?
5. **模型品質**:
- 模型收斂得好嗎?(R-hat、ESS)
- 結果可信嗎?
6. **實戰啟示**:
- 訓練師如何運用這些資訊?
- 在不同道館應該注意什麼?
請用清楚的繁體中文 Markdown 格式撰寫,包含適當的章節標題。"""
        return self.get_response(summary_prompt, analysis_results)

    def explain_metric(self, metric_name, analysis_results):
        """Ask the model to explain one metric in the context of this analysis.

        Args:
            metric_name: Short metric key (e.g. ``'d'``, ``'sigma'``,
                ``'rhat'``); unknown keys are passed through unchanged.
            analysis_results: Analysis-results dict used as context.
        """
        metric_explanations = {
            'd': 'd (整體對數勝算比)',
            'sigma': 'sigma (道館間異質性)',
            'or': 'Odds Ratio (勝算比)',
            'hdi': '95% HDI (最高密度區間)',
            'delta': 'delta (道館特定效應)',
            'delta_new': 'delta_new (預測新道館效應)',
            'rhat': 'R-hat (收斂診斷)',
            'ess': 'ESS (有效樣本數)',
        }
        metric_display = metric_explanations.get(metric_name, metric_name)
        explain_prompt = f"""請在這次貝氏後設分析的脈絡下,解釋以下指標:
指標:{metric_display}
請包含:
1. 這個指標在貝氏後設分析中測量什麼?
2. 在本次分析中得到的數值是多少?
3. 如何從寶可夢對戰和道館的角度詮釋這個數值?
4. 與頻率論後設分析的對應指標有何不同?
5. 有什麼需要注意的限制或注意事項?
請用繁體中文回答。"""
        return self.get_response(explain_prompt, analysis_results)

    def explain_bayesian_meta_analysis(self):
        """Ask the model for a general introduction to Bayesian meta-analysis."""
        explain_prompt = """請用簡單的方式解釋貝氏後設分析,特別是在寶可夢道館對戰分析的情境下。
請涵蓋:
1. 什麼是後設分析?為什麼要整合多個道館的資料?
2. 貝氏方法相比傳統後設分析有什麼優勢?
3. 「借用資訊」(borrowing strength) 在後設分析中是什麼意思?
4. 階層模型如何處理道館間的異質性?
5. d、sigma、delta[i] 和 delta_new 之間的關係是什麼?
6. 預測分佈 (predictive distribution) 的實際意義?
請用寶可夢的實際例子讓說明更具體易懂,全程使用繁體中文。"""
        return self.get_response(explain_prompt, None)

    def battle_strategy_advice(self, analysis_results):
        """Ask the model for battle-strategy advice based on the results."""
        strategy_prompt = """根據貝氏後設分析的結果,請為寶可夢訓練師提供實際的對戰策略建議。
請考慮:
1. 整體而言,這個屬性優勢有多大?在所有道館都適用嗎?
2. 哪些道館特別顯示強效應?哪些道館效應不明顯?
3. 異質性(sigma)高/低對策略有什麼影響?
4. 在新道館(未知環境)對戰時應該如何決策?
5. 如何根據預測區間評估風險?
6. 對競技對戰和組隊有什麼啟示?
請具體且可操作,使用繁體中文回答。"""
        return self.get_response(strategy_prompt, analysis_results)

    def compare_gyms(self, analysis_results):
        """Ask the model to compare effects across the different gyms."""
        compare_prompt = """請分析不同道館之間的效應差異。
請說明:
1. 道館間的異質性(sigma)告訴我們什麼?
2. 為什麼不同道館可能顯示不同的屬性優勢?
3. 有沒有特定道館顯示異常結果?可能的原因是什麼?
4. 這些差異對訓練師選擇道館挑戰有什麼啟示?
5. 如何利用後設分析結果在不同道館做出更好的決策?
請用繁體中文回答。"""
        return self.get_response(compare_prompt, analysis_results)

    def explain_predictive_inference(self, analysis_results):
        """Ask the model to explain prediction for a new, unobserved gym."""
        explain_prompt = """請解釋後設分析中的「預測新研究」概念。
請說明:
1. delta_new 和 d 有什麼不同?
2. 為什麼預測新道館的不確定性比整體效應大?
3. 如何使用預測分佈做決策?
4. 在什麼情況下預測會比較準確/不準確?
5. 對實際應用(例如挑戰新開的道館)有什麼意義?
請用寶可夢的情境舉例,使用繁體中文回答。"""
        return self.get_response(explain_prompt, analysis_results)

    def reset_conversation(self):
        """Clear the conversation history."""
        self.conversation_history = []