Upload mcnemar_llm_assistant.py
Browse files- mcnemar_llm_assistant.py +19 -55
mcnemar_llm_assistant.py
CHANGED
|
@@ -3,7 +3,7 @@ import json
|
|
| 3 |
|
| 4 |
class McNemarLLMAssistant:
|
| 5 |
"""
|
| 6 |
-
McNemar 檢定 LLM 問答助手
|
| 7 |
協助用戶理解 McNemar 檢定分析結果
|
| 8 |
"""
|
| 9 |
|
|
@@ -106,63 +106,27 @@ Format responses with proper markdown for better readability.
|
|
| 106 |
"content": user_message
|
| 107 |
})
|
| 108 |
|
| 109 |
-
# 構建
|
| 110 |
-
|
|
|
|
|
|
|
| 111 |
|
| 112 |
-
# 加入上下文
|
| 113 |
if context:
|
| 114 |
-
|
| 115 |
|
| 116 |
# 加入對話歷史
|
| 117 |
-
|
| 118 |
-
role = "User" if msg["role"] == "user" else "Assistant"
|
| 119 |
-
full_input += f"{role}: {msg['content']}\n\n"
|
| 120 |
-
|
| 121 |
-
# 加入當前用戶訊息
|
| 122 |
-
full_input += f"User: {user_message}"
|
| 123 |
|
| 124 |
try:
|
| 125 |
-
#
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
input=final_input,
|
| 132 |
-
reasoning={
|
| 133 |
-
"effort": "minimal" # gpt-5-mini 最快速模式
|
| 134 |
-
},
|
| 135 |
-
text={
|
| 136 |
-
"verbosity": "medium" # 中等詳細度
|
| 137 |
-
},
|
| 138 |
-
max_output_tokens=4000 # GPT-5 使用 max_output_tokens
|
| 139 |
)
|
| 140 |
|
| 141 |
-
|
| 142 |
-
print("=" * 50)
|
| 143 |
-
print("DEBUG: Response type:", type(response))
|
| 144 |
-
print("DEBUG: Response attributes:", dir(response))
|
| 145 |
-
if hasattr(response, 'output'):
|
| 146 |
-
print("DEBUG: Output type:", type(response.output))
|
| 147 |
-
print("DEBUG: Output:", response.output)
|
| 148 |
-
print("=" * 50)
|
| 149 |
-
|
| 150 |
-
# GPT-5 Responses API 的回應結構
|
| 151 |
-
# output 可能是列表或物件,需要正確提取
|
| 152 |
-
if hasattr(response, 'output'):
|
| 153 |
-
output = response.output
|
| 154 |
-
if isinstance(output, list):
|
| 155 |
-
# 如果是列表,取第一個元素的 content
|
| 156 |
-
assistant_message = output[0].content if output else ""
|
| 157 |
-
elif hasattr(output, 'content'):
|
| 158 |
-
# 如果是物件,直接取 content
|
| 159 |
-
assistant_message = output.content
|
| 160 |
-
else:
|
| 161 |
-
# 如果都不是,嘗試轉為字串
|
| 162 |
-
assistant_message = str(output)
|
| 163 |
-
else:
|
| 164 |
-
# 備用:嘗試其他可能的結構
|
| 165 |
-
assistant_message = str(response)
|
| 166 |
|
| 167 |
# 添加助手回應到歷史
|
| 168 |
self.conversation_history.append({
|
|
@@ -249,11 +213,11 @@ Winner Low {results['contingency_table'].get(0, {}).get(1, 0):<15} {res
|
|
| 249 |
"""解釋特定指標"""
|
| 250 |
|
| 251 |
metric_explanations = {
|
| 252 |
-
'mcnemar_statistic': 'McNemar
|
| 253 |
-
'p_value': 'p
|
| 254 |
-
'odds_ratio': '
|
| 255 |
-
'confidence_interval': '95%
|
| 256 |
-
'discordant_pairs': '
|
| 257 |
}
|
| 258 |
|
| 259 |
metric_display = metric_explanations.get(metric_name, metric_name)
|
|
|
|
| 3 |
|
| 4 |
class McNemarLLMAssistant:
|
| 5 |
"""
|
| 6 |
+
McNemar 檢定 LLM 問答助手
|
| 7 |
協助用戶理解 McNemar 檢定分析結果
|
| 8 |
"""
|
| 9 |
|
|
|
|
| 106 |
"content": user_message
|
| 107 |
})
|
| 108 |
|
| 109 |
+
# 構建訊息列表
|
| 110 |
+
messages = [
|
| 111 |
+
{"role": "system", "content": self.system_prompt}
|
| 112 |
+
]
|
| 113 |
|
|
|
|
| 114 |
if context:
|
| 115 |
+
messages.append({"role": "system", "content": f"Current Analysis Context:\n{context}"})
|
| 116 |
|
| 117 |
# 加入對話歷史
|
| 118 |
+
messages.extend(self.conversation_history)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
try:
|
| 121 |
+
# 調用 OpenAI API
|
| 122 |
+
response = self.client.chat.completions.create(
|
| 123 |
+
model="gpt-4o-mini",
|
| 124 |
+
messages=messages,
|
| 125 |
+
temperature=0.7,
|
| 126 |
+
max_tokens=1500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
)
|
| 128 |
|
| 129 |
+
assistant_message = response.choices[0].message.content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
# 添加助手回應到歷史
|
| 132 |
self.conversation_history.append({
|
|
|
|
| 213 |
"""解釋特定指標"""
|
| 214 |
|
| 215 |
metric_explanations = {
|
| 216 |
+
'mcnemar_statistic': 'McNemar 統計量',
|
| 217 |
+
'p_value': 'p 值',
|
| 218 |
+
'odds_ratio': '勝算比',
|
| 219 |
+
'confidence_interval': '95% 信賴區間',
|
| 220 |
+
'discordant_pairs': '不一致配對'
|
| 221 |
}
|
| 222 |
|
| 223 |
metric_display = metric_explanations.get(metric_name, metric_name)
|