Spaces:
Running
Running
Update prompt_engineer/planner.py
Browse files- prompt_engineer/planner.py +54 -24
prompt_engineer/planner.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
| 1 |
import re
|
|
|
|
| 2 |
import json
|
|
|
|
| 3 |
|
| 4 |
import streamlit as st
|
| 5 |
from typing import IO, List
|
|
@@ -24,6 +26,9 @@ class PlannerAgent(LLMClient):
|
|
| 24 |
self.switched_modeling = False
|
| 25 |
self.switched_report = False
|
| 26 |
|
|
|
|
|
|
|
|
|
|
| 27 |
def self_driving(self, df, user_input=None) -> str:
|
| 28 |
|
| 29 |
prompt = (
|
|
@@ -59,21 +64,63 @@ class PlannerAgent(LLMClient):
|
|
| 59 |
"""
|
| 60 |
|
| 61 |
plan_text = self.call(prompt)
|
| 62 |
-
print(plan_text)
|
| 63 |
try:
|
| 64 |
plan_dict = json.loads(plan_text)
|
| 65 |
except json.JSONDecodeError:
|
| 66 |
plan_text_fixed = plan_text.strip().strip('```json').strip('```')
|
| 67 |
plan_dict = json.loads(plan_text_fixed)
|
| 68 |
|
| 69 |
-
|
| 70 |
-
self.loading_auto = bool(plan_dict.get("loading_auto", False))
|
| 71 |
self.loading_auto = True
|
| 72 |
self.prep_auto = bool(plan_dict.get("prep_auto", False))
|
| 73 |
self.vis_auto = bool(plan_dict.get("vis_auto", False))
|
| 74 |
self.modeling_auto = bool(plan_dict.get("modeling_auto", False))
|
| 75 |
-
# self.modeling_auto = False
|
| 76 |
self.report_auto = bool(plan_dict.get("report_auto", False))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
|
| 79 |
def finish_loading_auto(self) -> str:
|
|
@@ -101,13 +148,8 @@ class PlannerAgent(LLMClient):
|
|
| 101 |
self.switched_report = True
|
| 102 |
|
| 103 |
|
| 104 |
-
import json
|
| 105 |
-
import ast
|
| 106 |
-
import re
|
| 107 |
-
import traceback
|
| 108 |
-
|
| 109 |
def _extract_first_json(text: str):
|
| 110 |
-
|
| 111 |
if not text:
|
| 112 |
return None
|
| 113 |
start = text.find('{')
|
|
@@ -125,23 +167,15 @@ def _extract_first_json(text: str):
|
|
| 125 |
return None
|
| 126 |
|
| 127 |
def _safe_parse_json(text: str):
|
| 128 |
-
|
| 129 |
-
尝试多种策略解析 LLM 输出为 dict:
|
| 130 |
-
1) 直接 json.loads
|
| 131 |
-
2) 去除 Markdown code fence 后再 loads
|
| 132 |
-
3) 提取第一个完整花括号块后 loads
|
| 133 |
-
4) ast.literal_eval 作为最后手段(接受 Python dict 风格)
|
| 134 |
-
返回 (dict_or_None, used_text, error_message_or_None)
|
| 135 |
-
"""
|
| 136 |
if not text or not text.strip():
|
| 137 |
return None, text, "empty"
|
| 138 |
-
|
| 139 |
try:
|
| 140 |
return json.loads(text), text, None
|
| 141 |
except Exception as e1:
|
| 142 |
pass
|
| 143 |
|
| 144 |
-
# 2) 去掉 ```json / ``` fence
|
| 145 |
try:
|
| 146 |
cleaned = re.sub(r'```json\s*', '', text, flags=re.IGNORECASE)
|
| 147 |
cleaned = re.sub(r'```', '', cleaned)
|
|
@@ -150,7 +184,6 @@ def _safe_parse_json(text: str):
|
|
| 150 |
except Exception:
|
| 151 |
pass
|
| 152 |
|
| 153 |
-
# 3) 提取首个匹配的 { ... } 顶层块
|
| 154 |
try:
|
| 155 |
sub = _extract_first_json(text)
|
| 156 |
if sub:
|
|
@@ -158,7 +191,6 @@ def _safe_parse_json(text: str):
|
|
| 158 |
except Exception:
|
| 159 |
pass
|
| 160 |
|
| 161 |
-
# 4) ast.literal_eval 兼容 Python 字典格式(单引号等)
|
| 162 |
try:
|
| 163 |
literal = ast.literal_eval(text)
|
| 164 |
if isinstance(literal, dict):
|
|
@@ -166,7 +198,6 @@ def _safe_parse_json(text: str):
|
|
| 166 |
except Exception:
|
| 167 |
pass
|
| 168 |
|
| 169 |
-
# 5) 再次尝试在提取的子串上用 literal_eval(防止单引号)
|
| 170 |
try:
|
| 171 |
sub = _extract_first_json(text)
|
| 172 |
if sub:
|
|
@@ -176,5 +207,4 @@ def _safe_parse_json(text: str):
|
|
| 176 |
except Exception:
|
| 177 |
pass
|
| 178 |
|
| 179 |
-
# 最后,返回 None 并带上错误信息
|
| 180 |
return None, text, "unable_to_parse"
|
|
|
|
| 1 |
import re
|
| 2 |
+
import ast
|
| 3 |
import json
|
| 4 |
+
import traceback
|
| 5 |
|
| 6 |
import streamlit as st
|
| 7 |
from typing import IO, List
|
|
|
|
| 26 |
self.switched_modeling = False
|
| 27 |
self.switched_report = False
|
| 28 |
|
| 29 |
+
self.plan = None
|
| 30 |
+
|
| 31 |
+
|
| 32 |
def self_driving(self, df, user_input=None) -> str:
|
| 33 |
|
| 34 |
prompt = (
|
|
|
|
| 64 |
"""
|
| 65 |
|
| 66 |
plan_text = self.call(prompt)
|
|
|
|
| 67 |
try:
|
| 68 |
plan_dict = json.loads(plan_text)
|
| 69 |
except json.JSONDecodeError:
|
| 70 |
plan_text_fixed = plan_text.strip().strip('```json').strip('```')
|
| 71 |
plan_dict = json.loads(plan_text_fixed)
|
| 72 |
|
| 73 |
+
# self.loading_auto = bool(plan_dict.get("loading_auto", False))
|
|
|
|
| 74 |
self.loading_auto = True
|
| 75 |
self.prep_auto = bool(plan_dict.get("prep_auto", False))
|
| 76 |
self.vis_auto = bool(plan_dict.get("vis_auto", False))
|
| 77 |
self.modeling_auto = bool(plan_dict.get("modeling_auto", False))
|
|
|
|
| 78 |
self.report_auto = bool(plan_dict.get("report_auto", False))
|
| 79 |
+
|
| 80 |
+
plan = self.analysis_path(df)
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def analysis_path(self, df) -> str:
    """Ask the LLM to justify the five automation switches for this dataset.

    Builds a prompt from a summary of ``df`` (row/column counts, column
    names with dtypes, first five rows), the optional user preferences
    held in ``st.session_state``, and the switch values currently stored
    on ``self``. The prompt is sent through ``self.call`` and the reply
    is cached on ``self.plan``.

    Args:
        df: Dataset to summarise. The code reads ``shape``, ``columns``,
            ``dtypes`` and ``head()`` from it (presumably a pandas
            DataFrame; confirm against the caller).

    Returns:
        The value returned by ``self.call`` for the prompt, which is
        also stored on ``self.plan``.
    """
    column_types = dict(zip(df.columns.tolist(), df.dtypes.astype(str).tolist()))
    prompt = (
        "下面是一个数据集的基本信息\n\n"
        f"- 数据维度:{df.shape[0]} 行 × {df.shape[1]} 列\n"
        f"- 列名和数据类型:{column_types}\n"
        f"- 前 5 行样本:\n{df.head().to_dict(orient='list')}\n\n"
    )

    # Use .get() so a session in which these keys were never initialised
    # skips the optional sections instead of raising on attribute access.
    preference_select = st.session_state.get("preference_select")
    if preference_select:
        prompt += f"以下是用户的分析偏好设置:{preference_select}。\n\n"
    additional_preference = st.session_state.get("additional_preference")
    if additional_preference:
        prompt += (
            f"用户提供了以下建模目的与特殊需求:{additional_preference},"
            "务必满足,高优先级。\n\n"
        )

    # The switch values below are read from self; they must have been set
    # (e.g. by self_driving) before this method is called.
    prompt += (
        "\n"
        "你现在是一名资深的数据科学与统计建模专家,请基于上面提供的数据集特征与用户需求,\n"
        "以“专家决策者”的角度做出全面、严谨、具解释性的综合判断。\n"
        "\n"
        "在你已经给出的 5 项自动化开关决策中:\n"
        f"1. loading_auto —— 是否需要对数据列名进行初步分析? 你的选择:{self.loading_auto}\n"
        f"2. prep_auto —— 是否需要做数据预处理或清洗? 你的选择:{self.prep_auto}\n"
        f"3. vis_auto —— 是否需要做数据可视化? 你的选择:{self.vis_auto}\n"
        f"4. modeling_auto —— 是否需要建模或统计分析? 你的选择:{self.modeling_auto}\n"
        f"5. report_auto —— 是否需要生成分析报告? 你的选择:{self.report_auto}\n"
        "\n"
        "请按照专家的水准,详细、系统性地阐述你的分析思路,并逐项解释你为何做出这些选择。\n"
        "\n"
        "你的回答必须:\n"
        "- 展现全局视角,而非仅局部判断;\n"
        "- 给出清晰的推理链,而非简单理由;\n"
        "- 指出每个决策的必要性、替代方案及其风险;\n"
        "- 使用专业术语但保持可读性强;\n"
        "- 说明如果不做该步骤会导致哪些问题(反事实解释)。\n"
        "\n"
        "回答请结构化呈现。\n"
    )

    self.plan = self.call(prompt)
    return self.plan
|
| 124 |
|
| 125 |
|
| 126 |
def finish_loading_auto(self) -> str:
|
|
|
|
| 148 |
self.switched_report = True
|
| 149 |
|
| 150 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
def _extract_first_json(text: str):
|
| 152 |
+
|
| 153 |
if not text:
|
| 154 |
return None
|
| 155 |
start = text.find('{')
|
|
|
|
| 167 |
return None
|
| 168 |
|
| 169 |
def _safe_parse_json(text: str):
|
| 170 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
if not text or not text.strip():
|
| 172 |
return None, text, "empty"
|
| 173 |
+
|
| 174 |
try:
|
| 175 |
return json.loads(text), text, None
|
| 176 |
except Exception as e1:
|
| 177 |
pass
|
| 178 |
|
|
|
|
| 179 |
try:
|
| 180 |
cleaned = re.sub(r'```json\s*', '', text, flags=re.IGNORECASE)
|
| 181 |
cleaned = re.sub(r'```', '', cleaned)
|
|
|
|
| 184 |
except Exception:
|
| 185 |
pass
|
| 186 |
|
|
|
|
| 187 |
try:
|
| 188 |
sub = _extract_first_json(text)
|
| 189 |
if sub:
|
|
|
|
| 191 |
except Exception:
|
| 192 |
pass
|
| 193 |
|
|
|
|
| 194 |
try:
|
| 195 |
literal = ast.literal_eval(text)
|
| 196 |
if isinstance(literal, dict):
|
|
|
|
| 198 |
except Exception:
|
| 199 |
pass
|
| 200 |
|
|
|
|
| 201 |
try:
|
| 202 |
sub = _extract_first_json(text)
|
| 203 |
if sub:
|
|
|
|
| 207 |
except Exception:
|
| 208 |
pass
|
| 209 |
|
|
|
|
| 210 |
return None, text, "unable_to_parse"
|