File size: 4,048 Bytes
342e4c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
495d32d
342e4c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import time
import traceback

import numpy as np
import pandas as pd
import streamlit as st
from streamlit_ace import st_ace
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, OneHotEncoder, OrdinalEncoder, RobustScaler, StandardScaler

from utils.sanitize_code import sanitize_code


def _build_exec_namespace(df):
    """Return the globals dict handed to the preprocessing script.

    The script sees the input frame as ``df`` plus numpy, pandas, streamlit
    and the scikit-learn preprocessing classes imported by this module, so it
    can use them without importing anything itself.
    """
    # NOTE(review): ``df`` is passed by reference, so a script that mutates it
    # in place also mutates the caller's object — confirm this is intended.
    return {
        "df": df,
        "np": np,
        "pd": pd,
        "st": st,
        "SimpleImputer": SimpleImputer,
        "FunctionTransformer": FunctionTransformer,
        "StandardScaler": StandardScaler,
        "MinMaxScaler": MinMaxScaler,
        "RobustScaler": RobustScaler,
        "OneHotEncoder": OneHotEncoder,
        "OrdinalEncoder": OrdinalEncoder,
        "LabelEncoder": LabelEncoder,
        "ColumnTransformer": ColumnTransformer,
        "Pipeline": Pipeline,
    }


def prep_meta_execution(agent, code, df, auto=False):
    """Show the preprocessing script in an editor and run it on demand.

    The script is executed when the run button is pressed, or automatically
    when ``auto`` is true and ``agent.load_processed_df()`` returns ``None``.
    The script is expected to assign its result to a variable named
    ``process_df``.

    Args:
        agent: Object providing ``load_processed_df``, ``save_code``,
            ``save_error``, ``save_processed_df`` and ``finish_auto``.
        code: Script text shown in the editor; when ``None`` the editor is
            still rendered but nothing can be executed.
        df: Object exposed to the script under the name ``df``.
        auto: Run without a button press while no processed frame is stored.

    Returns:
        The ``process_df`` value produced by the script when execution
        succeeds and control returns from ``st.rerun()``; otherwise ``None``.
    """
    # The editor is rendered on every run, even if nothing gets executed.
    edited = st_ace(
        value=code,
        height=400,
        theme="tomorrow_night",
        language="python",
        auto_update=True
    )

    not_generated = agent.load_processed_df() is None

    if code is None:
        return None

    # The button is evaluated first so it is always drawn, auto mode or not.
    if not (st.button("▶️ 执行预处理") or (auto and not_generated)):
        return None

    # Run what is currently in the editor, not the original ``code`` argument.
    code = sanitize_code(edited)
    agent.save_code(code)

    exec_ns = _build_exec_namespace(df)

    try:
        with st.spinner("正在运行程序..."):
            # SECURITY: this executes LLM-generated / user-edited code with
            # the full privileges of the app process. Only acceptable when
            # every user of the app is trusted.
            exec(code, exec_ns)
    except Exception:
        # Format the traceback once so the UI and the saved error are identical.
        error_text = traceback.format_exc()
        st.error("已保存报错,正在重新调用llm生成代码debug")
        st.text(error_text)
        agent.save_error(error_text)
        # Ask the agent for a new script now that the error has been saved.
        prep_code_gen(agent, debug=True)
        return None

    process_df = exec_ns.get("process_df")
    if process_df is None:
        st.error(
            "脚本未写入 `process_df`。请确保编辑后的脚本在末尾赋值 process_df"
        )
        return None

    agent.save_processed_df(process_df)
    agent.finish_auto()
    # st.rerun() normally ends the current script run; the return below is
    # only reached if it hands control back (presumably outside a live
    # Streamlit session — confirm).
    st.rerun()
    return process_df
                    

# Chat message announcing a freshly generated script. The same text is the
# marker searched for in the chat history to decide whether auto mode has
# already produced a script, so it must stay in one place.
_CODE_UPDATED_MSG = "预处理脚本已更新!请重新运行代码!"


def _generate_and_announce(agent, df, suggest, spinner_text):
    """Request a script from the agent, save it, announce it and rerun."""
    with st.spinner(spinner_text):
        # Only the first 10 rows, rendered as text, are sent to the agent.
        raw = agent.code_generation(
            df.head(10).to_string(),
            suggest,
        )
        code = sanitize_code(raw)
        agent.save_code(code)

    st.chat_message("assistant").write(_CODE_UPDATED_MSG)
    agent.add_memory({"role": "assistant", "content": _CODE_UPDATED_MSG})

    st.rerun()


def prep_code_gen(agent, auto = False, debug = False):
    """Generate the preprocessing script, automatically or on button press.

    Nothing happens when ``agent.load_preprocessing_suggestions()`` returns
    ``None``. Otherwise a script is generated immediately when ``debug`` is
    truthy, or when ``auto`` is truthy and the chat history contains no
    assistant message carrying the "script updated" announcement. A button
    for manual generation is rendered afterwards.

    Args:
        agent: Object providing ``load_preprocessing_suggestions``,
            ``load_df``, ``load_memory``, ``code_generation``, ``save_code``
            and ``add_memory``.
        auto: Generate without a button press if no script was announced yet.
        debug: Force regeneration regardless of the chat history.

    Returns:
        None.
    """
    suggest = agent.load_preprocessing_suggestions()
    if suggest is None:
        # Without suggestions there is nothing to generate from and no button.
        return

    df = agent.load_df()

    # Tolerate an agent whose memory has not been initialised yet.
    chat_history = agent.load_memory() or []
    already_generated = any(
        entry["role"] == "assistant" and _CODE_UPDATED_MSG in str(entry["content"])
        for entry in chat_history
    )

    if debug or (auto and not already_generated):
        _generate_and_announce(agent, df, suggest, "预处理 Agent 正在编写脚本...")

    if st.button("🔧 生成预处理代码", key='prep_code'):
        _generate_and_announce(agent, df, suggest, "向 LLM 请求生成预处理脚本...")