File size: 7,961 Bytes
5e52c91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8197fee
 
e1c640c
5e52c91
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
import os
from typing import List, Optional

import pandas as pd
import streamlit as st
import streamlit_antd_components as sac

from workflow.dataloading.dataloading_core import process_complex_data, load_from_path, load_concat_file, PathFileWrapper


def loading_data_file(agent):
    """Render the data-import panel with "local upload" and "path import" tabs.

    Files or paths that the agent has not recorded yet are handed to
    ``process_complex_data``. When that returns a DataFrame, the result is
    stored on the agent, the file names are recorded and the page is rerun.
    Finally, if the agent holds two or more DataFrames, ``load_concat_file``
    is called with them.

    Args:
        agent: Data-loading agent. This function calls its ``load_file_name``,
            ``add_df``, ``save_dfs``, ``save_file_name`` and ``load_dfs``
            methods.
    """
    st.info(
        "💡 提示:\n"
        "1. 支持一次上传多个数据文件\n"
        "2. 自动使用大模型分析并处理数据\n"
        "3. 支持多种格式的文件类型上传\n"
    )

    selected_index = sac.tabs([
        sac.TabsItem(label='本地上传'),
        sac.TabsItem(label='路径导入'),
    ], color='#5980AE',)

    if selected_index == "本地上传":
        # Upload files through the browser widget.
        uploaded_files = st.file_uploader(
            "选择新文件",
            accept_multiple_files=True,
            help="拖拽或点击上传多个文件",
        )

        if uploaded_files:
            known_names = agent.load_file_name()
            new_files = [f for f in uploaded_files if f.name not in known_names]
            if new_files:
                _ingest_new_files(
                    agent,
                    new_files,
                    [f.name for f in new_files],
                    "导入失败:",
                )

    elif selected_index == "路径导入":
        # Import files by local filesystem path.
        raw_paths = st.text_area(
            "从路径导入数据 (每行一个文件路径)",
            placeholder=    "C:\\data\\iris.names\nC:\\data\\iris.data",
            height=100
        )

        if st.button("从路径加载文件", use_container_width=True) and raw_paths:
            valid_paths, invalid_paths = _partition_paths(raw_paths)

            if invalid_paths:
                st.warning("路径不存在,已跳过:\n- " + "\n- ".join(invalid_paths))

            if not valid_paths:
                st.error("未找到任何有效的本地文件路径。")
            else:
                known_names = agent.load_file_name()
                new_paths = [p for p in valid_paths if p not in known_names]

                if not new_paths:
                    st.info("所有指定的路径文件均已加载。")
                else:
                    _ingest_new_files(
                        agent,
                        [PathFileWrapper(p) for p in new_paths],
                        new_paths,
                        "本地文件读取失败:",
                    )

    dfs = agent.load_dfs()
    if dfs is not None and len(dfs) >= 2:
        load_concat_file(dfs, agent)


def _partition_paths(raw_paths):
    """Split one-path-per-line text into (existing, missing) path lists.

    Each line is stripped of surrounding whitespace and quotes. Blank lines
    and repeated paths are dropped so that a path typed twice is neither
    processed nor reported twice. Every path is checked on disk exactly once,
    so it always ends up in exactly one of the two lists.
    """
    existing, missing = [], []
    seen = set()
    for line in raw_paths.split('\n'):
        path = line.strip().strip("'\"")
        if not path or path in seen:
            continue
        seen.add(path)
        if os.path.exists(path):
            existing.append(path)
        else:
            missing.append(path)
    return existing, missing


def _ingest_new_files(agent, files, names, error_prefix):
    """Process ``files``, store the result on ``agent`` and rerun on success.

    Args:
        agent: Data-loading agent that receives the processed data.
        files: File-like objects passed to ``process_complex_data``.
        names: Names recorded through ``agent.save_file_name`` on success.
        error_prefix: Text shown in front of the exception message on failure.
    """
    try:
        with st.spinner("正在处理数据..."):
            df, dfs = process_complex_data(files, agent)
        loaded = df is not None
        if loaded:
            agent.add_df(df)
            agent.save_dfs(dfs)
            for name in names:
                agent.save_file_name(name)
    except Exception as err:
        st.error(f"{error_prefix}{err}")
        return

    # Rerun outside the try block so the rerun control-flow signal can never
    # be mistaken for a processing failure.
    if loaded:
        st.rerun()


def loading_basic_info(agent):
    """Show summary metrics, a dtype overview and a preview of the loaded data.

    Renders nothing when ``agent.load_df()`` returns ``None``.

    Args:
        agent: Data-loading agent; only its ``load_df`` method is used.
    """
    df = agent.load_df()
    if df is None:
        return

    # Compute the null mask once; it feeds both the total and the per-column %.
    null_mask = df.isnull()
    n_rows, n_cols = df.shape
    missing = int(null_mask.sum().sum())

    rows_col, cols_col, missing_col = st.columns(3)
    rows_col.metric("行数", n_rows)
    cols_col.metric("列数", n_cols)
    missing_col.metric("缺失值总数", missing)

    dtype_info = pd.DataFrame({
        "列名": df.columns,
        "类型": df.dtypes.astype(str),
        "非空": df.count().values,
        "缺失%": (null_mask.mean() * 100).round(2).values,
    }).reset_index(drop=True)

    selected_index = sac.tabs([
        sac.TabsItem(label='数据类型概览'),
        sac.TabsItem(label='数据预览'),
    ],color='#5980AE',)

    if selected_index == "数据类型概览":
        st.dataframe(dtype_info, use_container_width=True)
    elif selected_index == "数据预览":
        if st.button("🎲 随机抽样"):
            # DataFrame.sample raises ValueError when n exceeds the number of
            # rows (sampling without replacement), so cap the sample size.
            display_df = df.sample(min(10, n_rows))
            st.dataframe(display_df, use_container_width=True)
        else:
            st.dataframe(df.head(10), use_container_width=True)


def loading_chat(agent, auto=False) -> None:
    """Render the assistant chat panel for the loaded data.

    Returns immediately when ``agent.load_df()`` yields ``None``. Otherwise it
    shows a greeting with an "analyze" button, replays the stored chat
    history, and then handles two triggers: the data-description request
    (button click, or ``auto`` when no earlier assistant message contains
    "含义") and free-form user input. Each handled request is appended to the
    agent's memory and followed by a page rerun.

    Args:
        agent: Data-loading agent. Uses ``load_df``, ``load_memory``,
            ``add_memory``, ``do_data_description`` and ``finish_auto``.
        auto: When truthy, run the description automatically if none has
            been produced yet.
    """
    describe_prompt = "请帮我解析数据含义"

    data = agent.load_df()
    if data is None:
        return

    with st.chat_message("assistant"):
        st.write(
            "我是 Autostat 数据分析助手,很高兴为您服务!\n\n"
            "请先上传您的数据文件,上传完成后,您可以在下方和我对话,也可以直接点击按钮解析数据含义。"
        )
        wants_analysis = st.button("🔍 解析含义")
        st.empty()

    # Replay the stored conversation; only string contents are written.
    history = agent.load_memory()
    for message in history:
        bubble = st.chat_message(message["role"])
        text = message["content"]
        if isinstance(text, str):
            bubble.write(text)

    # Has the assistant already produced a description in this conversation?
    described_before = False
    for message in history:
        if message["role"] == "assistant" and "含义" in str(message["content"]):
            described_before = True
            break

    should_describe = wants_analysis or (auto and not described_before)
    if should_describe:
        st.chat_message("user").write(describe_prompt)
        agent.add_memory({"role": "user", "content": describe_prompt})
        with st.spinner("分析中..."):
            description = agent.do_data_description(data)

        agent.finish_auto()
        st.chat_message("assistant").write(description)
        agent.add_memory({"role": "assistant", "content": description})
        st.rerun()

    # Free-form request typed by the user.
    question = st.chat_input("请输入需求,例如“帮我分析xx列”")
    if question:
        st.chat_message("user").write(question)
        agent.add_memory({"role": "user", "content": question})
        with st.spinner("处理中…"):
            answer = agent.do_data_description(data, question)

        st.chat_message("assistant").write(answer)
        agent.add_memory({"role": "assistant", "content": answer})
        st.rerun()


if __name__ == "__main__":
    # Page entry point: read the agents from the session, hand over to the
    # preprocessing page when the automatic loading step is finished, and
    # otherwise lay out the header and the three expanders.

    agent = st.session_state.data_loading_agent
    planner = st.session_state.planner_agent
    auto = planner.loading_auto

    if st.session_state.auto_mode == True:
        # Explicit ==True/==False comparisons are kept on purpose: switching to
        # truthiness would behave differently for None or non-bool values.
        loading_done = agent.finish_auto_task == True and planner.switched_prep == False
        if loading_done or planner.loading_auto == False:
            planner.finish_loading_auto()
            st.switch_page("workflow/preprocessing/preprocessing_render.py")

    title_col, links_col = st.columns(2)
    with title_col:
        st.title("数据导入")
    with links_col:
        # Two empty writes push the link buttons down next to the title.
        st.write("")
        st.write("")
        sac.buttons(
            [
                sac.ButtonsItem(label='Github', icon='github', href='https://github.com/Jiaye-s-Group/AutoSTAT'),
                sac.ButtonsItem(label='Doc', icon=sac.BsIcon(name='bi bi-file-earmark-post-fill', size=16), href='https://autostat.cc/docs/'),
                sac.ButtonsItem(label='Web', icon=sac.BsIcon(name='bi bi-globe2', size=16), href='https://autostat.cc/'),
            ],
            align='end',
            color='dark',
            variant='filled',
            index=None,
        )
    st.markdown("---")

    # Left column: upload, then data display; right column: chat.
    # The call order below is the same as the render order.
    left_col, right_col = st.columns(2)
    with left_col.expander('数据上传', True):
        loading_data_file(agent)
    with right_col.expander('数据建议', True):
        loading_chat(agent, auto)
    with left_col.expander('数据展示', True):
        loading_basic_info(agent)