# AutoSTAT / workflow/dataloading/dataloading_render.py
# Last change by ElvisWang111:
#   Update workflow/dataloading/dataloading_render.py
#   (commit e1c640c, verified)
import os
from typing import List, Optional
import pandas as pd
import streamlit as st
import streamlit_antd_components as sac
from workflow.dataloading.dataloading_core import process_complex_data, load_from_path, load_concat_file, PathFileWrapper
def _ingest_new_files(agent, files, names, error_prefix):
    """Process *files* and record the result on *agent*.

    Shared by the upload tab and the path-import tab, which differ only in
    how the file objects are obtained and in the error message prefix.

    Args:
        agent: Data-loading agent; receives the merged frame, the per-file
            frames and the names of the files that were imported.
        files: File-like objects handed to ``process_complex_data``.
        names: Identifier stored per file via ``agent.save_file_name`` so the
            same file is skipped on later runs.
        error_prefix: Text shown before the exception message on failure.
    """
    try:
        with st.spinner("正在处理数据..."):
            df, dfs = process_complex_data(files, agent)
        if df is not None:
            agent.add_df(df)
            agent.save_dfs(dfs)
            for name in names:
                agent.save_file_name(name)
            # Restart the script run once the agent state has been updated.
            st.rerun()
    except Exception as err:
        # UI boundary: surface any processing failure to the user instead of
        # crashing the page.
        st.error(f"{error_prefix}:{err}")


def loading_data_file(agent):
    """Render the data-import panel (browser upload or local path import).

    Files whose name/path is already recorded on the agent are skipped. After
    the selected tab is handled, if the agent holds two or more frames the
    concat UI from ``load_concat_file`` is rendered as well.

    Args:
        agent: Data-loading agent that stores frames and imported file names.
    """
    st.info(
        "💡 提示:\n"
        "1. 支持一次上传多个数据文件\n"
        "2. 自动使用大模型分析并处理数据\n"
        "3. 支持多种格式的文件类型上传\n"
    )

    selected_index = sac.tabs([
        sac.TabsItem(label='本地上传'),
        sac.TabsItem(label='路径导入'),
    ], color='#5980AE',)

    if selected_index == "本地上传":
        # Upload through the browser.
        uploaded_files = st.file_uploader(
            "选择新文件",
            accept_multiple_files=True,
            help="拖拽或点击上传多个文件",
        )
        if uploaded_files:
            known_names = agent.load_file_name()
            new_files = [f for f in uploaded_files if f.name not in known_names]
            if new_files:
                _ingest_new_files(
                    agent,
                    new_files,
                    [f.name for f in new_files],
                    "导入失败",
                )

    elif selected_index == "路径导入":
        # Import from paths on the machine running the app.
        raw_paths = st.text_area(
            "从路径导入数据 (每行一个文件路径)",
            placeholder="C:\\data\\iris.names\nC:\\data\\iris.data",
            height=100
        )
        if st.button("从路径加载文件", use_container_width=True) and raw_paths:
            # One path per line; tolerate surrounding whitespace and quotes.
            cleaned = (
                line.strip().strip("'\"")
                for line in raw_paths.strip().split('\n')
                if line.strip()
            )
            # De-duplicate while keeping the order: the same path listed twice
            # would otherwise be wrapped and processed twice in one batch.
            # Entries that are empty once the quotes are removed are dropped.
            path_list = [p for p in dict.fromkeys(cleaned) if p]

            # Single pass so every path is checked on disk exactly once.
            valid_paths, invalid_paths = [], []
            for p in path_list:
                if os.path.exists(p):
                    valid_paths.append(p)
                else:
                    invalid_paths.append(p)

            if invalid_paths:
                st.warning("路径不存在,已跳过:\n- " + "\n- ".join(invalid_paths))

            if not valid_paths:
                st.error("未找到任何有效的本地文件路径。")
            else:
                known_names = agent.load_file_name()
                new_paths = [p for p in valid_paths if p not in known_names]
                if not new_paths:
                    st.info("所有指定的路径文件均已加载。")
                else:
                    _ingest_new_files(
                        agent,
                        [PathFileWrapper(p) for p in new_paths],
                        new_paths,
                        "本地文件读取失败",
                    )

    # With several frames available, offer the concat UI.
    dfs = agent.load_dfs()
    if dfs is not None and len(dfs) >= 2:
        load_concat_file(dfs, agent)
def loading_basic_info(agent):
    """Show headline metrics, a per-column summary and a preview of the data.

    Nothing is rendered while the agent has no DataFrame.

    Args:
        agent: Data-loading agent; ``agent.load_df()`` supplies the frame.
    """
    df = agent.load_df()
    if df is None:
        return

    n_rows, n_cols = df.shape
    missing = int(df.isnull().sum().sum())

    col1, col2, col3 = st.columns(3)
    col1.metric("行数", n_rows)
    col2.metric("列数", n_cols)
    col3.metric("缺失值总数", missing)

    # One row per column: name, dtype, non-null count, missing percentage.
    dtype_info = pd.DataFrame({
        "列名": df.columns,
        "类型": df.dtypes.astype(str),
        "非空": df.count().values,
        "缺失%": (df.isnull().mean() * 100).round(2).values,
    }).reset_index(drop=True)

    selected_index = sac.tabs([
        sac.TabsItem(label='数据类型概览'),
        sac.TabsItem(label='数据预览'),
    ], color='#5980AE',)

    if selected_index == "数据类型概览":
        st.dataframe(dtype_info, use_container_width=True)
    elif selected_index == "数据预览":
        if st.button("🎲 随机抽样"):
            # DataFrame.sample raises ValueError when asked for more rows than
            # the frame holds (sampling without replacement), so clamp the
            # sample size; an empty frame is shown as-is.
            sample_size = min(10, len(df))
            display_df = df.sample(sample_size) if sample_size else df
            st.dataframe(display_df, use_container_width=True)
        else:
            st.dataframe(df.head(10), use_container_width=True)
def loading_chat(agent, auto=False) -> None:
    """Render the assistant chat panel for the loaded dataset.

    Returns immediately when the agent has no DataFrame. Otherwise shows a
    greeting with an "explain the data" button, replays the stored
    conversation, and handles two kinds of requests: the preset description
    request (button click, or ``auto`` when no earlier assistant message
    contains "含义") and free-form chat input. Each handled request is
    appended to the agent's memory and followed by a script rerun.

    Args:
        agent: Data-loading agent providing the frame, the chat memory and
            ``do_data_description``.
        auto: When truthy, issue the preset description request without a
            click, unless one was already answered.
    """
    df = agent.load_df()
    if df is None:
        return

    with st.chat_message("assistant"):
        st.write(
            "我是 Autostat 数据分析助手,很高兴为您服务!\n\n"
            "请先上传您的数据文件,上传完成后,您可以在下方和我对话,也可以直接点击按钮解析数据含义。"
        )
        analyze_btn = st.button("🔍 解析含义")

    # The placeholder is not written to here; the call is kept so the page
    # still contains the same sequence of elements.
    st.empty()

    # Replay the stored conversation; only plain-text messages are shown.
    history = agent.load_memory()
    for message in history:
        bubble = st.chat_message(message["role"])
        text = message["content"]
        if isinstance(text, str):
            bubble.write(text)

    # Has the assistant already produced a description earlier on?
    described_before = False
    for message in history:
        if message["role"] == "assistant" and "含义" in str(message["content"]):
            described_before = True
            break

    if analyze_btn or (auto and not described_before):
        preset_prompt = "请帮我解析数据含义"
        st.chat_message("user").write(preset_prompt)
        agent.add_memory({"role": "user", "content": preset_prompt})

        with st.spinner("分析中..."):
            description = agent.do_data_description(df)
        agent.finish_auto()

        st.chat_message("assistant").write(description)
        agent.add_memory({"role": "assistant", "content": description})
        st.rerun()

    # Free-form request typed by the user.
    user_input = st.chat_input("请输入需求,例如“帮我分析xx列”")
    if user_input:
        st.chat_message("user").write(user_input)
        agent.add_memory({"role": "user", "content": user_input})

        with st.spinner("处理中…"):
            answer = agent.do_data_description(df, user_input)

        st.chat_message("assistant").write(answer)
        agent.add_memory({"role": "assistant", "content": answer})
        st.rerun()
if __name__ == "__main__":
    # Page entry point: fetch the agents from the session, hand over to the
    # preprocessing page when the automatic loading step is done, otherwise
    # lay out the data-loading page.
    agent = st.session_state.data_loading_agent
    planner = st.session_state.planner_agent
    auto = planner.loading_auto

    if st.session_state.auto_mode == True:
        # Explicit ==True/==False comparisons are kept as written; the flag
        # types are not visible from this file.
        auto_task_done = (
            agent.finish_auto_task == True and planner.switched_prep == False
        )
        if auto_task_done or planner.loading_auto == False:
            planner.finish_loading_auto()
            st.switch_page("workflow/preprocessing/preprocessing_render.py")

    # Header row: page title on the left, project links on the right.
    title_col, links_col = st.columns(2)
    with title_col:
        st.title("数据导入")
    with links_col:
        # Two empty writes push the link buttons down.
        st.write("")
        st.write("")
        link_items = [
            sac.ButtonsItem(
                label='Github',
                icon='github',
                href='https://github.com/Jiaye-s-Group/AutoSTAT',
            ),
            sac.ButtonsItem(
                label='Doc',
                icon=sac.BsIcon(name='bi bi-file-earmark-post-fill', size=16),
                href='https://autostat.cc/docs/',
            ),
            sac.ButtonsItem(
                label='Web',
                icon=sac.BsIcon(name='bi bi-globe2', size=16),
                href='https://autostat.cc/',
            ),
        ]
        sac.buttons(link_items, align='end', color='dark', variant='filled', index=None)

    st.markdown("---")

    # Body: upload and data display in the left column, chat in the right.
    left_col, right_col = st.columns(2)
    with left_col.expander('数据上传', True):
        loading_data_file(agent)
    with right_col.expander('数据建议', True):
        loading_chat(agent, auto)
    with left_col.expander('数据展示', True):
        loading_basic_info(agent)