Spaces:
Running
Running
| import os | |
| from typing import List, Optional | |
| import pandas as pd | |
| import streamlit as st | |
| import streamlit_antd_components as sac | |
| from workflow.dataloading.dataloading_core import process_complex_data, load_from_path, load_concat_file, PathFileWrapper | |
def _parse_path_lines(raw_paths: str) -> List[str]:
    """Turn the text-area content into a list of unique, cleaned paths.

    Each line is stripped of surrounding whitespace and then of surrounding
    single/double quote characters. Lines that end up empty are dropped and
    repeated paths are collapsed (first occurrence wins) so the same file is
    never handed to the loader twice in one submission.
    """
    cleaned = (line.strip().strip("'\"") for line in raw_paths.strip().split("\n"))
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(path for path in cleaned if path))


def _ingest_files(agent, files, names, failure_prefix: str) -> None:
    """Process ``files`` and store the result and ``names`` on ``agent``.

    Args:
        agent: Object exposing ``add_df``, ``save_dfs`` and ``save_file_name``.
        files: File-like objects passed to ``process_complex_data``.
        names: Identifiers recorded through ``agent.save_file_name`` once the
            data has been accepted (upload names or filesystem paths).
        failure_prefix: Text placed before the exception message in the
            error box shown when processing fails.

    When ``process_complex_data`` returns a non-``None`` frame the page is
    rerun via ``st.rerun()``; when it returns ``None`` nothing is stored.
    """
    try:
        with st.spinner("正在处理数据..."):
            df, dfs = process_complex_data(files, agent)
        if df is not None:
            agent.add_df(df)
            agent.save_dfs(dfs)
            for name in names:
                agent.save_file_name(name)
            st.rerun()
    except Exception as err:
        st.error(f"{failure_prefix}{err}")


def loading_data_file(agent):
    """Render the data-import panel (browser upload or filesystem paths).

    Two tabs are offered: one backed by ``st.file_uploader`` and one where
    the user types one file path per line. Entries whose name/path is already
    reported by ``agent.load_file_name()`` are skipped. After either tab has
    been handled, ``load_concat_file`` is invoked when ``agent.load_dfs()``
    holds at least two entries.

    Args:
        agent: Data-loading agent that stores frames and known file names.
    """
    st.info(
        "💡 提示:\n"
        "1. 支持一次上传多个数据文件\n"
        "2. 自动使用大模型分析并处理数据\n"
        "3. 支持多种格式的文件类型上传\n"
    )

    selected_index = sac.tabs([
        sac.TabsItem(label='本地上传'),
        sac.TabsItem(label='路径导入'),
    ], color='#5980AE',)

    if selected_index == "本地上传":
        # Upload through the browser widget.
        uploaded_files = st.file_uploader(
            "选择新文件",
            accept_multiple_files=True,
            help="拖拽或点击上传多个文件",
        )
        if uploaded_files:
            current_memory_file_name = agent.load_file_name()
            new_files = [f for f in uploaded_files if f.name not in current_memory_file_name]
            if new_files:
                _ingest_files(
                    agent,
                    new_files,
                    [f.name for f in new_files],
                    "导入失败:",
                )

    elif selected_index == "路径导入":
        # Import from paths typed by the user.
        raw_paths = st.text_area(
            "从路径导入数据 (每行一个文件路径)",
            placeholder="C:\\data\\iris.names\nC:\\data\\iris.data",
            height=100
        )
        if st.button("从路径加载文件", use_container_width=True):
            if raw_paths:
                # Single pass: each path is checked against the filesystem once.
                valid_paths = []
                invalid_paths = []
                for path in _parse_path_lines(raw_paths):
                    if os.path.exists(path):
                        valid_paths.append(path)
                    else:
                        invalid_paths.append(path)

                if invalid_paths:
                    st.warning("路径不存在,已跳过:\n- " + "\n- ".join(invalid_paths))

                if not valid_paths:
                    st.error("未找到任何有效的本地文件路径。")
                else:
                    current_memory_file_name = agent.load_file_name()
                    new_paths = [p for p in valid_paths if p not in current_memory_file_name]
                    if not new_paths:
                        st.info("所有指定的路径文件均已加载。")
                    else:
                        files_to_process = [PathFileWrapper(p) for p in new_paths]
                        _ingest_files(
                            agent,
                            files_to_process,
                            new_paths,
                            "本地文件读取失败:",
                        )

    dfs = agent.load_dfs()
    if dfs is not None and len(dfs) >= 2:
        load_concat_file(dfs, agent)
def loading_basic_info(agent):
    """Render headline metrics, a per-column overview and a row preview.

    Nothing is rendered when ``agent.load_df()`` returns ``None``. Otherwise
    three metrics (rows, columns, total missing cells) are shown, followed by
    two tabs: a per-column table (name, dtype, non-null count, missing
    percentage) and a data preview of up to ten rows.

    Args:
        agent: Data-loading agent whose ``load_df()`` yields the current
            DataFrame or ``None``.
    """
    df = agent.load_df()
    if df is None:
        return

    r, c = df.shape
    null_mask = df.isnull()  # computed once, reused for both statistics
    missing = int(null_mask.sum().sum())

    col1, col2, col3 = st.columns(3)
    col1.metric("行数", r)
    col2.metric("列数", c)
    col3.metric("缺失值总数", missing)

    dtype_info = pd.DataFrame({
        "列名": df.columns,
        "类型": df.dtypes.astype(str),
        "非空": df.count().values,
        "缺失%": (null_mask.mean() * 100).round(2).values,
    }).reset_index(drop=True)

    selected_index = sac.tabs([
        sac.TabsItem(label='数据类型概览'),
        sac.TabsItem(label='数据预览'),
    ], color='#5980AE',)

    if selected_index == "数据类型概览":
        st.dataframe(dtype_info, use_container_width=True)
    elif selected_index == "数据预览":
        if st.button("🎲 随机抽样"):
            # DataFrame.sample raises ValueError when n exceeds the number of
            # rows (sampling without replacement), so cap the sample size.
            display_df = df.sample(min(10, len(df)))
            st.dataframe(display_df, use_container_width=True)
        else:
            st.dataframe(df.head(10), use_container_width=True)
def loading_chat(agent, auto=False) -> None:
    """Render the assistant chat panel for the loaded dataset.

    Returns immediately when ``agent.load_df()`` is ``None``. Otherwise the
    greeting and the "analyze" button are drawn, the stored conversation from
    ``agent.load_memory()`` is replayed, and then one of two exchanges may
    run: the canned "explain the data" request (button click, or ``auto``
    when no assistant message containing "含义" exists yet) and/or a free-form
    request typed into the chat input. Each exchange is appended to the
    agent's memory and ends with ``st.rerun()``.

    Args:
        agent: Data-loading agent providing the DataFrame, chat memory and
            ``do_data_description``.
        auto: When truthy, trigger the canned analysis without a click unless
            it appears to have been produced already.
    """

    def say(role, text):
        # Show one chat bubble and persist the same message in agent memory.
        st.chat_message(role).write(text)
        agent.add_memory({"role": role, "content": text})

    frame = agent.load_df()
    if frame is None:
        return

    # NOTE(review): the pasted source lost its indentation; the button and the
    # placeholder are assumed to sit inside the greeting bubble - confirm.
    with st.chat_message("assistant"):
        st.write(
            "我是 Autostat 数据分析助手,很高兴为您服务!\n\n"
            "请先上传您的数据文件,上传完成后,您可以在下方和我对话,也可以直接点击按钮解析数据含义。"
        )
        analyze_btn = st.button("🔍 解析含义")
        result_placeholder = st.empty()

    # Replay the stored conversation; only string contents are written.
    history = agent.load_memory()
    for message in history:
        bubble = st.chat_message(message["role"])
        body = message["content"]
        if isinstance(body, str):
            bubble.write(body)

    # Has an assistant message mentioning "含义" already been stored?
    already_generated = False
    for message in history:
        if message["role"] == "assistant" and "含义" in str(message["content"]):
            already_generated = True
            break

    run_analysis = analyze_btn or (auto and not already_generated)
    if run_analysis:
        say("user", "请帮我解析数据含义")
        with st.spinner("分析中..."):
            desc = agent.do_data_description(frame)
        agent.finish_auto()
        say("assistant", desc)
        st.rerun()

    # Free-form request typed by the user.
    user_input = st.chat_input("请输入需求,例如“帮我分析xx列”")
    if user_input:
        say("user", user_input)
        with st.spinner("处理中…"):
            reply = agent.do_data_description(frame, user_input)
        say("assistant", reply)
        st.rerun()
if __name__ == "__main__":
    # Page entry point: fetch the agents from the Streamlit session, hand
    # over to the preprocessing page when automatic loading is finished or
    # disabled, otherwise draw the data-import page.
    agent = st.session_state.data_loading_agent
    planner = st.session_state.planner_agent
    auto = planner.loading_auto

    # The explicit ==True/==False comparisons are kept exactly as written;
    # the concrete types of these flags are not visible from this file.
    if st.session_state.auto_mode == True:
        loading_done = agent.finish_auto_task == True and planner.switched_prep == False
        if loading_done or planner.loading_auto == False:
            planner.finish_loading_auto()
            st.switch_page("workflow/preprocessing/preprocessing_render.py")

    # Header row: page title on the left, external links on the right.
    title_col, links_col = st.columns(2)
    with title_col:
        st.title("数据导入")
    with links_col:
        # Two empty writes act as vertical spacing above the link buttons.
        st.write("")
        st.write("")
        link_items = [
            sac.ButtonsItem(label='Github', icon='github', href='https://github.com/Jiaye-s-Group/AutoSTAT'),
            sac.ButtonsItem(label='Doc', icon=sac.BsIcon(name='bi bi-file-earmark-post-fill', size=16), href='https://autostat.cc/docs/'),
            sac.ButtonsItem(label='Web', icon=sac.BsIcon(name='bi bi-globe2', size=16), href='https://autostat.cc/'),
        ]
        sac.buttons(link_items, align='end', color='dark', variant='filled', index=None)

    st.markdown("---")

    # Body: upload and data overview in the left column, chat on the right.
    # The call order (upload, chat, overview) matches the original script.
    left_col, right_col = st.columns(2)
    with left_col.expander('数据上传', True):
        loading_data_file(agent)
    with right_col.expander('数据建议', True):
        loading_chat(agent, auto)
    with left_col.expander('数据展示', True):
        loading_basic_info(agent)