Spaces:
Sleeping
Sleeping
| import io | |
| import traceback | |
| import numpy as np | |
| import pandas as pd | |
| import streamlit as st | |
| from streamlit_ace import st_ace | |
| from sklearn.compose import ColumnTransformer | |
| from sklearn.impute import SimpleImputer | |
| from sklearn.pipeline import Pipeline | |
| from sklearn.preprocessing import FunctionTransformer | |
| from sklearn.preprocessing import LabelEncoder, MinMaxScaler, OneHotEncoder, OrdinalEncoder, RobustScaler, StandardScaler | |
| from utils.sanitize_code import sanitize_code | |
| from workflow.preprocessing.preprocessing_core import prep_meta_execution, prep_code_gen | |
def prep_basic_info(agent):
    """Render headline metrics and a per-column summary of the agent's DataFrame.

    Three metrics are shown side by side (row count, column count, total
    number of missing cells), followed by a table with one row per column
    giving its name, dtype, non-null count and missing-value percentage
    rounded to two decimals.

    Args:
        agent: Object whose ``load_df()`` returns the DataFrame to describe.
    """
    frame = agent.load_df()
    null_mask = frame.isnull()
    n_rows, n_cols = frame.shape

    # Headline numbers, one per layout column.
    headline = (
        ("行数", n_rows),
        ("列数", n_cols),
        ("缺失值总数", int(null_mask.sum().sum())),
    )
    for slot, (label, value) in zip(st.columns(3), headline):
        slot.metric(label, value)

    # The dtype Series is indexed by column name, so the positional index is
    # restored afterwards to get a plain 0..n-1 row index in the table.
    summary = pd.DataFrame({
        '列名': frame.columns,
        '类型': frame.dtypes.astype(str),
        '非空值数量': frame.count().values,
        '缺失值比例(%)': (null_mask.mean() * 100).round(2).values,
    }).reset_index(drop=True)
    st.dataframe(summary, use_container_width=True)
def prep_execution(agent, auto=False):
    """Run the agent's stored preprocessing code against its DataFrame.

    Loads the code and the data from ``agent`` and hands both to
    ``prep_meta_execution``.

    Args:
        agent: Object providing ``load_code()`` and ``load_df()``.
        auto: Forwarded unchanged to ``prep_meta_execution``.

    Returns:
        None. The value produced by ``prep_meta_execution`` is intentionally
        not returned: this function is invoked as a bare statement from the
        page script, where a non-None result would be rendered by Streamlit.
    """
    code = agent.load_code()
    df = agent.load_df()
    prep_meta_execution(agent, code, df, auto=auto)
def prep_result(agent):
    """Show before/after previews and offer the processed data as a CSV download.

    Nothing is rendered when the agent has no processed DataFrame yet.

    Args:
        agent: Object providing ``load_processed_df()`` and ``load_df()``.
    """
    processed = agent.load_processed_df()
    original = agent.load_df()
    if processed is None:
        return

    # First ten rows of each frame, original first.
    st.write("处理前数据预览:", original.head(10))
    st.write("处理后数据预览:", processed.head(10))

    # With no target given, to_csv returns the CSV text directly.
    payload = processed.to_csv(index=False).encode('utf-8')
    st.download_button(
        label="⬇️ 下载处理后数据",
        data=payload,
        file_name="processed_data.csv",
        mime="text/csv",
    )
def _suggest_and_record(agent, spinner_label, *query):
    """Fetch suggestions from the agent, persist and refine them, then show the reply.

    ``query`` is forwarded as-is to ``agent.get_preprocessing_suggestions`` so
    the call is made with no argument for the button/auto flow and with the
    user's text for the free-form flow.
    """
    with st.spinner(spinner_label):
        reply = agent.get_preprocessing_suggestions(*query)
        agent.save_preprocessing_suggestions(reply)
        # The data preview comes from the agent itself rather than from a
        # module-level ``df``, which only exists when this file runs as a script.
        agent.refine_suggestions(agent.load_df().head(10).to_string())
    st.chat_message("assistant").write(reply)
    agent.add_memory({'role': 'assistant', 'content': reply})


def prep_chat(agent, auto=False):
    """Render the conversational suggestion area.

    Displays a greeting with a "recommend" button, replays the stored chat
    history, and then handles two triggers: the button (or automatic mode when
    no assistant message mentioning preprocessing exists yet) and free-form
    text typed into the chat input.

    Args:
        agent: Preprocessing agent providing ``load_memory``, ``add_memory``,
            ``load_df``, ``get_preprocessing_suggestions``,
            ``save_preprocessing_suggestions``, ``refine_suggestions`` and
            ``save_user_input``.
        auto: When true, suggestions are requested without a button click
            unless the history already contains them.
    """
    # NOTE(review): the source had lost its indentation; the button is assumed
    # to sit inside the greeting bubble -- confirm against the original layout.
    with st.chat_message("assistant"):
        st.write("我是 Anystat 数据分析助手,很高兴为您服务!\n\n"
                 "您可以在下方输入预处理需求,或直接点击按钮获取预处理建议。")
        analyze_btn = st.button("🔍 预处理推荐", key='prep_suggest')

    # Replay the stored conversation. A bubble is opened for every entry, but
    # only string content is written into it.
    chat_history = agent.load_memory()
    for entry in chat_history:
        bubble = st.chat_message(entry["role"])
        content = entry["content"]
        if isinstance(content, str):
            bubble.write(content)

    already_generated = any(
        entry["role"] == "assistant" and "预处理" in str(entry["content"])
        for entry in chat_history
    )

    # Button click, or automatic mode on the first pass.
    if analyze_btn or (auto and not already_generated):
        st.chat_message("user").write("请给我预处理建议")
        agent.add_memory({'role': 'user', 'content': "请给我预处理建议"})
        _suggest_and_record(agent, "生成建议中…")

    # Free-form question from the user.
    user_input = st.chat_input("请输入您的问题")
    if user_input:
        st.chat_message("user").write(user_input)
        agent.add_memory({'role': 'user', 'content': user_input})
        agent.save_user_input(user_input)
        _suggest_and_record(agent, "处理中…", user_input)
if __name__ == '__main__':
    # Page header.
    st.title("数据预处理与标准化")
    st.markdown("---")

    # Pull the shared agents and the loaded data out of the session.
    data_loading_agent = st.session_state.data_loading_agent
    df = data_loading_agent.load_df()
    planner = st.session_state.planner_agent
    auto = planner.prep_auto

    # Nothing to preprocess until data has been loaded on the import page.
    if df is None:
        st.warning("⚠️ 请先在数据导入页面加载数据")
        st.stop()

    agent = st.session_state.data_preprocess_agent
    agent.add_df(df)

    # In auto mode, move on to the visualization page once the automatic
    # preprocessing task is finished (and not yet switched), or when automatic
    # preprocessing is disabled. The explicit ==True/==False comparisons are
    # kept on purpose: the flag types are not visible here, so plain
    # truthiness tests could behave differently.
    if st.session_state.auto_mode == True and (
        (agent.finish_auto_task == True and planner.switched_prep == False)
        or planner.prep_auto == False
    ):
        planner.finish_prep_auto()
        st.switch_page("workflow/visualization/viz_render.py")

    # Execution/result panels start expanded only when code already exists.
    code = agent.load_code()
    code_expand = code is not None

    left_col, right_col = st.columns(2)
    with left_col.expander('预处理展示', True):
        prep_basic_info(agent)
    # NOTE(review): indentation was lost in the source; code generation is
    # assumed to live inside the suggestions expander -- confirm.
    with right_col.expander('预处理建议', True):
        prep_chat(agent, auto)
        prep_code_gen(agent, auto=auto)
    with left_col.expander('预处理执行', code_expand):
        prep_execution(agent, auto)
    with left_col.expander('预处理结果', code_expand):
        prep_result(agent)