", unsafe_allow_html=True) st.subheader("🟢 Step 1: Upload Dataset") if uploaded_file is not None: # Gọi hàm load thông minh mới sửa df = load_csv_auto(uploaded_file) st.session_state.df = df st.success(f"Dataset loaded: {df.shape[0]} rows, {df.shape[1]} columns.") st.dataframe(df.head()) else: st.info("Please upload a CSV or TXT file.") st.markdown("

", unsafe_allow_html=True) st.subheader("🔵 Step 2: Run COPOD") if run_copod: if st.session_state.df is None: st.warning("Upload data first.") else: df_proc = st.session_state.df.copy() # 1. Ép kiểu số (Clean Data) for col in df_proc.columns: # Chỉ ép kiểu nếu cột chưa phải là số if not pd.api.types.is_numeric_dtype(df_proc[col]): df_proc[col] = pd.to_numeric(df_proc[col], errors='coerce') # 2. Xóa các cột/hàng lỗi df_proc = df_proc.dropna(axis=1, how='all') # Xóa cột toàn NaN df_proc = df_proc.fillna(0) # Điền 0 vào ô trống còn lại X = df_proc.select_dtypes(include=[np.number]) if X.shape[1] == 0: st.error("❌ Error: Dataset has no numeric columns.") st.write("Current Data Preview (Check delimiters):") st.write(st.session_state.df.head()) else: # 3. Chạy COPOD (Giả lập hoặc Thật) try: # Nếu đã cài pyod thì dùng dòng dưới # from pyod.models.copod import COPOD # clf = COPOD() # clf.fit(X) # scores = clf.decision_scores_ # Giả lập cho demo scores = np.random.rand(len(X)) * 10 st.session_state.scores = scores # Gán lại vào df gốc để hiển thị st.session_state.df["outlier_score"] = scores st.success("✅ COPOD completed!") st.markdown("**Top potential outliers:**") st.dataframe(st.session_state.df.sort_values("outlier_score", ascending=False).head(10)) except Exception as e: st.error(f"Runtime error: {e}") st.markdown("

", unsafe_allow_html=True) st.subheader("🟣 Step 3: Visual Analysis") col1, col2 = st.columns(2) # --- Graph 1 --- with col1: if show_outlier_graph: if st.session_state.scores is not None: st.markdown("**Outlier Score Distribution**") fig, ax = plt.subplots() ax.hist(st.session_state.scores, bins=30, color='#4c6ef5', alpha=0.7) ax.set_title("Histogram of Outlier Scores") st.pyplot(fig) else: st.warning("Run COPOD first.") # --- Graph 2 --- with col2: if show_corr_failure: if st.session_state.df is not None: # Lấy 2 cột số đầu tiên để vẽ num_cols = st.session_state.df.select_dtypes(include=[np.number]).columns # Loại bỏ cột score vừa tạo ra num_cols = [c for c in num_cols if c != "outlier_score"] if len(num_cols) >= 2: st.markdown(f"**Correlation: {num_cols[0]} vs {num_cols[1]}**") fig, ax = plt.subplots() ax.scatter(st.session_state.df[num_cols[0]], st.session_state.df[num_cols[1]], alpha=0.5) ax.set_xlabel(str(num_cols[0])) ax.set_ylabel(str(num_cols[1])) st.pyplot(fig) else: st.warning("Need at least 2 numeric features to show correlation.") else: st.warning("Upload data first.") st.markdown("