""" Utility functions for Streamlit components """ import streamlit as st import pandas as pd from typing import Optional @st.cache_data def load_data(file_path: str) -> Optional[pd.DataFrame]: """Load and cache data""" try: if file_path.endswith('.xlsx') or file_path.endswith('.xls'): return pd.read_excel(file_path) elif file_path.endswith('.csv'): return pd.read_csv(file_path) except Exception as e: st.error(f"Error loading file: {e}") return None def display_dataframe_stats(df: pd.DataFrame): """Display basic dataframe statistics""" col1, col2, col3, col4 = st.columns(4) with col1: st.metric("Rows", df.shape[0]) with col2: st.metric("Columns", df.shape[1]) with col3: st.metric("Missing Values", df.isnull().sum().sum()) with col4: st.metric("Memory Usage", f"{df.memory_usage().sum() / 1024:.2f} KB") def display_column_info(df: pd.DataFrame): """Display information about dataframe columns""" st.subheader("Column Information") col_info = pd.DataFrame({ 'Column': df.columns, 'Type': df.dtypes.values, 'Non-Null Count': df.count().values, 'Null Count': df.isnull().sum().values, }) st.dataframe(col_info, use_container_width=True) def display_data_quality(df: pd.DataFrame): """Display data quality metrics""" st.subheader("Data Quality Assessment") col1, col2, col3 = st.columns(3) total_cells = df.shape[0] * df.shape[1] null_cells = df.isnull().sum().sum() completeness = ((total_cells - null_cells) / total_cells) * 100 with col1: st.metric("Data Completeness", f"{completeness:.2f}%") with col2: st.metric("Duplicate Rows", df.duplicated().sum()) with col3: st.metric("Numeric Columns", df.select_dtypes(include=['number']).shape[1])