Spaces:
Sleeping
Sleeping
| """ | |
| Utility functions for Streamlit components | |
| """ | |
| import streamlit as st | |
| import pandas as pd | |
| from typing import Optional | |
def load_data(file_path: str) -> Optional[pd.DataFrame]:
    """Load a tabular file into a DataFrame.

    The reader is chosen from the file extension, matched
    case-insensitively: ``.xlsx`` / ``.xls`` are read with
    ``pd.read_excel`` and ``.csv`` with ``pd.read_csv``.

    Args:
        file_path: Path of the file to read.

    Returns:
        The loaded DataFrame, or ``None`` when the extension is not
        supported or the read fails. In both failure cases a message is
        shown with ``st.error``.
    """
    # Match on a lower-cased copy so names such as "DATA.CSV" are accepted;
    # the original path is still what gets passed to pandas.
    lowered = file_path.lower()
    try:
        if lowered.endswith(('.xlsx', '.xls')):
            return pd.read_excel(file_path)
        if lowered.endswith('.csv'):
            return pd.read_csv(file_path)
    except Exception as e:
        # Any read failure is reported in the UI instead of propagating.
        st.error(f"Error loading file: {e}")
        return None

    # Reached only when no reader matched: report it rather than returning
    # None silently.
    st.error(f"Unsupported file type: {file_path}")
    return None
def display_dataframe_stats(df: pd.DataFrame) -> None:
    """Display basic dataframe statistics.

    Renders four side-by-side ``st.metric`` widgets: row count, column
    count, total number of missing values, and memory usage in KB.

    Args:
        df: DataFrame to summarise.
    """
    row_count, column_count = df.shape
    # The pandas reduction yields a NumPy scalar; hand st.metric a plain int.
    missing_count = int(df.isnull().sum().sum())
    # deep=True also measures the contents of object (e.g. string) columns;
    # the shallow default would under-report their size.
    memory_kb = df.memory_usage(deep=True).sum() / 1024

    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Rows", row_count)
    with col2:
        st.metric("Columns", column_count)
    with col3:
        st.metric("Missing Values", missing_count)
    with col4:
        st.metric("Memory Usage", f"{memory_kb:.2f} KB")
def display_column_info(df: pd.DataFrame) -> None:
    """Display information about dataframe columns.

    Renders a "Column Information" subheader followed by a table with one
    row per column of ``df``: its name, dtype, non-null count and null
    count.

    Args:
        df: DataFrame whose columns are described.
    """
    st.subheader("Column Information")
    col_info = pd.DataFrame({
        'Column': df.columns,
        # Store dtype names as strings: a column of raw dtype objects is an
        # object column that table serialization may fail to convert.
        'Type': df.dtypes.astype(str).values,
        'Non-Null Count': df.count().values,
        'Null Count': df.isnull().sum().values,
    })
    st.dataframe(col_info, use_container_width=True)
def display_data_quality(df: pd.DataFrame) -> None:
    """Display data quality metrics.

    Renders a "Data Quality Assessment" subheader and three ``st.metric``
    widgets: percentage of non-null cells, number of duplicated rows, and
    number of numeric columns.

    Args:
        df: DataFrame to assess. May be empty, in which case completeness
            is shown as "N/A".
    """
    st.subheader("Data Quality Assessment")

    total_cells = df.shape[0] * df.shape[1]
    # Cast NumPy scalars to built-in ints before arithmetic and display.
    null_cells = int(df.isnull().sum().sum())
    duplicate_rows = int(df.duplicated().sum())
    numeric_columns = df.select_dtypes(include=['number']).shape[1]

    if total_cells:
        completeness = ((total_cells - null_cells) / total_cells) * 100
        completeness_label = f"{completeness:.2f}%"
    else:
        # No cells at all: the ratio is undefined, so avoid dividing by zero.
        completeness_label = "N/A"

    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Data Completeness", completeness_label)
    with col2:
        st.metric("Duplicate Rows", duplicate_rows)
    with col3:
        st.metric("Numeric Columns", numeric_columns)