# abraham9486937737
# Deploy MySpace Ooty Analytics to Hugging Face - with KPI styling updates
# 04b129a
"""
Utility functions for Streamlit components
"""
import streamlit as st
import pandas as pd
from typing import Optional
@st.cache_data
def load_data(file_path: str) -> Optional[pd.DataFrame]:
    """Load a tabular data file into a DataFrame (wrapped in ``st.cache_data``).

    The reader is picked from the file extension, compared
    case-insensitively: ``.xlsx`` / ``.xls`` go through ``pd.read_excel`` and
    ``.csv`` goes through ``pd.read_csv``.

    Args:
        file_path: Path of the file to read.

    Returns:
        The loaded DataFrame, or ``None`` when the extension is not supported
        or the read fails. In both failure cases a message is shown with
        ``st.error``.
    """
    try:
        # Lower-case once so "REPORT.CSV" or "data.XLSX" are recognised too.
        # Kept inside the ``try`` so a non-string path is still reported in
        # the UI instead of raising.
        lowered = file_path.lower()
        if lowered.endswith(('.xlsx', '.xls')):
            return pd.read_excel(file_path)
        if lowered.endswith('.csv'):
            return pd.read_csv(file_path)
    except Exception as e:  # UI boundary: report the failure, don't crash
        st.error(f"Error loading file: {e}")
        return None

    # No branch matched: tell the user why nothing was loaded rather than
    # returning None silently.
    st.error(f"Unsupported file type: {file_path}")
    return None
def display_dataframe_stats(df: pd.DataFrame):
    """Render a row of four headline metrics for ``df``.

    The metrics are, left to right: row count, column count, total number
    of null cells, and the summed ``df.memory_usage()`` divided by 1024
    (labelled KB).
    """
    rows_slot, cols_slot, missing_slot, memory_slot = st.columns(4)

    n_rows, n_cols = df.shape
    rows_slot.metric("Rows", n_rows)
    cols_slot.metric("Columns", n_cols)

    missing_total = df.isnull().sum().sum()
    missing_slot.metric("Missing Values", missing_total)

    memory_kb = df.memory_usage().sum() / 1024
    memory_slot.metric("Memory Usage", f"{memory_kb:.2f} KB")
def display_column_info(df: pd.DataFrame):
    """Show a per-column summary table for ``df``.

    The table has one row per column of ``df`` with its name, dtype name,
    non-null count and null count, rendered under a "Column Information"
    subheader.

    Args:
        df: DataFrame whose columns are summarised.
    """
    st.subheader("Column Information")
    col_info = pd.DataFrame({
        'Column': df.columns,
        # Store dtype *names* rather than raw dtype objects: a column of
        # dtype objects is an arbitrary-object column, which does not map
        # to an Arrow type when st.dataframe serialises the table.
        'Type': df.dtypes.astype(str).values,
        'Non-Null Count': df.count().values,
        'Null Count': df.isnull().sum().values,
    })
    st.dataframe(col_info, use_container_width=True)
def display_data_quality(df: pd.DataFrame):
    """Show data-quality metrics for ``df`` in three columns.

    The metrics are:
      * Data Completeness - percentage of cells that are not null, or
        "N/A" when ``df`` has no cells at all.
      * Duplicate Rows - number of rows flagged by ``df.duplicated()``.
      * Numeric Columns - number of columns selected by
        ``select_dtypes(include=['number'])``.

    Args:
        df: DataFrame to assess.
    """
    st.subheader("Data Quality Assessment")
    col1, col2, col3 = st.columns(3)

    total_cells = df.shape[0] * df.shape[1]
    null_cells = df.isnull().sum().sum()

    # With zero rows or zero columns the ratio is 0/0; report that
    # explicitly instead of dividing by zero.
    if total_cells:
        completeness = ((total_cells - null_cells) / total_cells) * 100
        completeness_text = f"{completeness:.2f}%"
    else:
        completeness_text = "N/A"

    with col1:
        st.metric("Data Completeness", completeness_text)
    with col2:
        st.metric("Duplicate Rows", df.duplicated().sum())
    with col3:
        st.metric("Numeric Columns", df.select_dtypes(include=['number']).shape[1])