"""data_utils.py

This module contains all data loading, cleaning, and processing functions.
All functions are decorated with @st.cache_data (or similar caching) to
speed up subsequent app runs.
"""

import os
import glob
import base64
import datetime
import tempfile

import numpy as np
import pandas as pd
import polars as pl
import requests
import streamlit as st
import streamlit.components.v1 as components
import plotly.express as px
import plotly.graph_objects as go
from scipy import stats
from scipy.stats import gaussian_kde
from great_tables import GT, md, html
from huggingface_hub import HfApi, hf_hub_download, snapshot_download


@st.cache_data
def load_daily():
    """Download and load the daily-aggregated c-store transactions data.

    Downloads ``data/cstore_transactions_daily_agg.parquet`` from the
    Hugging Face dataset repo ``tonadeleon/images_app`` (authenticated via
    the ``hf_token`` environment variable or Streamlit secret) into a
    temp-folder cache, then reads it with Polars.

    Returns:
        polars.DataFrame with the daily aggregates, or ``None`` on any
        failure (the error is surfaced to the UI via ``st.error``).
    """
    # Prefer the environment variable; fall back to Streamlit secrets.
    # st.secrets raises (rather than returning None) when no secrets file
    # exists at all, so the lookup must be guarded instead of chained
    # with `or`.
    token = os.getenv("hf_token")
    if not token:
        try:
            token = st.secrets.get("hf_token")
        except Exception:
            token = None
    if not token:
        st.error("Hugging Face token not found. Please check your configuration.")
        return None

    # Create a cache directory (inside the system temp folder).
    # makedirs(exist_ok=True) is idempotent, so no exists() pre-check is
    # needed — and skipping it avoids a TOCTOU race.
    cache_dir = os.path.join(tempfile.gettempdir(), "hf_data_cache")
    try:
        os.makedirs(cache_dir, exist_ok=True)
    except Exception as e:
        st.error(f"Could not create cache directory: {e}")
        return None

    try:
        # Use hf_hub_download to pull the file from the Hugging Face
        # dataset repository.
        local_path = hf_hub_download(
            repo_id="tonadeleon/images_app",  # Repository ID
            filename="data/cstore_transactions_daily_agg.parquet",  # File path in the repo
            revision="main",  # Branch or commit
            repo_type="dataset",
            token=token,
            cache_dir=cache_dir,
        )
        # Read the downloaded parquet file using Polars.
        daily = pl.read_parquet(local_path)
    except Exception as e:
        st.error(f"Error downloading or loading the data: {e}")
        return None

    return daily