File size: 2,016 Bytes
7ec734c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
"""

data_utils.py



This module contains all data loading, cleaning, and processing functions.

All functions are decorated with @st.cache_data (or similar caching)

to speed up subsequent app runs.

"""

import os
import glob
import base64
import datetime
import tempfile
import numpy as np
import pandas as pd
import polars as pl
import requests

import streamlit as st
import streamlit.components.v1 as components

import plotly.express as px
import plotly.graph_objects as go

from scipy import stats
from scipy.stats import gaussian_kde

from great_tables import GT, md, html

from huggingface_hub import HfApi, hf_hub_download, snapshot_download


@st.cache_data
def load_daily():
    """Download and load the daily-aggregated c-store transactions dataset.

    Fetches ``data/cstore_transactions_daily_agg.parquet`` from the
    ``tonadeleon/images_app`` Hugging Face dataset repo (``main`` revision)
    into a cache directory under the system temp folder, then reads it
    with Polars.

    Returns:
        polars.DataFrame | None: The loaded daily aggregate table, or
        ``None`` on any failure (missing token, cache-directory creation
        error, download or parquet-read error). Failures are reported to
        the user via ``st.error`` rather than raised.
    """
    # Token may come from the environment or from Streamlit secrets.
    token = os.getenv("hf_token") or st.secrets.get("hf_token")
    if not token:
        st.error("Hugging Face token not found. Please check your configuration.")
        return None

    # Cache downloads inside the system temp folder. exist_ok=True makes
    # creation idempotent, so no separate os.path.exists() check is needed.
    cache_dir = os.path.join(tempfile.gettempdir(), "hf_data_cache")
    try:
        os.makedirs(cache_dir, exist_ok=True)
    except OSError as e:
        # os.makedirs raises OSError (and subclasses) on permission/IO
        # problems; catch that narrowly instead of a blanket Exception.
        st.error(f"Could not create cache directory: {e}")
        return None

    try:
        # Pull the file from the Hugging Face dataset repository; the hub
        # client skips the download if the cached copy is current.
        local_path = hf_hub_download(
            repo_id="tonadeleon/images_app",       # Repository ID
            filename="data/cstore_transactions_daily_agg.parquet",  # File path in the repo
            revision="main",                       # Branch or commit
            repo_type="dataset",
            token=token,
            cache_dir=cache_dir,
        )
        # Read the downloaded parquet file using Polars.
        daily = pl.read_parquet(local_path)
    except Exception as e:
        # Broad catch is deliberate at this UI boundary: any hub/network/
        # parquet failure is surfaced to the user and signalled via None.
        st.error(f"Error downloading or loading the data: {e}")
        return None

    return daily