"""
data_utils.py
This module contains all data loading, cleaning, and processing functions.
All functions are decorated with @st.cache_data (or similar caching)
to speed up subsequent app runs.
"""
import os
import glob
import base64
import datetime
import tempfile
import numpy as np
import pandas as pd
import polars as pl
import requests
import streamlit as st
import streamlit.components.v1 as components
import plotly.express as px
import plotly.graph_objects as go
from scipy import stats
from scipy.stats import gaussian_kde
from great_tables import GT, md, html
from huggingface_hub import HfApi, hf_hub_download, snapshot_download
@st.cache_data
def load_daily():
    """Download and load the daily-aggregated transactions data.

    Pulls ``data/cstore_transactions_daily_agg.parquet`` from the
    ``tonadeleon/images_app`` Hugging Face dataset repo (downloads are
    cached under the system temp directory) and reads it with Polars.
    Results are memoized across reruns via ``@st.cache_data``.

    Returns:
        polars.DataFrame | None: The daily aggregate data, or None if the
        HF token is missing, the cache directory cannot be created, or the
        download/read fails (an ``st.error`` is shown in each case).
    """
    # Resolve the token from the environment first, then Streamlit secrets.
    # Accessing st.secrets can itself raise when no secrets.toml exists,
    # so guard it instead of crashing before the friendly error message.
    token = os.getenv("hf_token")
    if not token:
        try:
            token = st.secrets.get("hf_token")
        except Exception:
            token = None
    if not token:
        st.error("Hugging Face token not found. Please check your configuration.")
        return None

    # Create a cache directory inside the system temp folder.
    # makedirs(exist_ok=True) already handles the already-exists case,
    # so no separate os.path.exists check is needed.
    cache_dir = os.path.join(tempfile.gettempdir(), "hf_data_cache")
    try:
        os.makedirs(cache_dir, exist_ok=True)
    except Exception as e:
        st.error(f"Could not create cache directory: {e}")
        return None

    try:
        # Pull the file from the Hugging Face dataset repository.
        local_path = hf_hub_download(
            repo_id="tonadeleon/images_app",  # Repository ID
            filename="data/cstore_transactions_daily_agg.parquet",  # File path in the repo
            revision="main",  # Branch or commit
            repo_type="dataset",
            token=token,
            cache_dir=cache_dir,
        )
        # Read the downloaded parquet file using Polars.
        daily = pl.read_parquet(local_path)
    except Exception as e:
        st.error(f"Error downloading or loading the data: {e}")
        return None
    return daily
|