Spaces:
Sleeping
Sleeping
File size: 2,574 Bytes
56290d3 c771784 56290d3 c771784 56290d3 c771784 56290d3 c771784 56290d3 c771784 56290d3 c771784 56290d3 c771784 56290d3 c771784 56290d3 c771784 56290d3 c771784 56290d3 c771784 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
# data_loader.py
import streamlit as st
import pandas as pd
import os
# Try importing optional dependencies
try:
from datasets import load_dataset
import kagglehub
except ImportError:
pass # Handle usage inside functions
@st.cache_data(show_spinner="Loading CSV file...", ttl=3600)
def load_csv(file) -> pd.DataFrame:
    """Parse a CSV source into a DataFrame with a fresh positional index.

    The call is wrapped in ``st.cache_data`` with a 3600-second TTL.

    Args:
        file: Whatever is handed to ``pandas.read_csv`` (the sidebar passes
            the object returned by the file uploader).

    Returns:
        The parsed DataFrame, or ``None`` if reading failed; in that case
        the error is reported through ``st.error``.
    """
    try:
        # reset_index returns a new frame; equivalent to the in-place form.
        frame = pd.read_csv(file).reset_index(drop=True)
    except Exception as exc:
        st.error(f"Failed to load CSV: {exc}")
        return None
    else:
        return frame
@st.cache_data(show_spinner="Downloading from Hugging Face...", ttl=3600)
def _fetch_hf_frame(path: str, split: str) -> pd.DataFrame:
    """Download one split of a Hugging Face dataset and return it as a DataFrame.

    Errors are deliberately NOT handled here: an exception propagates out of
    the cached call, so only successful downloads end up in the cache.
    """
    # Imported at call time so the module still loads when the optional
    # `datasets` package is missing; the resulting ImportError is reported
    # by the public wrapper.
    from datasets import load_dataset

    return pd.DataFrame(load_dataset(path, split=split))


def load_hf(path: str, split: str) -> "pd.DataFrame | None":
    """Load a Hugging Face dataset split, reporting failures in the UI.

    Args:
        path: Dataset identifier passed to ``datasets.load_dataset``.
        split: Split name passed to ``datasets.load_dataset``.

    Returns:
        The split as a DataFrame, or ``None`` if anything went wrong
        (missing ``datasets`` package, download error, bad split name, ...).
        The failure is shown with ``st.error`` and is not cached, so the
        next attempt with the same arguments performs a real retry.
    """
    try:
        return _fetch_hf_frame(path, split)
    except Exception as e:
        st.error(f"Failed to load HF dataset: {e}")
        return None


# Callers that used the cached function's `.clear()` keep working.
load_hf.clear = _fetch_hf_frame.clear
@st.cache_data(show_spinner="Downloading from Kaggle...", ttl=3600)
def _fetch_kaggle_frame(kaggle_path: str, file_name: str) -> pd.DataFrame:
    """Download a Kaggle dataset and read one CSV file from it.

    Errors are deliberately NOT handled here: an exception propagates out of
    the cached call, so only successful downloads end up in the cache.

    Raises:
        ValueError: If ``file_name`` resolves outside the download directory.
    """
    # Imported at call time so the module still loads when the optional
    # `kagglehub` package is missing; the resulting ImportError is reported
    # by the public wrapper.
    import kagglehub

    root = os.path.abspath(kagglehub.dataset_download(kaggle_path))
    target = os.path.abspath(os.path.join(root, file_name))
    # `file_name` comes from a free-text input. os.path.join discards `root`
    # when given an absolute path, and ".." segments can climb out of it, so
    # refuse anything that does not stay inside the downloaded dataset.
    if os.path.commonpath([root, target]) != root:
        raise ValueError(
            f"'{file_name}' is not inside the downloaded dataset directory"
        )
    return pd.read_csv(target)


def load_kaggle(kaggle_path: str, file_name: str) -> "pd.DataFrame | None":
    """Load a CSV file from a Kaggle dataset, reporting failures in the UI.

    Args:
        kaggle_path: Dataset handle passed to ``kagglehub.dataset_download``.
        file_name: Path of the CSV file relative to the downloaded dataset.

    Returns:
        The CSV contents as a DataFrame, or ``None`` if anything went wrong
        (missing ``kagglehub`` package, download error, missing file, a
        ``file_name`` pointing outside the dataset, ...). The failure is
        shown with ``st.error`` and is not cached, so the next attempt with
        the same arguments performs a real retry.
    """
    try:
        return _fetch_kaggle_frame(kaggle_path, file_name)
    except Exception as e:
        st.error(f"Failed to load Kaggle dataset: {e}")
        return None


# Callers that used the cached function's `.clear()` keep working.
load_kaggle.clear = _fetch_kaggle_frame.clear
def dataset_sidebar():
    """Render the sidebar data-ingestion controls and store newly loaded data.

    The user picks a source (CSV upload, Hugging Face, or Kaggle). When a
    DataFrame is obtained and it differs from the one already held in
    ``st.session_state.original_df``, both ``original_df`` and
    ``processed_df`` are replaced with copies of it and a success message
    is shown in the sidebar.

    Returns:
        None. Results are communicated through ``st.session_state``.
    """
    st.sidebar.header("1️⃣ Data Ingestion")
    source = st.sidebar.selectbox(
        "Source Type",
        ["Upload CSV", "Hugging Face", "Kaggle"],
        help="Select the source of your dataset.",
    )

    df = None
    if source == "Upload CSV":
        file = st.sidebar.file_uploader("Drop CSV Here", type=["csv"])
        if file:
            df = load_csv(file)
    elif source == "Hugging Face":
        path = st.sidebar.text_input("Dataset Path (e.g., 'iris')", "iris")
        split = st.sidebar.text_input("Split (e.g., 'train')", "train")
        if st.sidebar.button("Fetch HF Dataset"):
            df = load_hf(path, split)
    elif source == "Kaggle":
        kp = st.sidebar.text_input("Kaggle Path (user/dataset)")
        fn = st.sidebar.text_input("CSV Filename inside dataset")
        if st.sidebar.button("Fetch Kaggle Dataset"):
            df = load_kaggle(kp, fn)

    # State Update Logic
    if df is None:
        return

    # .get() instead of attribute access: attribute access raises when the
    # key has never been set, e.g. if this runs before the app initialises
    # its session state.
    previous = st.session_state.get("original_df")
    if previous is None or not df.equals(previous):
        st.session_state.original_df = df.copy()
        # Initialize processed_df as a copy of original
        st.session_state.processed_df = df.copy()
        st.sidebar.success(f"Loaded: {df.shape[0]} rows, {df.shape[1]} cols")