Spaces:
Sleeping
Sleeping
# data_loader.py
import streamlit as st
import pandas as pd
import os

# Optional dependencies: the app still starts without them; the loaders
# that need these names catch the resulting failure at call time and
# surface it in the UI instead of crashing on import.
try:
    from datasets import load_dataset
    import kagglehub
except ImportError:
    pass  # Handled inside the functions that use these names
def load_csv(file) -> "pd.DataFrame | None":
    """Read an uploaded CSV file into a DataFrame.

    Parameters
    ----------
    file : file-like object (e.g. a Streamlit ``UploadedFile``).

    Returns
    -------
    The parsed DataFrame with a fresh ``RangeIndex``, or ``None`` when
    parsing fails (the error is shown in the Streamlit UI, not raised).
    """
    try:
        df = pd.read_csv(file)
        # Drop any index carried over from the file so downstream code
        # can rely on a clean 0..n-1 RangeIndex.
        df.reset_index(drop=True, inplace=True)
        return df
    except Exception as e:
        # Broad catch is deliberate: any parse/IO failure becomes a UI
        # error message rather than an app crash.
        st.error(f"Failed to load CSV: {e}")
        return None
def load_hf(path: str, split: str) -> "pd.DataFrame | None":
    """Download a Hugging Face dataset and return it as a DataFrame.

    Parameters
    ----------
    path : dataset identifier understood by ``datasets.load_dataset``.
    split : split name, e.g. ``"train"``.

    Returns
    -------
    The dataset materialized as a DataFrame, or ``None`` on any failure
    (including the case where the optional ``datasets`` package is not
    installed — the resulting NameError is caught here too).
    """
    try:
        ds = load_dataset(path, split=split)
        return pd.DataFrame(ds)
    except Exception as e:
        st.error(f"Failed to load HF dataset: {e}")
        return None
def load_kaggle(kaggle_path: str, file_name: str) -> "pd.DataFrame | None":
    """Download a Kaggle dataset via kagglehub and read one CSV from it.

    Parameters
    ----------
    kaggle_path : Kaggle dataset slug in ``user/dataset`` form.
    file_name : name of the CSV file inside the downloaded dataset folder.

    Returns
    -------
    The parsed DataFrame, or ``None`` on any failure (download error,
    missing file, parse error, or ``kagglehub`` not installed — the
    resulting NameError is caught here too).
    """
    try:
        # kagglehub downloads/caches the dataset and returns its local dir.
        path = kagglehub.dataset_download(kaggle_path)
        full_path = os.path.join(path, file_name)
        return pd.read_csv(full_path)
    except Exception as e:
        st.error(f"Failed to load Kaggle dataset: {e}")
        return None
def dataset_sidebar():
    """Render the data-ingestion sidebar and load the selected dataset.

    Offers three sources (CSV upload, Hugging Face, Kaggle). On a
    successful load, stores the data in ``st.session_state.original_df``
    and seeds ``st.session_state.processed_df`` with a copy of it.
    """
    st.sidebar.header("1️⃣ Data Ingestion")
    source = st.sidebar.selectbox(
        "Source Type",
        ["Upload CSV", "Hugging Face", "Kaggle"],
        help="Select the source of your dataset."
    )

    df = None
    if source == "Upload CSV":
        file = st.sidebar.file_uploader("Drop CSV Here", type=["csv"])
        if file:
            df = load_csv(file)
    elif source == "Hugging Face":
        path = st.sidebar.text_input("Dataset Path (e.g., 'iris')", "iris")
        split = st.sidebar.text_input("Split (e.g., 'train')", "train")
        if st.sidebar.button("Fetch HF Dataset"):
            df = load_hf(path, split)
    elif source == "Kaggle":
        kp = st.sidebar.text_input("Kaggle Path (user/dataset)")
        fn = st.sidebar.text_input("CSV Filename inside dataset")
        if st.sidebar.button("Fetch Kaggle Dataset"):
            df = load_kaggle(kp, fn)

    # State update: only overwrite session state when a new or different
    # dataset was loaded, so Streamlit reruns don't clobber user edits.
    if df is not None:
        # .get() avoids AttributeError when the app has not initialized
        # 'original_df' in session state yet (the original code assumed
        # the key always exists).
        current = st.session_state.get("original_df")
        if current is None or not df.equals(current):
            st.session_state.original_df = df.copy()
            # Initialize processed_df as a copy of the original
            st.session_state.processed_df = df.copy()
            st.sidebar.success(f"Loaded: {df.shape[0]} rows, {df.shape[1]} cols")