# Benchmark-Kit-26 / src/data_loader.py  (rev c771784)
# data_loader.py
import os
from typing import Optional

import pandas as pd
import streamlit as st
# Try importing optional dependencies
try:
from datasets import load_dataset
import kagglehub
except ImportError:
pass # Handle usage inside functions
@st.cache_data(show_spinner="Loading CSV file...", ttl=3600)
def load_csv(file) -> Optional[pd.DataFrame]:
    """Parse an uploaded CSV file into a DataFrame.

    Parameters
    ----------
    file : file-like
        An open file-like object (e.g. a Streamlit ``UploadedFile``).

    Returns
    -------
    Optional[pd.DataFrame]
        The parsed frame with a clean 0..n-1 index, or ``None`` if
        parsing failed (the error is reported via ``st.error``).
    """
    try:
        df = pd.read_csv(file)
    except Exception as e:  # surface any parse/IO failure in the UI instead of crashing
        st.error(f"Failed to load CSV: {e}")
        return None
    # Normalize to a default RangeIndex so downstream steps can rely on it.
    df.reset_index(drop=True, inplace=True)
    return df
@st.cache_data(show_spinner="Downloading from Hugging Face...", ttl=3600)
def load_hf(path: str, split: str) -> Optional[pd.DataFrame]:
    """Download a Hugging Face dataset split and convert it to a DataFrame.

    Parameters
    ----------
    path : str
        Hub dataset identifier (e.g. ``'iris'`` or ``'user/dataset'``).
    split : str
        Split name to fetch (e.g. ``'train'``).

    Returns
    -------
    Optional[pd.DataFrame]
        The split as a DataFrame, or ``None`` on failure (reported via
        ``st.error``).
    """
    try:
        ds = load_dataset(path, split=split)
        return pd.DataFrame(ds)
    except NameError:
        # The optional `datasets` import at the top of the file is swallowed
        # on ImportError; give a clear message instead of a NameError dump.
        st.error("Failed to load HF dataset: the 'datasets' package is not installed")
        return None
    except Exception as e:
        st.error(f"Failed to load HF dataset: {e}")
        return None
@st.cache_data(show_spinner="Downloading from Kaggle...", ttl=3600)
def load_kaggle(kaggle_path: str, file_name: str) -> Optional[pd.DataFrame]:
    """Download a Kaggle dataset and read one CSV file from it.

    Parameters
    ----------
    kaggle_path : str
        Kaggle dataset handle in ``user/dataset`` form.
    file_name : str
        Name of the CSV file inside the downloaded dataset directory.

    Returns
    -------
    Optional[pd.DataFrame]
        The parsed CSV, or ``None`` on failure (reported via ``st.error``).
    """
    try:
        dataset_dir = kagglehub.dataset_download(kaggle_path)
        return pd.read_csv(os.path.join(dataset_dir, file_name))
    except NameError:
        # The optional `kagglehub` import is swallowed on ImportError at the
        # top of the file; report the missing dependency explicitly.
        st.error("Failed to load Kaggle dataset: the 'kagglehub' package is not installed")
        return None
    except Exception as e:
        st.error(f"Failed to load Kaggle dataset: {e}")
        return None
def dataset_sidebar():
    """Render the data-ingestion sidebar and update session state.

    Lets the user load a DataFrame from an uploaded CSV, a Hugging Face
    dataset, or a Kaggle dataset. On a successful load, stores the frame
    in ``st.session_state.original_df`` and a working copy in
    ``st.session_state.processed_df``, then shows a success message.
    """
    st.sidebar.header("1️⃣ Data Ingestion")
    source = st.sidebar.selectbox(
        "Source Type",
        ["Upload CSV", "Hugging Face", "Kaggle"],
        help="Select the source of your dataset.",
    )

    df = None
    if source == "Upload CSV":
        file = st.sidebar.file_uploader("Drop CSV Here", type=["csv"])
        if file:
            df = load_csv(file)
    elif source == "Hugging Face":
        path = st.sidebar.text_input("Dataset Path (e.g., 'iris')", "iris")
        split = st.sidebar.text_input("Split (e.g., 'train')", "train")
        if st.sidebar.button("Fetch HF Dataset"):
            df = load_hf(path, split)
    elif source == "Kaggle":
        kp = st.sidebar.text_input("Kaggle Path (user/dataset)")
        fn = st.sidebar.text_input("CSV Filename inside dataset")
        if st.sidebar.button("Fetch Kaggle Dataset"):
            df = load_kaggle(kp, fn)

    # State Update Logic: only overwrite stored frames when new data arrived.
    if df is not None:
        # .get() avoids an AttributeError if the host app never seeded
        # 'original_df' in session state before this sidebar runs.
        current = st.session_state.get("original_df")
        if current is None or not df.equals(current):
            st.session_state.original_df = df.copy()
            # Initialize processed_df as a copy of original
            st.session_state.processed_df = df.copy()
        st.sidebar.success(f"Loaded: {df.shape[0]} rows, {df.shape[1]} cols")