"""Dataset ingestion helpers.

Provides one loader for procurement tables stored in a local Kaggle dataset
folder and one loader for rows of a Hugging Face dataset.
"""

from __future__ import annotations

from pathlib import Path
from typing import Any, Dict, Optional


def load_kaggle_procurement_tables(base_path: str | Path) -> Dict[str, Any]:
    """Load the procurement tables present in a local Kaggle dataset folder.

    For each of the table names ``invoices``, ``suppliers``, ``departments``
    and ``labels`` the folder is probed for ``<name>.parquet`` first and
    ``<name>.csv`` second; the first file found is read with pandas. Tables
    with neither file are simply left out of the result.

    Args:
        base_path: Folder that holds the table files.

    Returns:
        Mapping of table name to the object returned by ``pandas.read_parquet``
        or ``pandas.read_csv`` for every table that was found.

    Raises:
        RuntimeError: If pandas cannot be imported, or if no ``invoices``
            table was found under ``base_path``.
        FileNotFoundError: If ``base_path`` does not exist.
    """
    # Imported lazily so importing this module does not require pandas.
    try:
        import pandas as pd  # type: ignore[import-not-found]
    except ImportError as exc:
        raise RuntimeError("pandas is required for Kaggle dataset ingestion") from exc

    base = Path(base_path)
    if not base.exists():
        raise FileNotFoundError(f"Kaggle dataset path not found: {base}")

    tables: Dict[str, Any] = {}
    for table_name in ("invoices", "suppliers", "departments", "labels"):
        parquet_path = base / f"{table_name}.parquet"
        csv_path = base / f"{table_name}.csv"
        # Parquet wins when both formats are present for the same table.
        if parquet_path.exists():
            tables[table_name] = pd.read_parquet(parquet_path)
        elif csv_path.exists():
            tables[table_name] = pd.read_csv(csv_path)

    # Only the invoices table is mandatory; the other tables are optional.
    if "invoices" not in tables:
        raise RuntimeError("Expected invoices table in Kaggle dataset folder")
    return tables


def load_hf_financial_rows(
    split: str = "train", max_rows: Optional[int] = None
) -> list[dict[str, Any]]:
    """Load rows of the ``amitkedia/Financial-Fraud-Dataset`` Hugging Face dataset.

    Args:
        split: Name of the dataset split passed to ``datasets.load_dataset``.
        max_rows: When not ``None``, keep only the first ``max_rows`` rows
            (capped at the size of the split). ``None`` keeps every row.

    Returns:
        One plain ``dict`` per dataset row, in dataset order.

    Raises:
        RuntimeError: If the ``datasets`` library cannot be imported.
    """
    # Imported lazily so importing this module does not require `datasets`.
    try:
        from datasets import load_dataset
    except ImportError as exc:
        raise RuntimeError(
            "datasets library is required for Hugging Face dataset ingestion"
        ) from exc

    ds = load_dataset("amitkedia/Financial-Fraud-Dataset", split=split)
    if max_rows is not None:
        # Cap at len(ds) so the selected index range never runs past the split.
        ds = ds.select(range(min(max_rows, len(ds))))
    return [dict(row) for row in ds]