import pandas as pd


class DataLoader:
    """Load a CSV dataset and validate that it has the expected columns."""

    # Columns that must be present in the CSV header.
    REQUIRED_COLUMNS = ("book_name", "summaries", "categories")

    def __init__(self, file_path) -> None:
        """Remember the path of the CSV file to load.

        Args:
            file_path: Path handed to ``pd.read_csv`` when ``load_data``
                is called. Nothing is read at construction time.
        """
        self.file_path = file_path

    def load_data(self) -> pd.DataFrame:
        """Read the CSV file, validate its columns and drop incomplete rows.

        Rows with a missing ``book_name`` or ``summaries`` value are
        removed; a missing ``categories`` value is tolerated.

        Returns:
            The loaded dataset without the incomplete rows. The original
            row index is kept (it is not reset after dropping rows).

        Raises:
            FileNotFoundError: If ``self.file_path`` does not exist.
            ValueError: If any of the required columns is absent.
            Exception: If reading or parsing the file fails for any
                other reason; the underlying error is chained as the cause.
        """
        # Guard only the read itself so that the validation error raised
        # below is not caught and re-wrapped as a generic Exception.
        try:
            df = pd.read_csv(self.file_path)
        except FileNotFoundError as e:
            raise FileNotFoundError(f"File not found: {self.file_path}") from e
        except Exception as e:
            raise Exception(f"Error loading data: {e}") from e

        if not all(col in df.columns for col in self.REQUIRED_COLUMNS):
            raise ValueError(
                f"Dataset must include {', '.join(self.REQUIRED_COLUMNS)} columns."
            )

        return df.dropna(subset=["book_name", "summaries"])