| import pandas as pd | |
class DataLoader:
    """
    Class responsible for loading and validating the dataset.

    The dataset is read with ``pandas.read_csv`` and must contain the
    ``book_name``, ``summaries`` and ``categories`` columns.
    """

    # Columns that must be present in the loaded dataset.
    _REQUIRED_COLUMNS = ("book_name", "summaries", "categories")
    # Rows with a missing value in any of these columns are discarded.
    _NON_NULL_COLUMNS = ("book_name", "summaries")

    def __init__(self, file_path):
        """
        Store the dataset location; nothing is read until ``load_data``.

        Args:
            file_path: Source of the CSV data, passed unchanged to
                ``pandas.read_csv``.
        """
        self.file_path = file_path

    def load_data(self):
        """
        Load and preprocess the dataset.

        Reads the CSV at ``self.file_path``, checks that every required
        column is present, and drops rows whose ``book_name`` or
        ``summaries`` value is missing. Rows with a missing ``categories``
        value are kept.

        Returns:
            pd.DataFrame: Loaded and validated dataset.

        Raises:
            FileNotFoundError: If ``pandas.read_csv`` cannot find the file.
            ValueError: If a required column is missing from the dataset.
            Exception: If reading the CSV fails for any other reason; the
                underlying error is attached as ``__cause__``.
        """
        # Only the read is guarded, so the validation error raised below is
        # not swallowed and re-wrapped by the generic handler.
        try:
            df = pd.read_csv(self.file_path)
        except FileNotFoundError as err:
            raise FileNotFoundError(f"File not found: {self.file_path}") from err
        except Exception as err:
            raise Exception(f"Error loading data: {err}") from err

        if not all(col in df.columns for col in self._REQUIRED_COLUMNS):
            raise ValueError(
                f"Dataset must include {', '.join(self._REQUIRED_COLUMNS)} columns."
            )

        # Non-mutating dropna; the original row index labels are preserved.
        return df.dropna(subset=list(self._NON_NULL_COLUMNS))