File size: 939 Bytes
a5f5b61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import pandas as pd

class DataLoader:
    """
    Class responsible for loading and validating the dataset.

    The dataset is read from a CSV file with ``pandas.read_csv`` and must
    contain the ``book_name``, ``summaries`` and ``categories`` columns.
    """

    def __init__(self, file_path):
        """
        Store the location of the CSV file to load.

        Args:
            file_path: Path of the CSV file; passed unchanged to
                ``pd.read_csv`` when ``load_data`` is called.
        """
        self.file_path = file_path

    def load_data(self) -> pd.DataFrame:
        """
        Load and preprocess the dataset.

        Reads the CSV at ``self.file_path``, checks that every required
        column is present, and drops rows whose ``book_name`` or
        ``summaries`` value is missing. Rows with a missing ``categories``
        value are kept.

        Returns:
            pd.DataFrame: Loaded and validated dataset.

        Raises:
            FileNotFoundError: If ``self.file_path`` does not exist.
            ValueError: If any of the required columns is missing.
            Exception: If reading the file fails for any other reason
                (the original error is attached as ``__cause__``).
        """
        # Keep the try body limited to the read itself so that the
        # validation error raised below is not swallowed by the broad
        # handler and re-raised as a generic Exception.
        try:
            df = pd.read_csv(self.file_path)
        except FileNotFoundError as err:
            raise FileNotFoundError(f"File not found: {self.file_path}") from err
        except Exception as err:
            raise Exception(f"Error loading data: {err}") from err

        required_columns = ["book_name", "summaries", "categories"]
        if not all(col in df.columns for col in required_columns):
            raise ValueError(f"Dataset must include {', '.join(required_columns)} columns.")

        # Only book_name and summaries are mandatory per row; categories
        # may be empty.
        return df.dropna(subset=["book_name", "summaries"])