# Snapshot metadata: revision a5f5b61, file size 939 bytes.
import pandas as pd
class DataLoader:
    """
    Load a CSV dataset and validate that it has the expected columns.

    The loader reads the file at ``file_path`` with ``pandas.read_csv``,
    checks that every required column is present, and drops rows that have
    no value in ``book_name`` or ``summaries``.
    """

    # Columns that must appear in the CSV header.
    _REQUIRED_COLUMNS = ("book_name", "summaries", "categories")
    # Columns in which a missing value causes the whole row to be dropped.
    # ``categories`` is intentionally absent: rows without it are kept.
    _NON_NULL_COLUMNS = ("book_name", "summaries")

    def __init__(self, file_path):
        """
        Store the location of the dataset; nothing is read until load_data().

        Args:
            file_path: Value handed unchanged to ``pandas.read_csv``
                (presumably a filesystem path to a CSV file).
        """
        self.file_path = file_path

    def load_data(self) -> "pd.DataFrame":
        """
        Load and preprocess the dataset.

        Returns:
            pd.DataFrame: The dataset with rows lacking ``book_name`` or
            ``summaries`` removed. The original row index is preserved.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            ValueError: If any of the required columns is missing.
            Exception: If pandas fails to read or parse the file for any
                other reason; the underlying error is attached as
                ``__cause__``.
        """
        # Only the read itself is guarded, so the validation error raised
        # below is not swallowed and re-wrapped as a generic Exception.
        try:
            df = pd.read_csv(self.file_path)
        except FileNotFoundError as err:
            raise FileNotFoundError(
                f"File not found: {self.file_path}"
            ) from err
        except Exception as err:
            raise Exception(f"Error loading data: {err}") from err

        if not all(col in df.columns for col in self._REQUIRED_COLUMNS):
            raise ValueError(
                f"Dataset must include {', '.join(self._REQUIRED_COLUMNS)} columns."
            )

        return df.dropna(subset=list(self._NON_NULL_COLUMNS))
|