rohitmsan committed on
Commit
a5f5b61
·
verified ·
1 Parent(s): 4ba61d6

Create data_loader.py

Browse files
Files changed (1) hide show
  1. data_loader.py +29 -0
data_loader.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
class DataLoader:
    """
    Load a CSV dataset from disk and validate its schema.

    Attributes:
        file_path: Location of the CSV file, passed straight to
            ``pd.read_csv``.
    """

    # Columns that must be present in the CSV header.
    REQUIRED_COLUMNS = ("book_name", "summaries", "categories")
    # Rows with a missing value in any of these columns are discarded.
    NON_NULL_COLUMNS = ("book_name", "summaries")

    def __init__(self, file_path):
        self.file_path = file_path

    def load_data(self) -> pd.DataFrame:
        """
        Read the CSV at ``self.file_path``, validate it and drop incomplete rows.

        Returns:
            pd.DataFrame: The dataset with rows removed where ``book_name`` or
            ``summaries`` is missing. The original row index is kept.

        Raises:
            FileNotFoundError: If ``self.file_path`` does not exist.
            ValueError: If any of the required columns is absent.
            Exception: If the file exists but cannot be read or parsed; the
                underlying error is attached as ``__cause__``.
        """
        # Only the read itself is guarded, so the schema ValueError below is
        # not swallowed and re-labelled as a generic loading failure.
        try:
            df = pd.read_csv(self.file_path)
        except FileNotFoundError as err:
            raise FileNotFoundError(f"File not found: {self.file_path}") from err
        except Exception as err:
            raise Exception(f"Error loading data: {err}") from err

        if any(col not in df.columns for col in self.REQUIRED_COLUMNS):
            raise ValueError(
                f"Dataset must include {', '.join(self.REQUIRED_COLUMNS)} columns."
            )

        # Reassign rather than mutate in place.
        return df.dropna(subset=list(self.NON_NULL_COLUMNS))
28
+
29
+