dinusha11 committed on
Commit
7721643
·
verified ·
1 Parent(s): 2efecbb

Create preprocess.py

Browse files
Files changed (1) hide show
  1. preprocess.py +12 -0
preprocess.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
+ # Function to preprocess input text
4
def preprocess_text(text):
    """Normalize a single text value: trim surrounding whitespace and lowercase.

    Args:
        text: The value to clean. Usually a ``str``; missing values and
            non-string values are also accepted.

    Returns:
        ``""`` when ``text`` is a missing scalar (as reported by ``pd.isna``,
        e.g. ``None`` or ``NaN``); otherwise the stripped, lower-cased string
        form of ``text``.
    """
    if not isinstance(text, str):
        # Only ask pd.isna about scalars: for list-likes it returns an array,
        # whose truth value is ambiguous and would raise inside the `if`.
        if pd.api.types.is_scalar(text) and pd.isna(text):
            return ""
        # Non-string values (e.g. numbers in a mixed-type column) have no
        # .strip(); coerce them instead of failing with AttributeError.
        text = str(text)
    return text.strip().lower()  # Simple lowercase cleaning
8
+
9
+ # Function to preprocess dataset
10
def preprocess_dataset(df):
    """Clean the ``"text"`` column of ``df`` with ``preprocess_text``.

    The column is overwritten on the object passed in, and that same object
    is returned.

    Args:
        df: Object supporting ``df["text"]`` lookup and assignment, where the
            looked-up value provides ``.apply`` (presumably a pandas
            DataFrame -- confirm against callers).

    Returns:
        ``df`` itself, with its ``"text"`` column replaced by the cleaned
        values.
    """
    column = "text"
    cleaned = df[column].apply(preprocess_text)
    df[column] = cleaned
    return df