Spaces:
Sleeping
Sleeping
Create preprocess.py
Browse files- preprocess.py +12 -0
preprocess.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
|
| 3 |
+
# Function to preprocess input text
|
| 4 |
+
def preprocess_text(text):
    """Normalize a single text value.

    Values that ``pd.isna`` reports as missing (e.g. ``None`` or NaN) become
    the empty string. Everything else is stripped of leading/trailing
    whitespace and lowercased. Values without string methods (for example
    numbers coming from a mixed-type column) are converted with ``str()``
    first instead of raising ``AttributeError``.

    Args:
        text: A scalar value, normally a string or a missing-value marker.

    Returns:
        The stripped, lowercased value, or ``""`` when the input is missing.
    """
    if pd.isna(text):
        return ""
    try:
        return text.strip().lower()  # Simple lowercase cleaning
    except AttributeError:
        # Non-string scalars (ints, floats, bools, ...) have no
        # strip()/lower(); fall back to their string form.
        return str(text).strip().lower()
|
| 8 |
+
|
| 9 |
+
# Function to preprocess dataset
|
| 10 |
+
def preprocess_dataset(df):
    """Clean the ``"text"`` column of a DataFrame in place.

    Every value of ``df["text"]`` is passed through ``preprocess_text`` and
    the column is overwritten with the results.

    Args:
        df: Object supporting ``df["text"]`` lookup and assignment, where the
            looked-up column provides ``.apply`` (presumably a pandas
            DataFrame - confirm against callers).

    Returns:
        The same ``df`` object that was passed in, with its ``"text"``
        column replaced by the cleaned values.
    """
    cleaned_column = df["text"].apply(preprocess_text)
    # Assign back onto the caller's object; no copy is made.
    df["text"] = cleaned_column
    return df
|