Alalay / models /testing.py
Jandayl's picture
added comments
2b60cf4
raw
history blame contribute delete
659 Bytes
# THIS FILE IS NOT USED FOR THE CURRENT SYSTEM. ONLY USED FOR TRAINING EARLY VERSIONS OF THE SYSTEM.
import pandas as pd
# Load the cleaned corpus; the rest of the script reads its "text" and
# "grade" columns.
df = pd.read_csv("corpus_clean.csv")

# Structural overview: first rows, column names, (rows, columns).
print(df.head())
print(df.columns)
print(df.shape)

# Data-quality checks on the text column: missing cells, then cells that
# are empty once surrounding whitespace is stripped.
print(df['text'].isnull().sum())
print((df['text'].str.strip() == "").sum())

# Whitespace-delimited word count per row. Missing cells are loaded as
# float NaN, which has no .split(); count any non-string cell as 0 words
# instead of crashing with AttributeError.
df['length'] = df['text'].apply(
    lambda text: len(text.split()) if isinstance(text, str) else 0
)
print(df['length'].describe())

# Number of rows per grade, ordered by grade value.
print(df['grade'].value_counts().sort_index())
def map_level(grade):
    """Map a grade value to a coarse level label.

    Args:
        grade: A single value taken from the ``grade`` column.

    Returns:
        ``"Lower"`` for grades 2 and 3, ``"Upper"`` for grades 4, 5 and 6,
        and ``"Secondary"`` for every other value.
    """
    if grade in (2, 3):
        return "Lower"
    if grade in (4, 5, 6):
        return "Upper"
    # Catch-all: any value outside 2-6 ends up here, which includes
    # grade 1 and missing (NaN) grades as well as grades 7 and above.
    return "Secondary"
# Derive the level label for every row from its grade, then report how
# many rows fall into each level.
df["level"] = df["grade"].map(map_level)
level_counts = df["level"].value_counts()
print(level_counts)