File size: 659 Bytes
# THIS FILE IS NOT USED FOR THE CURRENT SYSTEM. ONLY USED FOR TRAINING EARLY VERSIONS OF THE SYSTEM.
import pandas as pd
# Load the corpus; the statements below read its 'text' and 'grade' columns.
df = pd.read_csv("corpus_clean.csv")

# First look at the data: leading rows, column names, and (rows, columns).
print(df.head())
print(df.columns)
print(df.shape)

# Data-quality checks on 'text': missing cells, then whitespace-only strings.
print(df['text'].isnull().sum())
print((df['text'].str.strip() == "").sum())

# Whitespace-delimited word count per row. The vectorised .str accessor leaves
# missing text as NaN (which describe() ignores); calling x.split() on a
# missing cell would raise AttributeError because NaN is a float.
df['length'] = df['text'].str.split().str.len()
print(df['length'].describe())

# Number of rows per grade, ordered by grade value.
print(df['grade'].value_counts().sort_index())
def map_level(grade):
    """Map a grade value to a coarse school-level label.

    Args:
        grade: Grade value, compared by equality against the integers 2-6
            (so 2 and 2.0 are treated alike).

    Returns:
        "Lower" for grades 2-3, "Upper" for grades 4-6, and "Secondary"
        for every other value.
    """
    if grade in (2, 3):
        return "Lower"
    if grade in (4, 5, 6):
        return "Upper"
    # NOTE(review): catch-all branch -- grade 1, missing (NaN) and non-numeric
    # grades also end up here; confirm that is intended.
    return "Secondary"
# Bucket each row's grade into its school level, then show how many rows
# fall into each level.
df["level"] = df["grade"].apply(map_level)
print(df["level"].value_counts())