# THIS FILE IS NOT USED FOR THE CURRENT SYSTEM. ONLY USED FOR TRAINING EARLY VERSIONS OF THE SYSTEM.
#
# Exploratory checks on "corpus_clean.csv": prints a preview of the data,
# counts of missing/blank texts, word-count statistics, and the distribution
# of grades and of the coarser school levels derived from them.
import pandas as pd

# Load the corpus; the checks below read the "text" and "grade" columns.
df = pd.read_csv("corpus_clean.csv")

# Quick look at the contents, column names and dimensions.
print(df.head())
print(df.columns)
print(df.shape)

# Data-quality checks: missing texts and texts that are only whitespace.
print(df['text'].isnull().sum())
print((df['text'].str.strip() == "").sum())

# Whitespace-delimited token count per text. The vectorized .str accessor
# propagates missing texts as NaN, whereas calling .split() on each value
# raises AttributeError as soon as a row holds NaN (a float).
df['length'] = df['text'].str.split().str.len()
print(df['length'].describe())

# Number of texts per grade, ordered by grade.
print(df['grade'].value_counts().sort_index())


def map_level(grade):
    """Map a grade to a coarse school-level label.

    Args:
        grade: Grade value taken from the "grade" column.

    Returns:
        "Lower" for grades 2-3, "Upper" for grades 4-6, and "Secondary"
        for every other value (anything outside 2-6, including a
        missing grade, falls through to this label).
    """
    if grade in (2, 3):
        return "Lower"
    if grade in (4, 5, 6):
        return "Upper"
    return "Secondary"


# Derive the level label for every row and show how many texts each has.
df["level"] = df["grade"].apply(map_level)
print(df["level"].value_counts())