File size: 659 Bytes
2b60cf4
b052258
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# THIS FILE IS NOT USED FOR THE CURRENT SYSTEM. ONLY USED FOR TRAINING EARLY VERSIONS OF THE SYSTEM.
import pandas as pd

# Load the cleaned corpus; the statements below read its "text" and "grade" columns.
df = pd.read_csv("corpus_clean.csv")

# Quick look at the data: first rows, column names, and (rows, columns).
print(df.head())
print(df.columns)
print(df.shape)

# Data-quality checks on "text": missing values and whitespace-only strings.
print(df['text'].isnull().sum())
print((df['text'].str.strip() == "").sum())

# Whitespace-delimited word count per row. The vectorized .str accessor passes
# missing values through as NaN instead of raising AttributeError the way
# len(x.split()) does on a NaN float, so describe() simply skips those rows.
df['length'] = df['text'].str.split().str.len()
print(df['length'].describe())

# Number of rows per grade, ordered by grade value.
print(df['grade'].value_counts().sort_index())

def map_level(grade):
    """Return the level label for a grade.

    Grades 2-3 map to "Lower", grades 4-6 map to "Upper", and every other
    value (including grades outside 2-6 and missing values) maps to
    "Secondary".
    """
    lower_grades = (2, 3)
    upper_grades = (4, 5, 6)

    if grade in lower_grades:
        return "Lower"
    if grade in upper_grades:
        return "Upper"
    # Anything not matched above falls through to the catch-all label.
    return "Secondary"

# Label every row with the level that map_level returns for its grade.
level_labels = df["grade"].map(map_level)
df["level"] = level_labels

# Report how many rows fall into each level.
level_counts = df["level"].value_counts()
print(level_counts)