import pandas as pd


# Load the corpus and print a quick structural overview.
df = pd.read_csv("corpus_clean.csv")
print(df.head())
print(df.columns)
print(df.shape)

# Data-quality checks on the 'text' column: count missing values, then
# count rows whose text is empty or whitespace-only.
print(df['text'].isnull().sum())
print((df['text'].str.strip() == "").sum())

# Whitespace-delimited token count per row. Missing texts are treated as
# empty strings (length 0); calling .split() on a NaN float would raise
# AttributeError and abort the script.
df['length'] = df['text'].fillna("").str.split().str.len()
print(df['length'].describe())

# Number of rows per grade, ordered by grade value.
print(df['grade'].value_counts().sort_index())


def map_level(grade) -> str:
    """Map a numeric school grade to a coarse level label.

    Args:
        grade: Grade value to classify. Compared by equality, so ``4`` and
            ``4.0`` are treated the same.

    Returns:
        ``"Lower"`` for grades 2-3, ``"Upper"`` for grades 4-6, and
        ``"Secondary"`` for every other value. Note that the fallback also
        catches grade 1, NaN and non-numeric values.
    """
    if grade in (2, 3):
        return "Lower"
    if grade in (4, 5, 6):
        return "Upper"
    # Anything outside 2-6 falls through to the catch-all label.
    return "Secondary"


# Bucket each row's grade into a level label and store it as a new column.
df["level"] = df["grade"].apply(map_level)

# Show how many rows fall into each level.
print(df["level"].value_counts())