| import os | |
| import sys | |
| import pandas as pd | |
# Make the parent directory importable so the file works both when run as a
# module and when executed directly as a script.
# NOTE(review): presumably `feature_core` lives in BACKEND_DIR — confirm.
_this_file = os.path.abspath(__file__)
CURRENT_DIR = os.path.dirname(_this_file)
BACKEND_DIR = os.path.abspath(os.path.join(CURRENT_DIR, os.pardir))

if BACKEND_DIR not in sys.path:
    # Insert at the front so this directory is searched before other entries.
    sys.path.insert(0, BACKEND_DIR)

# Must come after the sys.path adjustment above, hence the E402 suppression.
from feature_core import extract_features, load_nlp_model  # noqa: E402
# Feature-extraction pipeline: read the corpus, compute features for every
# row's "text" value, append them as new columns, and write the result out.
# Both CSV file names are bare, so they resolve against the working directory.

# Corpus to process; it must provide a "text" column.
_df = pd.read_csv("corpus_with_group.csv")

# The NLP model is loaded a single time and reused for every row.
_nlp = load_nlp_model("tl_calamancy_md-0.2.0")


def _row_features(text):
    """Return the features extracted from one text as a pandas Series."""
    return pd.Series(extract_features(text, _nlp))


# Returning a Series per row makes `apply` yield a DataFrame of features.
features = _df["text"].apply(_row_features)
_df = pd.concat([_df, features], axis=1)

# Ensure the count columns are stored as ints.
for _count_col in ("num_words", "num_sentences", "polysyllabic_words"):
    _df[_count_col] = _df[_count_col].astype(int)

_df.to_csv("Feature_Extracted_Corpus.csv", index=False)
print(_df.head())