# Feature-extraction script: reads a corpus CSV, computes per-text features
# with the helpers from feature_core, appends them as new columns, and writes
# the enriched table to a new CSV file.
import os
import sys

import pandas as pd

# Support running the file both as a module and as a direct script: the
# parent of this file's directory is put at the front of sys.path
# (presumably where feature_core lives -- confirm against the repo layout).
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
BACKEND_DIR = os.path.abspath(os.path.join(CURRENT_DIR, ".."))
if BACKEND_DIR not in sys.path:
    sys.path.insert(0, BACKEND_DIR)

from feature_core import extract_features, load_nlp_model  # noqa: E402

# Feature columns that are cast to int before the table is written out.
_INTEGER_COLUMNS = ("num_words", "num_sentences", "polysyllabic_words")


def _features_as_series(text):
    """Return the features extracted from *text*, wrapped in a pandas Series.

    Uses the module-level ``_nlp`` model, which is loaded once below before
    this helper is first called.
    """
    extracted = extract_features(text, _nlp)
    return pd.Series(extracted)


# Load the corpus; the path is resolved against the current working directory.
_df = pd.read_csv("corpus_with_group.csv")

# Load the NLP model a single time and reuse it for every row.
_nlp = load_nlp_model("tl_calamancy_md-0.2.0")

# Compute the features row by row from the "text" column, then attach the
# resulting columns alongside the original ones.
features = _df["text"].apply(_features_as_series)
_df = pd.concat([_df, features], axis=1)

# Ensure the count-like columns are stored as ints.
for _column in _INTEGER_COLUMNS:
    _df[_column] = _df[_column].astype(int)

# Persist the enriched corpus (without the index) and show a short preview.
_df.to_csv("Feature_Extracted_Corpus.csv", index=False)
print(_df.head())