import subprocess
import sys

import pandas as pd
import spacy

# Ensure the spaCy English model is installed before the class is used.
try:
    spacy.load("en_core_web_sm")
except OSError:
    # Use the *current* interpreter (sys.executable), not whatever "python"
    # happens to be on PATH, so the model installs into the active venv.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )


class NegativeWordReplacer:
    """Replace exaggerated/negative words in text with neutral synonyms.

    The mapping is loaded from an Excel sheet expected to contain the
    columns 'Exaggerated Word', 'POS', and 'Neutral Word'.
    """

    def __init__(self, excel_path):
        """Load the spaCy pipeline and build the replacement dictionary.

        Args:
            excel_path: Path to the Excel sheet with the word mappings.
        """
        # Load spaCy first: load_replacement_dict needs self.nlp.
        self.nlp = spacy.load("en_core_web_sm")
        self.replacement_dict = self.load_replacement_dict(excel_path)

    def normalize_pos(self, pos):
        """Map a spaCy coarse POS tag to the tag names used in the sheet.

        Args:
            pos: A spaCy coarse tag such as "NOUN" or "ADJ".

        Returns:
            The lowercase sheet tag ("noun", "verb", "adjective",
            "adverb"), or None for tags not eligible for replacement.
        """
        pos_mapping = {
            "NOUN": "noun",
            "VERB": "verb",
            "ADJ": "adjective",
            "ADV": "adverb",
        }
        return pos_mapping.get(pos)

    def load_replacement_dict(self, excel_path):
        """Read the Excel sheet into a ``{(word, pos): neutral_word}`` dict.

        Both the surface form and its lemma are stored as keys so lookups
        succeed for inflected forms (plurals, conjugated verbs, ...).

        Args:
            excel_path: Path to the Excel sheet.

        Returns:
            Dict keyed by (lowercased word, lowercased POS tag).
        """
        df = pd.read_excel(excel_path)
        replacement_dict = {}
        for _, row in df.iterrows():
            # Skip incomplete rows; str(NaN) would otherwise store the
            # bogus key "nan" in the dictionary.
            if row[['Exaggerated Word', 'POS', 'Neutral Word']].isna().any():
                continue
            neg_word = str(row['Exaggerated Word']).lower().strip()
            pos_tag = str(row['POS']).lower().strip()
            neutral_word = str(row['Neutral Word']).lower().strip()
            lemma = self.nlp(neg_word)[0].lemma_
            replacement_dict[(neg_word, pos_tag)] = neutral_word
            replacement_dict[(lemma, pos_tag)] = neutral_word  # root word too
        return replacement_dict

    def replace_negative_words(self, sentence):
        """Return *sentence* with exaggerated words swapped for neutral ones.

        Args:
            sentence: The text to rewrite.

        Returns:
            The rewritten sentence, tokens re-joined with single spaces
            (original punctuation spacing is not preserved).
        """
        doc = self.nlp(sentence)
        new_tokens = []
        for token in doc:
            lemma = token.lemma_.lower()
            pos = self.normalize_pos(token.pos_)
            word_lower = token.text.lower()  # lowercase for dict lookup

            # An adjectival modifier ("amod") is treated as an adjective
            # regardless of the coarse tag spaCy assigned.
            if token.dep_ == "amod":
                pos = "adjective"

            if pos:
                # Check both the lowercase surface form and its lemma.
                replacement = (self.replacement_dict.get((word_lower, pos))
                               or self.replacement_dict.get((lemma, pos)))
                if replacement:
                    # Preserve the original capitalization.
                    if token.text[0].isupper():
                        replacement = replacement.capitalize()
                    new_tokens.append(replacement)
                elif pos == "adjective":
                    # NOTE(review): this drops EVERY adjective that has no
                    # mapping, not only exaggerated ones — confirm intended.
                    continue
                else:
                    new_tokens.append(token.text)
            else:
                new_tokens.append(token.text)
        return " ".join(new_tokens)