# Negative/exaggerated word replacer (Hugging Face Space).
# NOTE: the platform's "Build error" banner lines were paste artifacts, removed.
import spacy
import pandas as pd
import subprocess
import sys

# Ensure the spaCy English model is installed; download it on first run.
try:
    spacy.load("en_core_web_sm")
except OSError:
    # Use the *current* interpreter (sys.executable) rather than whatever
    # "python" resolves to on PATH — they can differ inside a venv or on
    # systems that only ship "python3".
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
class NegativeWordReplacer:
    """Replace exaggerated/negative words in text with neutral alternatives.

    The mapping is loaded from an Excel sheet with columns
    'Exaggerated Word', 'POS' and 'Neutral Word'.
    """

    def __init__(self, excel_path):
        """Load the spaCy model and build the replacement dictionary.

        Args:
            excel_path: Path to the Excel sheet holding the word mapping.
        """
        # Model must be loaded before load_replacement_dict lemmatizes rows.
        self.nlp = spacy.load("en_core_web_sm")
        self.replacement_dict = self.load_replacement_dict(excel_path)

    def normalize_pos(self, pos):
        """Map a spaCy coarse POS tag to the lowercase tag used in the sheet.

        Returns None for tags the sheet does not cover (those tokens are
        passed through unchanged).
        """
        pos_mapping = {
            "NOUN": "noun",
            "VERB": "verb",
            "ADJ": "adjective",
            "ADV": "adverb",
        }
        return pos_mapping.get(pos)

    def load_replacement_dict(self, excel_path):
        """Read the Excel sheet into a ``{(word, pos): neutral_word}`` dict.

        Each entry is stored under both its surface form and its lemma so
        inflected forms still match at lookup time.
        """
        df = pd.read_excel(excel_path)
        replacement_dict = {}
        for _, row in df.iterrows():
            neg_word = str(row['Exaggerated Word']).lower().strip()
            pos_tag = str(row['POS']).lower().strip()
            neutral_word = str(row['Neutral Word']).lower().strip()
            # Skip blank/NaN cells; str(nan) would otherwise store a
            # bogus 'nan' key in the dictionary.
            if not neg_word or neg_word == 'nan':
                continue
            lemma = self.nlp(neg_word)[0].lemma_
            replacement_dict[(neg_word, pos_tag)] = neutral_word
            replacement_dict[(lemma, pos_tag)] = neutral_word  # root form too
        return replacement_dict

    def replace_negative_words(self, sentence):
        """Return *sentence* with dictionary words swapped for neutral ones.

        Behavior notes:
        - An ``amod`` (adjectival modifier) token is always looked up as an
          adjective, whatever the tagger said.
        - Adjectives with no neutral match are dropped entirely (treated as
          pure exaggeration).
        - Original spacing and punctuation are preserved by carrying each
          token's trailing whitespace, instead of re-joining with spaces
          (which used to produce "Hello , world .").
        """
        doc = self.nlp(sentence)
        pieces = []
        for token in doc:
            lemma = token.lemma_.lower()
            pos = self.normalize_pos(token.pos_)
            word_lower = token.text.lower()  # lowercase for dict lookup
            if token.dep_ == "amod":
                pos = "adjective"
            if pos:
                # Try the surface form first, then fall back to the lemma.
                replacement = (self.replacement_dict.get((word_lower, pos))
                               or self.replacement_dict.get((lemma, pos)))
                if replacement:
                    # Keep the original token's capitalization.
                    if token.text[0].isupper():
                        replacement = replacement.capitalize()
                    pieces.append(replacement + token.whitespace_)
                elif pos == "adjective":
                    continue  # drop unmatched exaggerated adjectives
                else:
                    pieces.append(token.text + token.whitespace_)
            else:
                pieces.append(token.text + token.whitespace_)
        return "".join(pieces)