Spaces:

asritha22bce
/

New_space_test

Build error

App Files Files Community

asritha22bce committed on Feb 19, 2025

Commit

bb6b974

verified ·

1 Parent(s): 07a8a78

Update preprocessing.py

Browse files

Files changed (1) hide show

preprocessing.py +14 -6

preprocessing.py CHANGED Viewed

@@ -3,9 +3,9 @@ import pandas as pd
 class NegativeWordReplacer:
     def __init__(self, excel_path):
-        """Initialize replacement dictionary from Excel file."""
         self.replacement_dict = self.load_replacement_dict(excel_path)
-        self.nlp = spacy.load("en_core_web_sm")  # Load spaCy model once
     def normalize_pos(self, pos):
         """Convert spaCy POS tags to match the Excel sheet."""
@@ -31,7 +31,7 @@ class NegativeWordReplacer:
             lemma = doc[0].lemma_
             replacement_dict[(neg_word, pos_tag)] = neutral_word
-            replacement_dict[(lemma, pos_tag)] = neutral_word
         return replacement_dict
@@ -43,17 +43,25 @@ class NegativeWordReplacer:
         for token in doc:
             lemma = token.lemma_.lower()
             pos = self.normalize_pos(token.pos_)
-            if token.pos_ == "VERB" and token.dep_ == "amod":
                 pos = "adjective"
             if pos:
-                replacement = self.replacement_dict.get((lemma, pos), None)
                 if replacement:
                     new_tokens.append(replacement)
                 elif pos in ["adjective", "adverb"]:
-                    continue  # Remove unnecessary exaggerated words
                 else:
                     new_tokens.append(token.text)
             else:

 class NegativeWordReplacer:
     def __init__(self, excel_path):
+        """Initialize spaCy NLP model and load replacement dictionary."""
+        self.nlp = spacy.load("en_core_web_sm")  # ✅ Load spaCy before calling it
         self.replacement_dict = self.load_replacement_dict(excel_path)
     def normalize_pos(self, pos):
         """Convert spaCy POS tags to match the Excel sheet."""
             lemma = doc[0].lemma_
             replacement_dict[(neg_word, pos_tag)] = neutral_word
+            replacement_dict[(lemma, pos_tag)] = neutral_word  # Store root word too
         return replacement_dict
         for token in doc:
             lemma = token.lemma_.lower()
             pos = self.normalize_pos(token.pos_)
+            word_lower = token.text.lower()  # Convert to lowercase for lookup
+            # ✅ Fix: "amod" dependency only applies to adjectives
+            if token.dep_ == "amod":
                 pos = "adjective"
             if pos:
+                # ✅ Check both lowercase word and its lemma
+                replacement = self.replacement_dict.get((word_lower, pos)) or \
+                              self.replacement_dict.get((lemma, pos))
                 if replacement:
+                    # Keep original capitalization
+                    if token.text[0].isupper():
+                        replacement = replacement.capitalize()
                     new_tokens.append(replacement)
                 elif pos in ["adjective", "adverb"]:
+                    continue  # Remove exaggerated words
                 else:
                     new_tokens.append(token.text)
             else: