Spaces:

asritha22bce
/

New_space_test

Build error

File size: 2,746 Bytes

f92fce7
 
18dfdb0
 
 
 
 
 
 
f92fce7
 
 
bb6b974
 
f92fce7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bb6b974
f92fce7
 
 
 
 
 
 
 
 
 
 
bb6b974
f92fce7
bb6b974
 
f92fce7
 
 
bb6b974
 
 
f92fce7
 
bb6b974
 
 
 
f92fce7
58cc55a
bb6b974
f92fce7

import subprocess
import sys

import pandas as pd
import spacy

# Make sure the small English spaCy model is available before the class below
# tries to load it; download it on first run if loading fails.
try:
    spacy.load("en_core_web_sm")  # Probe only: the result is discarded
except OSError:
    # Run the download with the interpreter executing this file
    # (sys.executable) so the model lands in the same environment. A bare
    # "python" may resolve to a different interpreter or be missing from PATH.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,  # Fail loudly if the download does not succeed
    )


class NegativeWordReplacer:
    """Replace exaggerated words in a sentence with neutral alternatives.

    Replacements are read from an Excel sheet with the columns
    ``Exaggerated Word``, ``POS`` and ``Neutral Word``. They are stored in
    ``self.replacement_dict`` keyed by ``(word, part of speech)``, so a word
    is only replaced when it is tagged with the part of speech listed for it.
    """

    # spaCy coarse POS tags -> part-of-speech labels used as dictionary keys.
    _POS_LABELS = {
        "NOUN": "noun",
        "VERB": "verb",
        "ADJ": "adjective",
        "ADV": "adverb",
    }

    # Columns every usable row of the sheet must provide.
    _COLUMNS = ["Exaggerated Word", "POS", "Neutral Word"]

    def __init__(self, excel_path):
        """Load the spaCy model, then build the replacement dictionary.

        Args:
            excel_path: Path of the Excel sheet passed to ``pandas.read_excel``.
        """
        # The model must exist first: building the dictionary lemmatizes
        # every word in the sheet with ``self.nlp``.
        self.nlp = spacy.load("en_core_web_sm")
        self.replacement_dict = self.load_replacement_dict(excel_path)

    def normalize_pos(self, pos):
        """Map a spaCy POS tag to the label used in the sheet.

        Args:
            pos: A spaCy coarse-grained tag such as ``"ADJ"``.

        Returns:
            ``"noun"``, ``"verb"``, ``"adjective"`` or ``"adverb"``; ``None``
            for any other tag.
        """
        return self._POS_LABELS.get(pos)

    def load_replacement_dict(self, excel_path):
        """Read the Excel sheet and turn it into a lookup dictionary.

        Args:
            excel_path: Path of the Excel sheet passed to ``pandas.read_excel``.

        Returns:
            Dict mapping ``(word, pos)`` to the neutral word, all lower-cased.

        Raises:
            KeyError: If one of the expected columns is missing.
        """
        return self._build_replacement_dict(pd.read_excel(excel_path))

    def _build_replacement_dict(self, df):
        """Build the ``(word, pos) -> neutral word`` mapping from a DataFrame."""
        explicit = {}   # Keys spelled exactly as written in the sheet
        by_lemma = {}   # Extra keys derived from each word's lemma

        # Blank cells are read as NaN; without dropping them, str() would
        # turn them into the literal text "nan" and create bogus entries.
        for _, row in df.dropna(subset=self._COLUMNS).iterrows():
            neg_word = str(row["Exaggerated Word"]).lower().strip()
            pos_tag = str(row["POS"]).lower().strip()
            neutral_word = str(row["Neutral Word"]).lower().strip()

            # Whitespace-only cells carry no usable information, and an
            # empty word would produce an empty doc below.
            if not (neg_word and pos_tag and neutral_word):
                continue

            explicit[(neg_word, pos_tag)] = neutral_word

            # Store the root form too, but only for single-token entries:
            # the first token's lemma does not stand for a whole phrase.
            doc = self.nlp(neg_word)
            if len(doc) == 1:
                by_lemma[(doc[0].lemma_.lower(), pos_tag)] = neutral_word

        # Entries written in the sheet win over lemma-derived ones, whatever
        # the row order.
        return {**by_lemma, **explicit}

    def replace_negative_words(self, sentence):
        """Replace exaggerated words in ``sentence`` using the dictionary.

        Each token is looked up by its lower-cased text and then by its
        lower-cased lemma, together with its normalized part of speech.
        Tokens whose part of speech is not noun/verb/adjective/adverb are
        copied unchanged.

        NOTE(review): an adjective with no dictionary entry is removed from
        the output, so every unmatched adjective is dropped, not only
        exaggerated ones. Confirm this is intended.

        Args:
            sentence: Text to process.

        Returns:
            The rewritten text, keeping the original spacing between tokens.
        """
        pieces = []

        for token in self.nlp(sentence):
            pos = self.normalize_pos(token.pos_)

            # Tokens attached as adjectival modifiers are looked up as
            # adjectives regardless of their POS tag.
            if token.dep_ == "amod":
                pos = "adjective"

            if pos is None:
                pieces.append(token.text + token.whitespace_)
                continue

            replacement = (
                self.replacement_dict.get((token.text.lower(), pos))
                or self.replacement_dict.get((token.lemma_.lower(), pos))
            )

            if replacement:
                # Keep original capitalization
                if token.text[0].isupper():
                    replacement = replacement.capitalize()
                pieces.append(replacement + token.whitespace_)
            elif pos == "adjective":
                # Token is dropped. If nothing separated it from what
                # follows (end of text or punctuation), the space before it
                # would be left dangling, so trim it.
                if not token.whitespace_ and pieces:
                    pieces[-1] = pieces[-1].rstrip()
            else:
                pieces.append(token.text + token.whitespace_)

        # Joining with each token's own trailing whitespace restores the
        # original spacing instead of inserting a space before punctuation.
        return "".join(pieces)