Spaces:

ValadisCERTH
/

NaturalLanguageModule_complete

Runtime error

App Files Files Community

ValadisCERTH committed on May 10, 2023

Commit

e28e68e

1 Parent(s): b6216d1

Update comparativesIdentification.py

Browse files

Files changed (1) hide show

comparativesIdentification.py +16 -6

comparativesIdentification.py CHANGED Viewed

@@ -6,6 +6,8 @@ import numpy as np
 from sklearn.metrics.pairwise import cosine_similarity
 # use the small spaCy model because it keeps us closer to a bag-of-words (BOW) model, which is what we care about here since we only compare words
 nlp_comparatives = spacy.load('en_core_web_sm', disable=["parser", "ner"])
@@ -45,12 +47,15 @@ def find_comptives_straight_patterns(sentence):
         # find mentions of "equal" followed by "to"
         if token.text.lower() == "equal":
             next_token = token.nbor()
             if next_token.text.lower() == "to":
                 prev_token = token.nbor(-1)
                 if prev_token.pos_ == "NOUN":
                     # comparatives.append({'comparative': ["equal to", "="]})
                     comparatives.append({'comparative': "="})
@@ -60,18 +65,22 @@ def find_comptives_straight_patterns(sentence):
             next_token = token.nbor()
             if next_token.text.lower() == "than":
                 prev_token = token.nbor(-1)
                 if token.text.lower() == 'more':
                     # comparatives.append({'comparative': [token.text + " " + next_token.text, '>']})
                     comparatives.append({'comparative': '>'})
                 elif token.text.lower() == 'less':
                     # comparatives.append({'comparative': [token.text + " " + next_token.text, '<']})
                     comparatives.append({'comparative': '<'})
         # find mentions of comparative adjectives or comparative adverbs followed by "than"
         elif token.tag_ == "JJR" or token.tag_ == "RBR":
             next_token = token.nbor()
             if next_token.text.lower() == "than" and next_token.nbor().pos_ != "NOUN":
@@ -79,9 +88,7 @@ def find_comptives_straight_patterns(sentence):
                 # check if the token is a synonym of "bigger"
                 # retrieve a set of synonyms for the concepts of 'big' and 'bigger'
-                big_synonyms = set(
-                    wordnet.synsets('big') + wordnet.synsets('large') + wordnet.synsets('great') + wordnet.synsets(
-                        'huge') + wordnet.synsets('enormous') + wordnet.synsets('heavy') + wordnet.synsets(
                         'strong') + wordnet.synsets('enormous') + wordnet.synsets('massive') + wordnet.synsets(
                         'immense') + wordnet.synsets('substantial'))
                 bigger_synonyms = set(wordnet.synsets('bigger') + wordnet.synsets('larger') + wordnet.synsets(
@@ -89,17 +96,19 @@ def find_comptives_straight_patterns(sentence):
                     'heavier') + wordnet.synsets('stronger'))
                 bigger_related_words = big_synonyms.union(bigger_synonyms)
                 bigger_rel_words = [word.name().split('.')[0] for word in bigger_related_words]
                 flag_bigger = 0
                 if token.text.lower() in bigger_rel_words:
                     flag_bigger = 1
                     # comparatives.append({'comparative': [token.text + " " + next_token.text, '>']})
                     comparatives.append({'comparative': '>'})
                 # if no synonym of bigger was found, check for smaller synsets
-                if not flag_bigger:
                     # retrieve a set of synonyms for the concepts of 'small' and 'smaller'
                     small_synonyms = set(wordnet.synsets('small') + wordnet.synsets('little') + wordnet.synsets(
@@ -113,6 +122,7 @@ def find_comptives_straight_patterns(sentence):
                     smaller_rel_words = [word.name().split('.')[0] for word in smaller_related_words]
                     if token.text.lower() in smaller_rel_words:
                         flag_bigger = 0
                         # comparatives.append({'comparative': [token.text + " " + next_token.text, '<']})
                         comparatives.append({'comparative': '<'})
@@ -668,7 +678,7 @@ def identify_comparatives(sentence):
     # Identify straightforward patterns
     straight_comptives = find_comptives_straight_patterns(sentence)
-    # Identify advanced bigger/smaller comparatives
     bigger_smaller_comparatives = identify_bigger_smaller_advanced(sentence)
     # Identify advanced equal-to comparatives
@@ -733,7 +743,7 @@ def identify_comparatives(sentence):
 def comparatives_binding(sentence):
   try:
     comparative_symbols = find_comptives_symbols(sentence)
     comparative_mentions = identify_comparatives(sentence)

 from sklearn.metrics.pairwise import cosine_similarity
+spacy.cli.download("en_core_web_sm")
 # use the small spaCy model because it keeps us closer to a bag-of-words (BOW) model, which is what we care about here since we only compare words
 nlp_comparatives = spacy.load('en_core_web_sm', disable=["parser", "ner"])
         # find mentions of "equal" followed by "to"
         if token.text.lower() == "equal":
             next_token = token.nbor()
             if next_token.text.lower() == "to":
                 prev_token = token.nbor(-1)
                 if prev_token.pos_ == "NOUN":
                     # comparatives.append({'comparative': ["equal to", "="]})
                     comparatives.append({'comparative': "="})
             next_token = token.nbor()
             if next_token.text.lower() == "than":
                 prev_token = token.nbor(-1)
                 if token.text.lower() == 'more':
                     # comparatives.append({'comparative': [token.text + " " + next_token.text, '>']})
                     comparatives.append({'comparative': '>'})
                 elif token.text.lower() == 'less':
                     # comparatives.append({'comparative': [token.text + " " + next_token.text, '<']})
                     comparatives.append({'comparative': '<'})
         # find mentions of comparative adjectives or comparative adverbs followed by "than"
         elif token.tag_ == "JJR" or token.tag_ == "RBR":
             next_token = token.nbor()
             if next_token.text.lower() == "than" and next_token.nbor().pos_ != "NOUN":
                 # check if the token is a synonym of "bigger"
                 # retrieve a set of synonyms for the concepts of 'big' and 'bigger'
+                big_synonyms = set(wordnet.synsets('big') + wordnet.synsets('large') + wordnet.synsets('great') + wordnet.synsets('huge') + wordnet.synsets('enormous') + wordnet.synsets('heavy') + wordnet.synsets(
                         'strong') + wordnet.synsets('enormous') + wordnet.synsets('massive') + wordnet.synsets(
                         'immense') + wordnet.synsets('substantial'))
                 bigger_synonyms = set(wordnet.synsets('bigger') + wordnet.synsets('larger') + wordnet.synsets(
                     'heavier') + wordnet.synsets('stronger'))
                 bigger_related_words = big_synonyms.union(bigger_synonyms)
                 bigger_rel_words = [word.name().split('.')[0] for word in bigger_related_words]
                 flag_bigger = 0
                 if token.text.lower() in bigger_rel_words:
                     flag_bigger = 1
                     # comparatives.append({'comparative': [token.text + " " + next_token.text, '>']})
                     comparatives.append({'comparative': '>'})
                 # if no synonym of bigger was found, check for smaller synsets
+                if flag_bigger==0:
                     # retrieve a set of synonyms for the concepts of 'small' and 'smaller'
                     small_synonyms = set(wordnet.synsets('small') + wordnet.synsets('little') + wordnet.synsets(
                     smaller_rel_words = [word.name().split('.')[0] for word in smaller_related_words]
                     if token.text.lower() in smaller_rel_words:
                         flag_bigger = 0
                         # comparatives.append({'comparative': [token.text + " " + next_token.text, '<']})
                         comparatives.append({'comparative': '<'})
     # Identify straightforward patterns
     straight_comptives = find_comptives_straight_patterns(sentence)
+    # Identify advanced bigger/smaller comparatives
     bigger_smaller_comparatives = identify_bigger_smaller_advanced(sentence)
     # Identify advanced equal-to comparatives
 def comparatives_binding(sentence):
+  #
   try:
     comparative_symbols = find_comptives_symbols(sentence)
     comparative_mentions = identify_comparatives(sentence)