Spaces:

ValadisCERTH
/

ComparativesModuleSerco

Sleeping

App Files Files Community

ValadisCERTH committed on Mar 29, 2023

Commit

988bfac

1 Parent(s): 66c9e8c

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -35

app.py CHANGED Viewed

@@ -12,7 +12,6 @@ nltk.download('punkt')
 # load the spacy model
 spacy.cli.download("en_core_web_sm")
-spacy.cli.download("en_core_web_lg")
 # use the small spaCy model: it keeps us closer to a bag-of-words (BOW) model, which is what we care about here since we only compare words
 nlp = spacy.load('en_core_web_sm', disable=["parser", "ner"])
@@ -24,6 +23,7 @@ def find_comptives_symbols(sentence):
     If more than one symbol exists, return []
     """
     pattern = r"(?<![<=>])[%s](?![<=>])" % (re.escape("<=>"))
     matches = re.findall(pattern, sentence)
@@ -38,7 +38,7 @@ def find_comptives_symbols(sentence):
 def find_comptives_straight_patterns(sentence):
     """
-    Function to identivy mentions of compartives. The form is "comparative adverbs/adjectives followed by than", "words like more/less followed by than", "equal to"
     """
     doc = nlp(sentence)
@@ -113,7 +113,6 @@ def find_comptives_straight_patterns(sentence):
     return comparatives
 # helper functions for 'identify_pattern_bigger_smaller'
 def identify_comparison(sentence):
@@ -319,7 +318,7 @@ def find_equal_to_comptives_ngrams(sentence):
               similarity = sentence_ngram_doc.similarity(emb_ref)
               if similarity >= max_similarity:
-                  possible_reference_list.append({'comparative': [sentence_ngram_str, emb_ref, similarity, "="]})
                   break
     # if we have found a possible reference that is similar enough with an n-gram of the input sentence, return the comparative '=', otherwise return 0
@@ -329,6 +328,7 @@ def find_equal_to_comptives_ngrams(sentence):
       return []
 def single_verb_comptives(sentence):
     """
     This function takes a sentence and identifies any mention of bigger than, smaller than, equal to, expressed
@@ -363,10 +363,22 @@ def single_verb_comptives(sentence):
                         break
                     elif any(lemma in equal_references_sg for lemma in syn.lemma_names()):
-                        # print(lemma)
                         equal_list.append({'comparative': [token.text, "="]})
                         break
     final_list = bigger_list + smaller_list + equal_list
     if final_list:
@@ -503,6 +515,7 @@ def multiword_verb_comptives(sentence):
     return bigger_l + smaller_l + equal_l
 def identify_comparatives(sentence):
     """
     This function combines the results of all the aforementioned techniques (simple and advanced) to identify bigger than, smaller than, equal to patterns
@@ -535,55 +548,74 @@ def identify_comparatives(sentence):
     return unique_output
-def magnitude_binding(sentence):
-  comparative_symbols = find_comptives_symbols(sentence)
-  comparative_mentions = identify_comparatives(sentence)
-  # starting with the symbols, if one was captured
-  if len(comparative_symbols) == 1:
-    # if the rest of the functions are empty (meaning that there are no other references)
-    if len(comparative_mentions) == 0:
-      return comparative_symbols
-  # in case that there is no symbol
-  elif len(comparative_symbols) == 0:
-    # we need only one mention of comparatives
-    if len(comparative_mentions) == 1:
-      return comparative_mentions
     else:
-      return 0
-  # case of multiple symbol references
-  else:
-    return 0
 from transformers import pipeline
 import gradio as gr
-title = "Natural Language module Demo for Comparatives identification"
-description = "This is a simple demo just for demonstration purposes, so that Serco team might have the chance to validate the results of the Natural Language module concerning the comparatives identification, while in progress"
 examples = [
-    ["earthquake located in Ishkoshim higher than 5, Tajikistan in May the ninth with magnitude equal to 6.2"],
-    ["earthquake located in Ishkoshim, Tajikistan in May the ninth with magnitude < 6.2"],
-    ["earthquake located in Ishkoshim that is > than the one in Rome, and < than 8.2"],
-    ["earthquake located in Ishkoshim, Tajikistan in May the ninth with magnitude lesser than 6.2"],
-    ["earthquake located in Ishkoshim, Tajikistan in May the ninth with magnitude same with 6.2"],
-    ["I want an earthquake that happend in Rome during 2016 with a magnitude dallying of 5."],
-    ["I want an earthquake that happend in Rome during 2016 and surpassed the magnitude of 5."],
-    ["I want an earthquake that happend in Rome during 2016 with a magnitude similar to 5."],
-    ["I want an earthquaqe event that happend in Italy, Rome during 2016 February with a magnitude that was in a par with 5."]
 ]
 gr.Interface(
-    fn=magnitude_binding,
     inputs="text",
     outputs="text",
     title=title,

 # load the spacy model
 spacy.cli.download("en_core_web_sm")
 # use the small spaCy model: it keeps us closer to a bag-of-words (BOW) model, which is what we care about here since we only compare words
 nlp = spacy.load('en_core_web_sm', disable=["parser", "ner"])
     If more than one symbol exists, return []
     """
+    # symbols regex pattern
     pattern = r"(?<![<=>])[%s](?![<=>])" % (re.escape("<=>"))
     matches = re.findall(pattern, sentence)
 def find_comptives_straight_patterns(sentence):
     """
+    Function to identify mentions of comparatives. The form is "comparative adverbs/adjectives followed by than", "words like more/less followed by than", "equal to"
     """
     doc = nlp(sentence)
     return comparatives
 # helper functions for 'identify_pattern_bigger_smaller'
 def identify_comparison(sentence):
               similarity = sentence_ngram_doc.similarity(emb_ref)
               if similarity >= max_similarity:
+                  possible_reference_list.append({'comparative': [sentence_ngram_str, "="]})
                   break
     # if we have found a possible reference that is similar enough with an n-gram of the input sentence, return the comparative '=', otherwise return 0
       return []
 def single_verb_comptives(sentence):
     """
     This function takes a sentence and identifies any mention of bigger than, smaller than, equal to, expressed
                         break
                     elif any(lemma in equal_references_sg for lemma in syn.lemma_names()):
                         equal_list.append({'comparative': [token.text, "="]})
                         break
+                # for syn in synsets:
+                #     antonyms = syn.lemmas()[0].antonyms()
+                #     if antonyms and any(lemma in bigger_references_sg for lemma in antonyms[0].name()):
+                #       return 0
+                #     elif antonyms and any(lemma in lesser_references_sg for lemma in antonyms[0].name()):
+                #       return 0
+                #     elif antonyms and any(lemma in equal_references_sg for lemma in antonyms[0].name()):
+                #       return 0
     final_list = bigger_list + smaller_list + equal_list
     if final_list:
     return bigger_l + smaller_l + equal_l
 def identify_comparatives(sentence):
     """
     This function combines the results of all the aforementioned techniques (simple and advanced) to identify bigger than, smaller than, equal to patterns
     return unique_output
+def comparatives_binding(sentence):
+  try:
+    comparative_symbols = find_comptives_symbols(sentence)
+    comparative_mentions = identify_comparatives(sentence)
+    # starting with the symbols, if one was captured
+    if len(comparative_symbols) == 1:
+      # if the rest of the functions are empty (meaning that there are no other references)
+      if len(comparative_mentions) == 0:
+        return comparative_symbols
+      else:
+        return (0, "COMPARATIVES", "more_comparatives_mentions")
+    # in case that there is no symbol
+    elif len(comparative_symbols) == 0:
+      # we need only one mention of comparatives
+      if len(comparative_mentions) == 1:
+        return comparative_mentions
+      # case of no comparative mentions
+      elif len(comparative_mentions) == 0:
+        return (0, "COMPARATIVES", "no_comparatives")
+      # case of no more than one comparative mentions
+      else:
+        return (0, "COMPARATIVES", "more_comparatives_mentions")
+    # case of multiple symbol references
     else:
+      return (0, "COMPARATIVES", "more_symbol_comparatives")
+  except:
+    return (0, "COMPARATIVES", "unknown_error")
 from transformers import pipeline
 import gradio as gr
+title = "Comparatives Demo"
+description = "This is a simple demo just for demonstration purposes for Serco team, to validate the results of the Natural Language module concerning comparatives identification, while in progress"
 examples = [
+    ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude > 6.2"],
+    ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude = 6.2"],
+    ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude bigger than 6.2"],
+    ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude more than 6.2"],
+    ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude higher than 6.2"],
+    ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude smaller than 6.2"],
+    ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude lesser than 6.2"],
+    ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude equal to 6.2"],
+    ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude equivalent to 6.2"],
+    ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude surpassing 6.2"],
+    ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude lagging of 6.2"],
+    ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude that matches 6.2"],
+    ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude that is superior of 6.2"],
+    ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude that is inferior of 6.2"],
+    ["I want an earthquake that is located in Rome, Italy on 01/01/23 with magnitude that is in line with 6.2"]
 ]
 gr.Interface(
+    fn=comparatives_binding,
     inputs="text",
     outputs="text",
     title=title,