Spaces:
Runtime error
Runtime error
Commit ·
3500aa6
1
Parent(s): fa24845
Update comparativesIdentification.py
Browse files
comparativesIdentification.py
CHANGED
|
@@ -11,7 +11,6 @@ spacy.cli.download("en_core_web_sm")
|
|
| 11 |
# use the small spaCy model because it keeps us closer to a bag-of-words (BOW) model, which is what we care about here since we only compare words
|
| 12 |
nlp_comparatives = spacy.load('en_core_web_sm', disable=["parser", "ner"])
|
| 13 |
|
| 14 |
-
|
| 15 |
def find_comptives_symbols(sentence):
|
| 16 |
"""
|
| 17 |
Capture unique cases of symbols like <, >, =, <=, >= and ==
|
|
@@ -28,7 +27,8 @@ def find_comptives_symbols(sentence):
|
|
| 28 |
|
| 29 |
found_symbols = []
|
| 30 |
for matching in matches:
|
| 31 |
-
found_symbols.append({'comparative': ['symbol', matching]})
|
|
|
|
| 32 |
|
| 33 |
return found_symbols
|
| 34 |
|
|
@@ -616,11 +616,17 @@ def identify_double_symbol_comparisons(sentence):
|
|
| 616 |
|
| 617 |
comparative_list = [{'comparative': []}]
|
| 618 |
for phrase, operator in zip(found_phrases, found_operators):
|
| 619 |
-
comparative_list[0]['comparative'].append(phrase)
|
| 620 |
-
comparative_list[0]['comparative'].append(operator)
|
|
|
|
|
|
|
| 621 |
|
| 622 |
-
|
| 623 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 624 |
|
| 625 |
|
| 626 |
def check_substrings(lst):
|
|
@@ -643,6 +649,22 @@ def identify_comparatives(sentence):
|
|
| 643 |
This function combines the results of all the aforementioned techniques (simple and advanced) to identify "bigger than", "smaller than" and "equal to" patterns
|
| 644 |
"""
|
| 645 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 646 |
# Identify straightforward patterns
|
| 647 |
straight_comptives = find_comptives_straight_patterns(sentence)
|
| 648 |
|
|
@@ -656,8 +678,6 @@ def identify_comparatives(sentence):
|
|
| 656 |
|
| 657 |
multi_verb = multiword_verb_comptives(sentence)
|
| 658 |
|
| 659 |
-
identify_double_symbols = identify_double_symbol_comparisons(sentence)
|
| 660 |
-
|
| 661 |
# return all the patterns that were captured
|
| 662 |
comparatives = straight_comptives + bigger_smaller_comparatives + equal_to_comparatives + single_verb + multi_verb + identify_double_symbols
|
| 663 |
|
|
@@ -748,4 +768,4 @@ def comparatives_binding(sentence):
|
|
| 748 |
return (0, "COMPARATIVES", "more_symbol_comparatives")
|
| 749 |
|
| 750 |
except:
|
| 751 |
-
return (0, "COMPARATIVES", "unknown_error")
|
|
|
|
| 11 |
# use the small spaCy model because it keeps us closer to a bag-of-words (BOW) model, which is what we care about here since we only compare words
|
| 12 |
nlp_comparatives = spacy.load('en_core_web_sm', disable=["parser", "ner"])
|
| 13 |
|
|
|
|
| 14 |
def find_comptives_symbols(sentence):
|
| 15 |
"""
|
| 16 |
Capture unique cases of symbols like <, >, =, <=, >= and ==
|
|
|
|
| 27 |
|
| 28 |
found_symbols = []
|
| 29 |
for matching in matches:
|
| 30 |
+
# found_symbols.append({'comparative': ['symbol', matching]})
|
| 31 |
+
found_symbols.append({'comparative': matching})
|
| 32 |
|
| 33 |
return found_symbols
|
| 34 |
|
|
|
|
| 616 |
|
| 617 |
comparative_list = [{'comparative': []}]
|
| 618 |
for phrase, operator in zip(found_phrases, found_operators):
|
| 619 |
+
# comparative_list[0]['comparative'].append(phrase)
|
| 620 |
+
comparative_list[0]['comparative'].append((phrase, operator))
|
| 621 |
+
|
| 622 |
+
final_comptives_list = [{'comparative': comparative_list[0]['comparative'][i:i + 2]} for i in range(0, len(comparative_list[0]['comparative']), 2)]
|
| 623 |
|
| 624 |
+
final_clean_list = []
|
| 625 |
+
for item in final_comptives_list:
|
| 626 |
+
for value in item['comparative']:
|
| 627 |
+
final_clean_list.append({'comparative': value})
|
| 628 |
+
|
| 629 |
+
return final_clean_list
|
| 630 |
|
| 631 |
|
| 632 |
def check_substrings(lst):
|
|
|
|
| 649 |
This function combines the results of all the aforementioned techniques (simple and advanced) to identify "bigger than", "smaller than" and "equal to" patterns
|
| 650 |
"""
|
| 651 |
|
| 652 |
+
# first identify the double symbols (<= >= ==)
|
| 653 |
+
identify_double_symbols_initial = identify_double_symbol_comparisons(sentence)
|
| 654 |
+
|
| 655 |
+
# this is needed because, for example, "bigger than" is a subset of "bigger or equal than", so leaving the double-symbol phrase in the sentence would return conflicting matches
|
| 656 |
+
if identify_double_symbols_initial:
|
| 657 |
+
for elem in identify_double_symbols_initial:
|
| 658 |
+
sentence = sentence.replace(elem['comparative'][0], " ")
|
| 659 |
+
|
| 660 |
+
identify_double_symbols = []
|
| 661 |
+
|
| 662 |
+
for item in identify_double_symbols_initial:
|
| 663 |
+
for k, v in item.items():
|
| 664 |
+
if isinstance(v, tuple):
|
| 665 |
+
item[k] = v[1]
|
| 666 |
+
identify_double_symbols.append(item)
|
| 667 |
+
|
| 668 |
# Identify straightforward patterns
|
| 669 |
straight_comptives = find_comptives_straight_patterns(sentence)
|
| 670 |
|
|
|
|
| 678 |
|
| 679 |
multi_verb = multiword_verb_comptives(sentence)
|
| 680 |
|
|
|
|
|
|
|
| 681 |
# return all the patterns that were captured
|
| 682 |
comparatives = straight_comptives + bigger_smaller_comparatives + equal_to_comparatives + single_verb + multi_verb + identify_double_symbols
|
| 683 |
|
|
|
|
| 768 |
return (0, "COMPARATIVES", "more_symbol_comparatives")
|
| 769 |
|
| 770 |
except:
|
| 771 |
+
return (0, "COMPARATIVES", "unknown_error")
|