Group singular and plural forms in word clouds
Browse files
- handler.py +15 -17
- utils.py +5 -1
handler.py
CHANGED
|
@@ -10,7 +10,7 @@ nltk.download('stopwords')
|
|
| 10 |
|
| 11 |
from utils import clean_str, clean_str_nopunct
|
| 12 |
import torch
|
| 13 |
-
from utils import MultiHeadModel, BertInputBuilder, get_num_words, MATH_PREFIXES, MATH_WORDS
|
| 14 |
|
| 15 |
import transformers
|
| 16 |
from transformers import BertTokenizer, BertForSequenceClassification
|
|
@@ -182,20 +182,20 @@ class Transcript:
|
|
| 182 |
teacher_dict.update(general_words)
|
| 183 |
else:
|
| 184 |
student_dict.update(general_words)
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
sorted_dict_list = sorted(dict_list, key=lambda x: x['value'], reverse=True)
|
| 200 |
sorted_uptake_dict_list = sorted(uptake_dict_list, key=lambda x: x['value'], reverse=True)
|
| 201 |
sorted_teacher_dict_list = sorted(teacher_dict_list, key=lambda x: x['value'], reverse=True)
|
|
@@ -265,7 +265,6 @@ class QuestionModel:
|
|
| 265 |
return_pooler_output=False)
|
| 266 |
return output
|
| 267 |
|
| 268 |
-
|
| 269 |
class ReasoningModel:
|
| 270 |
def __init__(self, device, tokenizer, input_builder, max_length=128, path=REASONING_MODEL):
|
| 271 |
print("Loading models...")
|
|
@@ -299,7 +298,6 @@ class ReasoningModel:
|
|
| 299 |
token_type_ids=instance["token_type_ids"])
|
| 300 |
return output
|
| 301 |
|
| 302 |
-
|
| 303 |
class UptakeModel:
|
| 304 |
def __init__(self, device, tokenizer, input_builder, max_length=120, path=UPTAKE_MODEL):
|
| 305 |
print("Loading models...")
|
|
|
|
| 10 |
|
| 11 |
from utils import clean_str, clean_str_nopunct
|
| 12 |
import torch
|
| 13 |
+
from utils import MultiHeadModel, BertInputBuilder, get_num_words, MATH_PREFIXES, MATH_WORDS, plural_to_singular
|
| 14 |
|
| 15 |
import transformers
|
| 16 |
from transformers import BertTokenizer, BertForSequenceClassification
|
|
|
|
| 182 |
teacher_dict.update(general_words)
|
| 183 |
else:
|
| 184 |
student_dict.update(general_words)
|
| 185 |
+
|
| 186 |
+
def dict_to_list(d, category):
    """Merge plural and singular spellings of each word into list entries.

    Every key of ``d`` is passed through ``plural_to_singular`` and the
    counts of keys that map to the same result are added together, so
    that a word and its plural show up as a single entry.

    Args:
        d: Mapping of word -> count.
        category: Value stored under the ``'category'`` key of every entry.

    Returns:
        A list of ``{'text': ..., 'value': ..., 'category': ...}`` dicts,
        one per distinct singularised word.
    """
    combined_counts = Counter()
    for word, count in d.items():
        # Counter yields 0 for unseen keys, so += both creates and accumulates.
        combined_counts[plural_to_singular(word)] += count
    return [
        {'text': word, 'value': count, 'category': category}
        for word, count in combined_counts.items()
    ]
|
| 192 |
+
|
| 193 |
+
# Sorting and trimming dictionaries
|
| 194 |
+
dict_list = dict_to_list(teacher_dict, 'general') + dict_to_list(student_dict, 'general')
|
| 195 |
+
uptake_dict_list = dict_to_list(uptake_teacher_dict, 'teacher')
|
| 196 |
+
teacher_dict_list = dict_to_list(teacher_dict, 'general')
|
| 197 |
+
student_dict_list = dict_to_list(student_dict, 'general')
|
| 198 |
+
|
| 199 |
sorted_dict_list = sorted(dict_list, key=lambda x: x['value'], reverse=True)
|
| 200 |
sorted_uptake_dict_list = sorted(uptake_dict_list, key=lambda x: x['value'], reverse=True)
|
| 201 |
sorted_teacher_dict_list = sorted(teacher_dict_list, key=lambda x: x['value'], reverse=True)
|
|
|
|
| 265 |
return_pooler_output=False)
|
| 266 |
return output
|
| 267 |
|
|
|
|
| 268 |
class ReasoningModel:
|
| 269 |
def __init__(self, device, tokenizer, input_builder, max_length=128, path=REASONING_MODEL):
|
| 270 |
print("Loading models...")
|
|
|
|
| 298 |
token_type_ids=instance["token_type_ids"])
|
| 299 |
return output
|
| 300 |
|
|
|
|
| 301 |
class UptakeModel:
|
| 302 |
def __init__(self, device, tokenizer, input_builder, max_length=120, path=UPTAKE_MODEL):
|
| 303 |
print("Loading models...")
|
utils.py
CHANGED
|
@@ -811,7 +811,11 @@ p = inflect.engine()
|
|
| 811 |
def singular_to_plural(word):
|
| 812 |
"""Convert singular words to plural using inflect."""
|
| 813 |
plural = p.plural(word)
|
| 814 |
-
return plural
|
|
|
|
|
|
|
|
|
|
|
|
|
| 815 |
|
| 816 |
plural_MATH_WORDS = [singular_to_plural(word) for word in MATH_WORDS]
|
| 817 |
|
|
|
|
| 811 |
def singular_to_plural(word):
    """Convert a singular word to its plural form using inflect.

    Args:
        word: The word to pluralise.

    Returns:
        The plural produced by ``p.plural``; ``word`` itself when it is
        empty or when inflect yields an empty/falsy result.
    """
    if not word:
        # NOTE(review): some inflect releases validate their arguments and
        # reject an empty string, so never hand one to the engine.
        return word
    return p.plural(word) or word
|
| 815 |
+
|
| 816 |
+
def plural_to_singular(word):
    """Convert a plural word to its singular form using inflect.

    Args:
        word: The word to singularise.

    Returns:
        The result of ``p.singular_noun`` when it is truthy; otherwise
        ``word`` unchanged (including when ``word`` is empty).
    """
    if not word:
        # NOTE(review): some inflect releases validate their arguments and
        # reject an empty string, so never hand one to the engine.
        return word
    # singular_noun gives a falsy value when it has no singular to offer;
    # fall back to the input in that case.
    return p.singular_noun(word) or word
|
| 819 |
|
| 820 |
plural_MATH_WORDS = [singular_to_plural(word) for word in MATH_WORDS]
|
| 821 |
|