Spaces:
Runtime error
Runtime error
a-v-bely
committed on
Commit
·
28dab52
1
Parent(s):
703d114
Fix bugs
Browse files
utilities_language_general/rus_utils.py
CHANGED
|
@@ -281,7 +281,7 @@ def get_distractors_from_model(doc, model, scaler, classifier, pos_dict:dict, ta
|
|
| 281 |
and decision
|
| 282 |
and distractor_lemma != lemma
|
| 283 |
and len(distractors) < 100
|
| 284 |
-
and (candidate_gender == gender and level_name in ('B1', 'B2', 'C1', 'C2'))
|
| 285 |
and length_ratio <= max_length_ratio
|
| 286 |
and distractor_lemma not in global_distractors
|
| 287 |
and edit_distance(lemma, distractor_lemma) / ((len(lemma) + len(distractor_lemma)) / 2) >
|
|
@@ -305,8 +305,8 @@ def get_distractors_from_model(doc, model, scaler, classifier, pos_dict:dict, ta
|
|
| 305 |
level=level_name, target_lemma=query, target_text=target_text, target_pos=pos, target_position=lemma_index,
|
| 306 |
substitute_lemma=candidate[0], substitute_pos=d_pos)
|
| 307 |
condition = (((d1_pos == pos or d2_pos == pos)
|
| 308 |
-
or (COMBINE_POS['simple'][level_name].get(pos) is not None and COMBINE_POS['simple'][level_name].get(
|
| 309 |
-
and
|
| 310 |
or (d1_pos in ('VERB', 'AUX', 'SCONJ', 'ADP')
|
| 311 |
and pos in ('phrase', 'VERB', 'AUX', 'SCONJ', 'ADP'))
|
| 312 |
or (d2_pos in ('VERB', 'AUX', 'SCONJ', 'ADP')
|
|
@@ -359,7 +359,6 @@ def get_distractors_from_model_bert(model, scaler, classifier, pos_dict:dict, le
|
|
| 359 |
distractor_lemma, distractor_pos = candidate_morph.lemma_, candidate_morph.pos_
|
| 360 |
distractor_similarity = candidate_distractor[1]
|
| 361 |
candidate_gender = define_gender(distractor_lemma)
|
| 362 |
-
# print(distractor_lemma, candidate_gender, distractor_pos, pos)
|
| 363 |
length_ratio = abs(len(lemma) - len(distractor_lemma))
|
| 364 |
decision = make_decision(doc=None, model_type='bert', scaler=scaler, classifier=classifier, pos_dict=pos_dict, level=level_name,
|
| 365 |
target_lemma=lemma, target_text=None, target_pos=pos, target_position=None,
|
|
@@ -370,7 +369,7 @@ def get_distractors_from_model_bert(model, scaler, classifier, pos_dict:dict, le
|
|
| 370 |
and decision
|
| 371 |
and distractor_lemma != lemma
|
| 372 |
and (len(_distractors) < max_num_distractors + 10)
|
| 373 |
-
and (candidate_gender == gender and level_name in ('B1', 'B2', 'C1', 'C2'))
|
| 374 |
and (length_ratio <= max_length_ratio) # May be changed if case of phrases
|
| 375 |
and (distractor_lemma not in global_distractors)
|
| 376 |
and (edit_distance(lemma, distractor_lemma) # May be changed if case of phrases
|
|
|
|
| 281 |
and decision
|
| 282 |
and distractor_lemma != lemma
|
| 283 |
and len(distractors) < 100
|
| 284 |
+
and (candidate_gender == gender and level_name in ('B1', 'B2', 'C1', 'C2') or level_name in ('A1', 'A2'))
|
| 285 |
and length_ratio <= max_length_ratio
|
| 286 |
and distractor_lemma not in global_distractors
|
| 287 |
and edit_distance(lemma, distractor_lemma) / ((len(lemma) + len(distractor_lemma)) / 2) >
|
|
|
|
| 305 |
level=level_name, target_lemma=query, target_text=target_text, target_pos=pos, target_position=lemma_index,
|
| 306 |
substitute_lemma=candidate[0], substitute_pos=d_pos)
|
| 307 |
condition = (((d1_pos == pos or d2_pos == pos)
|
| 308 |
+
or (COMBINE_POS['simple'][level_name].get(pos) is not None and COMBINE_POS['simple'][level_name].get(d_pos) is not None
|
| 309 |
+
and d_pos in COMBINE_POS['simple'][level_name][pos] and pos in COMBINE_POS['simple'][level_name][d_pos])
|
| 310 |
or (d1_pos in ('VERB', 'AUX', 'SCONJ', 'ADP')
|
| 311 |
and pos in ('phrase', 'VERB', 'AUX', 'SCONJ', 'ADP'))
|
| 312 |
or (d2_pos in ('VERB', 'AUX', 'SCONJ', 'ADP')
|
|
|
|
| 359 |
distractor_lemma, distractor_pos = candidate_morph.lemma_, candidate_morph.pos_
|
| 360 |
distractor_similarity = candidate_distractor[1]
|
| 361 |
candidate_gender = define_gender(distractor_lemma)
|
|
|
|
| 362 |
length_ratio = abs(len(lemma) - len(distractor_lemma))
|
| 363 |
decision = make_decision(doc=None, model_type='bert', scaler=scaler, classifier=classifier, pos_dict=pos_dict, level=level_name,
|
| 364 |
target_lemma=lemma, target_text=None, target_pos=pos, target_position=None,
|
|
|
|
| 369 |
and decision
|
| 370 |
and distractor_lemma != lemma
|
| 371 |
and (len(_distractors) < max_num_distractors + 10)
|
| 372 |
+
and (candidate_gender == gender and level_name in ('B1', 'B2', 'C1', 'C2') or level_name in ('A1', 'A2'))
|
| 373 |
and (length_ratio <= max_length_ratio) # May be changed if case of phrases
|
| 374 |
and (distractor_lemma not in global_distractors)
|
| 375 |
and (edit_distance(lemma, distractor_lemma) # May be changed if case of phrases
|
utilities_language_w2v/rus_sentence_w2v.py
CHANGED
|
@@ -86,6 +86,7 @@ class SENTENCE:
|
|
| 86 |
for _utw in user_target_words:
|
| 87 |
if _utw in self.original:
|
| 88 |
parse_utw = nlp(_utw)
|
|
|
|
| 89 |
if ' ' in _utw:
|
| 90 |
tags = get_tags(parse_utw[0].text)[0] | get_tags(parse_utw[1].text)[0]
|
| 91 |
user_target_word_lemma = '_'.join([f'{token.lemma_}_{token.pos_}' for token in parse_utw])
|
|
@@ -103,7 +104,7 @@ class SENTENCE:
|
|
| 103 |
'original_text': _utw,
|
| 104 |
'lemma': user_target_word_lemma,
|
| 105 |
'pos': user_target_word_pos,
|
| 106 |
-
'gender':
|
| 107 |
'tags': user_target_word_tags,
|
| 108 |
'position_in_sentence': self.original.find(_utw),
|
| 109 |
'not_named_entity': not_ner,
|
|
|
|
| 86 |
for _utw in user_target_words:
|
| 87 |
if _utw in self.original:
|
| 88 |
parse_utw = nlp(_utw)
|
| 89 |
+
gender = convert_gender(parse_utw[0].morph.to_dict().get('Gender'))
|
| 90 |
if ' ' in _utw:
|
| 91 |
tags = get_tags(parse_utw[0].text)[0] | get_tags(parse_utw[1].text)[0]
|
| 92 |
user_target_word_lemma = '_'.join([f'{token.lemma_}_{token.pos_}' for token in parse_utw])
|
|
|
|
| 104 |
'original_text': _utw,
|
| 105 |
'lemma': user_target_word_lemma,
|
| 106 |
'pos': user_target_word_pos,
|
| 107 |
+
'gender': gender if gender else 'masc',
|
| 108 |
'tags': user_target_word_tags,
|
| 109 |
'position_in_sentence': self.original.find(_utw),
|
| 110 |
'not_named_entity': not_ner,
|