Spaces:
Sleeping
Sleeping
| import re | |
| import math | |
| from pathlib import Path | |
| from typing import List, Dict | |
| from translate import Translator | |
| from config import * | |
| from prompt import * | |
| from llm import OpenAI3, OpenAI4 | |
| import random | |
| # question_type_to_material_type = {'WordCompleteChoice': 'word', | |
| # 'WordCompleteBlank': 'word', | |
| # 'WordRightSpellingChoice': 'word', | |
| # 'PhraseTranslatingMatchChoice': 'phrase', | |
| # 'SentenceRecompositionBlank': 'sentence', | |
| # 'SentenceCompleteChoice': 'sentence', | |
| # 'GrammarChoice': 'grammar' | |
| # } | |
# Maps the question title shown to the user (the same text as each generator
# class's ``title`` attribute) to the name of the class that builds that
# kind of question.
question_type_to_class_type = {
    '请将选择正确的拼写补全单词': 'WordCompleteChoice',
    '根据汉语意思补全单词': 'WordCompleteBlank',
    '根据汉语意思选出拼写正确的单词': 'WordRightSpellingChoice',
    '请根据汉语意思选择正确的短语': 'PhraseTranslatingMatchChoice',
    '请将下列单词组成完整的一句话': 'SentenceRecompositionBlank',
    '请选择正确的短语补全句子': 'SentenceCompleteChoice',
    '请根据语法知识选择正确的选项补全句子': 'GrammarChoice',
}
def trans_en_to_cn(word: str) -> str:
    """Translate an English word or phrase into Chinese.

    'crayon' and 'Crayon' are answered from a hard-coded value; every other
    input is passed to ``Translator`` (English -> Chinese).

    :param word: English text to translate.
    :return: the Chinese translation.
    """
    if word in ('crayon', 'Crayon'):
        return '蜡笔'
    translator = Translator(from_lang='English', to_lang='Chinese')
    return translator.translate(word)
def delete_index(string: str) -> str:
    """Remove a one-character option label such as ``A.`` or ``1)``.

    If the second character of ``string`` is a label separator, the first two
    characters are dropped; nothing else is trimmed (``'A: x'`` becomes
    ``' x'``). Strings of length 0 or 1 are returned unchanged, which keeps a
    single-letter option intact.

    :param string: option text, possibly prefixed with a label.
    :return: the text without the label.
    """
    # NOTE(review): the list previously contained ':' and ')' twice, which
    # suggests the second copies were meant to be the full-width forms that
    # appear in Chinese text. They are written as escapes so they cannot be
    # flattened to ASCII again.
    separators = (
        '.', ':', '、', ')',
        '\uff1a',  # full-width colon
        '\uff09',  # full-width right parenthesis
    )
    if len(string) > 1 and string[1] in separators:
        return string[2:]
    return string
def normalize_options_and_answer(answer, options):
    """Shuffle the options, relabel them, and label the answer to match.

    With exactly one option nothing is shuffled or labelled: the answer is
    returned as given together with that single option string.

    Otherwise ``options`` is shuffled in place, existing labels are removed
    with ``delete_index``, and every option is rendered as
    ``<label>:<text>`` where the label is ``number_letter_dict[position]``
    (defined outside the visible code; presumably 0 -> 'A', 1 -> 'B', ...).

    :param answer: text of the correct option (may carry an old label).
    :param options: list of option strings; must contain the answer.
    :return: ``(labelled_answer, options_joined_by_commas)``.
    :raises ValueError: if the answer is not among the options.
    """
    if len(options) == 1:
        return answer, options[0]

    random.shuffle(options)
    bare_options = [delete_index(option) for option in options]
    bare_answer = delete_index(answer)
    position = bare_options.index(bare_answer)
    labelled_answer = number_letter_dict[position] + ':' + bare_answer
    labelled_options = [
        number_letter_dict[i] + ':' + text for i, text in enumerate(bare_options)
    ]
    return labelled_answer, ','.join(labelled_options)
def response_to_question(response):
    """Parse an LLM reply into a ``(question, answer)`` pair.

    Three layouts are tried in order:

    1. ``Stem: ... Options: ... Answer: ...``
    2. ``Stem: ... Options: ...`` where the correct option is tagged with
       ``correct answer`` / ``(correct answer)`` inside the options
    3. ``Question: ... Answer: ...`` (no options; returned as parsed)

    For layouts 1 and 2 the options are split per line, shuffled and
    relabelled by ``normalize_options_and_answer``, and the question is the
    stem followed by a newline and the option string.

    Any parsing problem is reported with ``print`` and yields
    ``('fail!', 'fail!')``; the function does not raise for bad replies.

    :param response: raw text returned by the model.
    :return: ``(question, answer)`` strings, or ``('fail!', 'fail!')``.
    """
    stem_or_options = (r'stem: |stem:|Stem: |Stem:'
                       r'|options: |options:|Options: |Options:')
    answer_labels = (r'|Correct answer: |Correct answer:|correct answer: |correct answer:'
                     r'|answer: |answer:|Answer: |Answer:')
    question_labels = r'question: |question:|Question: |Question:'

    try:
        question = None
        # A wrong number of pieces makes the tuple unpacking raise
        # ValueError; that is the only error that means "try the next layout".
        try:
            _, stem, options, answer = re.split(stem_or_options + answer_labels, response)
        except ValueError:
            try:
                _, stem, options = re.split(stem_or_options, response)
                answer = None
            except ValueError:
                _, question, answer = re.split(question_labels + answer_labels, response)
                stem = None
                options = None
                question = question.rstrip('\n')
                if not question:
                    raise ValueError('question text is empty')

        if stem:
            stem = stem.rstrip('\n')
            if not stem:
                raise ValueError('stem contains only newlines')

        if options:
            option_lines = [line for line in options.split('\n') if line != '']
            if answer is None:
                # Layout 2: recover the answer from the tagged option and
                # remove the tag from that option.
                answer = 'None'
                for i, line in enumerate(option_lines):
                    if 'correct answer' in line:
                        pieces = re.split(r'\(correct answer\)|correct answer', line)
                        answer = [piece for piece in pieces if piece is not None][0]
                        option_lines[i] = answer
            answer = answer.split('\n')[0]
            answer, options = normalize_options_and_answer(answer, option_lines)
            question = stem + '\n' + options

        if question is None:
            # A stem was found but the options section was empty; previously
            # this left ``question`` unbound and the return statement raised.
            raise ValueError('response contains no options')
    except Exception as e:
        print(f'fail!,reason:{e},response:{response}')
        question = 'fail!'
        answer = 'fail!'
    return question, answer
def response_to_options(response):
    """Turn a multi-line model reply into a list of option strings.

    The reply is split on newlines and each line has its leading label
    removed by ``delete_index``. Empty lines are kept as empty strings.

    :param response: raw text returned by the model, one option per line.
    :return: list of option strings.
    """
    return [delete_index(line) for line in response.split('\n')]
| def _materials_select(materials: List[str], question_num: int, option_num: int = 1): | |
| """ | |
| :param materials: | |
| :param question_num: | |
| :param option_num: | |
| :return: | |
| """ | |
| material = materials * math.ceil(option_num/len(materials)) | |
| select_materials = [random.sample(material, option_num) for i in range(question_num)] | |
| if option_num == 1: | |
| select_materials = [i[0] for i in select_materials] | |
| # random.shuffle(material_index) | |
| # if len(material_index) >= question_num: | |
| # material_index = material_index[:question_num] | |
| # else: | |
| # times = int(question_num / len(materials)) | |
| # remain = question_num - len(materials) * times | |
| # material_index = material_index * times + material_index[:remain] | |
| # select_materials = [materials[i] for i in material_index] | |
| return select_materials | |
def _generate(prompt, model='openai_3'):
    """Send ``prompt`` to the selected language model and return its reply.

    :param prompt: text passed to the model.
    :param model: key into ``model_name`` (defined outside the visible code).
    :return: whatever the model object returns when called with the prompt.
    :raises KeyError: if ``model`` is not a key of ``model_name``.
    """
    # NOTE(review): eval() turns the string stored in model_name into a class
    # (presumably OpenAI3 / OpenAI4 from the llm import) -- keep model_name
    # restricted to trusted values.
    llm = eval(model_name[model])()
    return llm(prompt)
def _generate_fake_word(word):
    """Return a misspelled variant of ``word`` for use as a distractor.

    Strategy, in order of preference (``vowels_list`` and ``vowel_list`` are
    defined outside the visible code):

    1. If the word contains an entry of ``vowels_list``, one occurrence of a
       randomly chosen entry is replaced by a different entry of the same
       length.
    2. Otherwise, if it contains a character from ``vowel_list``, one such
       character is replaced by a different one from ``vowel_list``.
    3. Otherwise one randomly chosen letter is doubled.

    :param word: correctly spelled word; must not be empty.
    :return: the altered word.
    :raises IndexError: if no same-length replacement exists in case 1, or
        if ``word`` is empty.
    """
    groups_in_word = [group for group in vowels_list if group in word]
    if groups_in_word:
        true_letters = groups_in_word[random.randint(0, len(groups_in_word) - 1)]
        candidates = [
            group for group in vowels_list
            if len(group) == len(true_letters) and group != true_letters
        ]
        fake_letters = random.choice(candidates)
        pieces = word.split(true_letters)
        # Only one of the occurrences is corrupted; the others are restored.
        target = random.randint(0, len(pieces) - 2)
        for i in range(len(pieces) - 1):
            pieces[i] += fake_letters if i == target else true_letters
        return ''.join(pieces)

    vowel_positions = [i for i, letter in enumerate(word) if letter in vowel_list]
    if vowel_positions:
        index = random.choice(vowel_positions)
        alternatives = list(vowel_list)
        alternatives.remove(word[index])
        return word[:index] + random.choice(alternatives) + word[index + 1:]

    index = random.choice(range(len(word)))
    # Double the letter at ``index``. The tail must be word[index:]; using
    # word[:index] here dropped the rest of the word.
    return word[:index] + word[index] + word[index:]
class WordCompleteChoice(object):
    """Multiple-choice question: pick the letters that complete a word."""

    title = '请将选择正确的拼写补全单词'

    @staticmethod
    def generate(words: List[str], question_num: int = 1, option_num: int = 3, model=None) -> List[Dict]:
        """Build ``question_num`` word-completion questions.

        For a word containing an entry of ``vowels_list`` (defined outside
        the visible code), one occurrence of such an entry is blanked out and
        the distractors are other entries of the same length. For any other
        word, one to three letters are blanked out and the distractors are
        other letter picks from the same word.

        :param words: candidate words.
        :param question_num: number of questions to build.
        :param option_num: number of options per question; fewer are produced
            when not enough distinct distractors exist.
        :param model: unused; accepted so all generators share one signature.
        :return: list of ``{'question': ..., 'answer': ...}`` dicts.
        """
        responses = []
        for word in _materials_select(words, question_num):
            word_vowels = [group for group in vowels_list if group in word]
            if word_vowels:
                answer = word_vowels[random.randint(0, len(word_vowels) - 1)]
                pieces = word.split(answer)
                blanked = random.randint(0, len(pieces) - 2)
                stem = ''
                for i in range(len(pieces) - 1):
                    filler = '_' * len(answer) if i == blanked else answer
                    stem += pieces[i] + filler
                stem += pieces[-1]
                options = [
                    group for group in vowels_list
                    if len(group) == len(answer) and group != answer
                ]
                if len(options) > option_num - 1:
                    random.shuffle(options)
                    options = options[:option_num - 1]
                options.append(answer)
            else:
                positions = list(range(len(word)))
                # Blank 2-3 letters but keep at least three visible; never
                # fewer than one blank, otherwise the answer would be empty
                # and no distinct distractor could ever be found.
                num = max(1, min(random.randint(2, 3), len(word) - 3))
                random.shuffle(positions)
                ans_index = sorted(positions[:num])
                answer = ','.join(word[i] for i in ans_index)
                stem = ''.join(
                    '_' if i in ans_index else letter for i, letter in enumerate(word)
                )
                options = [answer]
                # Bounded retry: short or repetitive words may not have enough
                # distinct letter combinations to fill every option slot.
                attempts_left = 20 * option_num
                while len(options) < option_num and attempts_left > 0:
                    attempts_left -= 1
                    random.shuffle(positions)
                    option = ','.join(word[i] for i in positions[:num])
                    if option not in options:
                        options.append(option)
            answer, options = normalize_options_and_answer(answer, options)
            question = stem + '\n' + options
            responses.append({'question': question, 'answer': answer})
        return responses
class WordCompleteBlank(object):
    """Fill-in-the-blank question: complete a word given its Chinese meaning."""

    title = '根据汉语意思补全单词'

    @classmethod
    def generate(cls, words: List[str], question_num: int = 1, option_num: int = 2, model=None) -> List[Dict]:
        """Build ``question_num`` questions, each containing ``option_num`` words.

        Every word is shown as ``<chinese>:<word with blanks>``. If the word
        contains an entry of ``vowels_list`` (defined outside the visible
        code), one occurrence of such an entry is blanked; otherwise a run of
        one to three consecutive letters is blanked.

        :param words: candidate words.
        :param question_num: number of questions to build.
        :param option_num: number of words per question.
        :param model: unused; accepted so all generators share one signature.
        :return: list of ``{'question': ..., 'answer': ...}`` dicts; the items
            of one question are joined with spaces.
        """
        responses = []
        for group in _materials_select(words, question_num, option_num):
            if isinstance(group, str):
                # With option_num == 1 the selector returns bare strings;
                # iterating one would walk over its characters.
                group = [group]
            stem_list = []
            answer_list = []
            for word in group:
                word_vowels = [vowel for vowel in vowels_list if vowel in word]
                chinese = trans_en_to_cn(word)
                if word_vowels:
                    letters = word_vowels[random.randint(0, len(word_vowels) - 1)]
                    pieces = word.split(letters)
                    blanked = random.randint(0, len(pieces) - 2)
                    stem = chinese + ':'
                    for i in range(len(pieces) - 1):
                        filler = '_' * len(letters) if i == blanked else letters
                        stem += pieces[i] + filler
                    stem += pieces[-1]
                else:
                    # Blank 2-3 letters, keep at least two visible, and never
                    # blank fewer than one.
                    num = max(1, min(random.randint(2, 3), len(word) - 2))
                    # randint is inclusive: the last valid start of a run of
                    # ``num`` letters is len(word) - num.
                    start = random.randint(0, max(len(word) - num, 0))
                    stem = chinese + ':' + word[:start] + '_' * num + word[start + num:]
                stem_list.append(stem)
                answer_list.append(chinese + ':' + word)
            responses.append({'question': ' '.join(stem_list), 'answer': ' '.join(answer_list)})
        return responses
class WordRightSpellingChoice(object):
    """Multiple-choice question: pick the correctly spelled word for a meaning."""

    title = '根据汉语意思选出拼写正确的单词'

    # Declared as a classmethod because the module-level dispatcher calls
    # ``<Class>.generate(materials, q_num, ...)`` without passing ``cls``.
    @classmethod
    def generate(cls, words: List[str], question_num: int = 1, option_num: int = 3, model=None) -> List[Dict]:
        """Build ``question_num`` spelling questions.

        The stem is the Chinese translation of the word; the options are the
        word itself plus ``option_num - 1`` misspellings produced by
        ``_generate_fake_word``.

        :param words: candidate words.
        :param question_num: number of questions to build.
        :param option_num: number of options per question.
        :param model: unused; accepted so all generators share one signature.
        :return: list of ``{'question': ..., 'answer': ...}`` dicts.
        """
        responses = []
        for word in _materials_select(words, question_num):
            stem = trans_en_to_cn(word)
            options = [_generate_fake_word(word) for _ in range(option_num - 1)]
            options.append(word)
            answer, options = normalize_options_and_answer(word, options)
            responses.append({'question': stem + '\n' + options, 'answer': answer})
        return responses
class SentenceRecompositionBlank(object):
    """Question type: rebuild a sentence from its shuffled words."""

    title = '请将下列单词组成完整的一句话'
    generate_similar_sentence_prompt = ('Please generate a sentence with the same sentence structure as '
                                        '<{sentence}> using simple vocabulary')

    @classmethod
    def generate(cls, sentences: List[str], question_num: int = 1, option_num=None, model=None,
                 generate_mode: bool = False) -> List[Dict]:
        """Build ``question_num`` sentence-recomposition questions.

        The question shows one ``______`` per word (sentence-final punctuation
        stays attached to its blank), followed by the words in shuffled order.

        :param sentences: candidate sentences.
        :param question_num: number of questions to build.
        :param option_num: unused; accepted so all generators share one signature.
        :param model: model key passed to ``_generate`` when ``generate_mode``
            is on.
        :param generate_mode: when true, ask the model for a similar sentence
            and use that instead; on any error the original sentence is used.
        :return: list of ``{'question': ..., 'answer': ...}`` dicts.
        """
        # '\uff1f' and '\uff01' are the full-width question and exclamation
        # marks. NOTE(review): the list previously held '?' and '!' twice,
        # presumably flattened full-width forms.
        terminal_marks = ('.', '?', '!', '。', '\uff1f', '\uff01')
        responses = []
        for sentence in _materials_select(sentences, question_num):
            sent = sentence
            if generate_mode:
                try:
                    prompt = cls.generate_similar_sentence_prompt.format(sentence=sentence)
                    generated = _generate(prompt, model)
                    # Keep only the first sentence of the reply.
                    sent = re.split(r'[.?!]', generated)[0]
                except Exception:
                    # Best effort: fall back to the source sentence.
                    sent = sentence
            answer = sent
            # split() without an argument ignores repeated, leading and
            # trailing whitespace, so there are no empty tokens to index.
            words = sent.split()
            stem = ['______'] * len(words)
            for i, token in enumerate(words):
                if token[-1] in terminal_marks:
                    stem[i] += token[-1]
                    words[i] = token[:-1]
            original_order = words.copy()
            # Reshuffle until the order differs; impossible (and therefore
            # skipped) when there are fewer than two distinct words.
            if len(set(words)) > 1:
                while words == original_order:
                    random.shuffle(words)
            question = ' '.join(stem) + '\n' + ', '.join(words)
            responses.append({'question': question, 'answer': answer})
        return responses
class PhraseTranslatingMatchChoice(object):
    """Multiple-choice question: pick the phrase matching a Chinese meaning."""

    title = '请根据汉语意思选择正确的短语'
    generate_similar_phrase_prompt = ('Please generate {num} phrases with the same phrase structure as <{phrase}> by '
                                      'replacing a single word in this phrase. Please return the answer in a list. '
                                      'Try your best to use simple vocabulary when generating phrases.')
    translate_chinese_phrase_prompt = 'Please translate the following phrase to chinese: <{phrase}>'

    @classmethod
    def generate(cls, phrases: List[str], question_num: int = 1, option_num: int = 2, model=None) -> List[Dict]:
        """Build up to ``question_num`` phrase-matching questions.

        For each phrase the model is asked for ``option_num - 1`` similar
        phrases (the distractors) and for a Chinese translation (the stem).
        A phrase whose generation fails is reported with ``print`` and
        skipped, so fewer questions than requested may be returned.

        :param phrases: candidate phrases.
        :param question_num: number of questions to attempt.
        :param option_num: number of options per question.
        :param model: model key passed to ``_generate``.
        :return: list of ``{'question': ..., 'answer': ...}`` dicts.
        """
        responses = []
        num = option_num - 1
        for phrase in _materials_select(phrases, question_num):
            try:
                phrase = phrase[0].upper() + phrase[1:]
                option_prompt = cls.generate_similar_phrase_prompt.format(num=num, phrase=phrase)
                options = response_to_options(_generate(option_prompt, model))
                options.append(phrase)
                answer, options = normalize_options_and_answer(phrase, options)
                stem_prompt = cls.translate_chinese_phrase_prompt.format(phrase=phrase)
                stem_response = _generate(stem_prompt, model)
                responses.append({'question': stem_response + '\n' + options, 'answer': answer})
            except Exception as e:
                # Best effort: one failed phrase must not abort the batch.
                print(e)
        return responses
class SentenceCompleteChoice(object):
    """Multiple-choice question: pick the phrase that completes a sentence."""

    title = '请选择正确的短语补全句子'
    # The parenthesised literal replaces a chain of backslash continuations
    # whose last one dangled into the following ``def``. A space was added
    # after the example options so "graduate" and "Answer:" no longer fuse.
    generate_sentence_complete_question_prompt = (
        'Assuming you are an English teacher and a question consists of three '
        'parts: the stem, options, and answer. please provide a multiple-choice '
        'question based on the sentence <{sentence}>.The specific steps are: '
        'Randomly blank out a part of the sentence and replace it with ____ to '
        'create the stem.Generate {num} new phrases/words that are '
        'structurally consistent with the blanked-out part and include the new '
        'phrases/words and blanked-out part as the options.Use the blanked-out '
        'part as the answer. For example:'
        'Stem: And now it is time to ____, we will leave our lovely school '
        'Options: A: celebrate B: say goodbye C: take a break D: graduate '
        'Answer: D: graduate '
    )

    @classmethod
    def generate(cls, sentences: List[str], question_num: int = 1, option_num: int = 3, model=None) -> List[Dict]:
        """Build up to ``question_num`` sentence-completion questions.

        The prompt asks the model to blank part of the sentence and produce
        ``option_num - 1`` distractors; the reply is parsed by
        ``response_to_question``. A sentence whose generation raises is
        reported with ``print`` and skipped.

        :param sentences: candidate sentences.
        :param question_num: number of questions to attempt.
        :param option_num: number of options per question.
        :param model: model key passed to ``_generate``.
        :return: list of ``{'question': ..., 'answer': ...}`` dicts.
        """
        responses = []
        num = option_num - 1
        for sentence in _materials_select(sentences, question_num):
            try:
                prompt = cls.generate_sentence_complete_question_prompt.format(sentence=sentence, num=num)
                # Every period becomes a line break, including those in the
                # source sentence.
                prompt = prompt.replace('.', '\n')
                response = _generate(prompt, model)
                question, answer = response_to_question(response)
                responses.append({'question': question, 'answer': answer})
            except Exception as e:
                # Best effort: one failed sentence must not abort the batch.
                print(e)
        return responses
class GrammarChoice(object):
    """Multiple-choice question: pick the grammatically correct completion."""

    title = '请根据语法知识选择正确的选项补全句子'
    # The example answer used to read 'D: graduate', which is not one of the
    # example options; it now names the option that fits the example stem.
    generate_grammar_question_prompt = (
        'Assuming you are an English teacher and a question consists of three parts: the '
        'stem, options, and answer. Please provide a sentence completion type '
        'multiple-choice question to test students understanding of the grammar rule'
        ' <{grammar}>.The specific steps are: Generate a sentence using this '
        'grammar rule with simple vocabulary. Blank out the part related to the grammar '
        'of this sentence. Use the sentence that has been blanked out as the stem.'
        'Generate {num} phrases/words with the same meaning but different usage using '
        'the blanked-out part and include the new phrases/words and blanked-out part as '
        'the options.Use the blanked-out part as the correct answer.For example:'
        'Stem: And now it is time to ____, we will leave our lovely school '
        'Options: A: celebrate B: celebrating C: celebrated D: celebrates '
        'Answer: A: celebrate'
    )

    @classmethod
    def generate(cls, grammars: List[str], question_num: int = 1, option_num: int = 4, model=None) -> List[Dict]:
        """Build up to ``question_num`` grammar questions.

        The prompt asks the model for a sentence using the grammar rule with
        the relevant part blanked and ``option_num - 1`` distractors; the
        reply is parsed by ``response_to_question``. A rule whose generation
        raises is reported with ``print`` and skipped.

        :param grammars: candidate grammar rules.
        :param question_num: number of questions to attempt.
        :param option_num: number of options per question.
        :param model: model key passed to ``_generate``.
        :return: list of ``{'question': ..., 'answer': ...}`` dicts.
        """
        responses = []
        num = option_num - 1
        for grammar in _materials_select(grammars, question_num):
            try:
                prompt = cls.generate_grammar_question_prompt.format(grammar=grammar, num=num)
                # Every period becomes a line break, including those in the
                # grammar rule text.
                prompt = prompt.replace('.', '\n')
                response = _generate(prompt, model)
                question, answer = response_to_question(response)
                responses.append({'question': question, 'answer': answer})
            except Exception as e:
                # Best effort: one failed rule must not abort the batch.
                print(e)
        return responses
def txt_to_list(txt):
    """Split a text block into lines, ignoring leading and trailing newlines.

    Blank lines in the middle of the text are kept as empty strings.

    :param txt: newline-separated text.
    :return: list of lines; an empty list if ``txt`` is empty or consists
        only of newlines.
    """
    trimmed = txt.strip('\n')
    if not trimmed:
        # Indexing txt[0] / txt[-1] on such input used to raise IndexError.
        return []
    return trimmed.split('\n')
| # def generate_scope(progress: str, q_type): | |
| # """ | |
| # :param progress: | |
| # :param q_type: | |
| # :return: | |
| # """ | |
| # | |
| # if progress == '期中': | |
| # progress = ['unit1', 'unit2', 'unit3', 'unit4', 'unit5', 'unit6', 'unit7'] | |
| # elif progress == '期末': | |
| # progress = ['unit1', 'unit2', 'unit3', 'unit4', 'unit5', 'unit6', 'unit7', 'unit8', 'unit9', | |
| # 'unit10', 'unit11', 'unit12', 'unit13', 'unit14'] | |
| # else: | |
| # progress = [progress] | |
| # | |
| # scope = {'word': [], 'phrase': [], 'sentence': [], 'grammar': []} | |
| # | |
| # root_path = Path(__file__).parent | |
| # for i in progress: | |
| # path = root_path.joinpath('material', i + '.txt') | |
| # with open(path, 'r', encoding='utf-8') as file: | |
| # content = file.read() | |
| # # scope = re.split(r'<word>|<phrase>|<sentence>|<grammar>', content) | |
| # _, word, phrase, sentence, grammar = re.split(r'<word>|<phrase>|<sentence>|<grammar>', content) | |
| # scope['word'].extend(txt_to_list(word)) | |
| # scope['phrase'].extend(txt_to_list(phrase)) | |
| # scope['sentence'].extend(txt_to_list(sentence)) | |
| # scope['grammar'].extend(txt_to_list(grammar)) | |
| # m_type = question_type_to_material_type[q_type] | |
| # return scope[m_type] | |
| # def generate(progress, q_type, q_num, **kwargs): | |
| # """ | |
| # :param progress: | |
| # :param q_type: | |
| # :param q_num: | |
| # :param kwargs: | |
| # model is a must when use the llm, for example: | |
| # model = 'openai_3' | |
| # :return: | |
| # """ | |
| # materials = generate_scope(progress, q_type) | |
| # return eval(q_type).generate(materials, q_num, **kwargs) | |
def generate(materials, q_type, q_num, **kwargs):
    """Build questions of the requested type from the given materials.

    :param materials: list of words, phrases, sentences or grammar rules,
        matching what the chosen question type expects.
    :param q_type: question title; must be a key of
        ``question_type_to_class_type``.
    :param q_num: number of questions to build.
    :param kwargs: forwarded to the generator class's ``generate``. ``model``
        is required by the question types that call the language model,
        e.g. ``model='chatgpt_3.5'``.
    :return: list of ``{'question': ..., 'answer': ...}`` dicts.
    :raises KeyError: if ``q_type`` is not a known title.
    """
    class_name = question_type_to_class_type[q_type]
    # The mapping holds names of classes defined at module level, so a plain
    # namespace lookup replaces eval().
    question_class = globals()[class_name]
    return question_class.generate(materials, q_num, **kwargs)
| # ans1 = generate_word_complete_question(['word', 'manager', 'answer', 'fight', 'jump', 'hihhttgrh']) | |
| # ans2 = generate_phrase_select_question(['the Dragon Boat Festival'], 3) | |
| # ans3 = generate_sentence_recomposition_question(['You really require a lot of talent and hard work to succeed.', | |
| # 'I have a job interview tomorrow.], False) | |
| # ans4 = generate_sentence_complete_question(['You really require a lot of talent and hard work to succeed.', | |
| # 'I have a job interview tomorrow.']) | |
| # ans5 = generate_grammar_question(['be supposed to', 'It is + adj. + 动词不定式']) | |
| # print(ans5) | |