Spaces:

weiwei1392
/

question_generate

Sleeping

question_generate / question.py

weiwei1392

init

d8132e8 over 2 years ago

22.6 kB

	import re
	import math
	from pathlib import Path
	from typing import List, Dict
	from translate import Translator
	from config import *
	from prompt import *
	from llm import OpenAI3, OpenAI4
	import random



	# question_type_to_material_type = {'WordCompleteChoice': 'word',
	# 'WordCompleteBlank': 'word',
	# 'WordRightSpellingChoice': 'word',
	# 'PhraseTranslatingMatchChoice': 'phrase',
	# 'SentenceRecompositionBlank': 'sentence',
	# 'SentenceCompleteChoice': 'sentence',
	# 'GrammarChoice': 'grammar'
	# }


	# Lookup from a question title (the ``title`` string carried by each question
	# class in this module) to the name of the class that generates that kind of
	# question.
	question_type_to_class_type = {
	    '请将选择正确的拼写补全单词': 'WordCompleteChoice',
	    '根据汉语意思补全单词': 'WordCompleteBlank',
	    '根据汉语意思选出拼写正确的单词': 'WordRightSpellingChoice',
	    '请根据汉语意思选择正确的短语': 'PhraseTranslatingMatchChoice',
	    '请将下列单词组成完整的一句话': 'SentenceRecompositionBlank',
	    '请选择正确的短语补全句子': 'SentenceCompleteChoice',
	    '请根据语法知识选择正确的选项补全句子': 'GrammarChoice',
	}


	def trans_en_to_cn(word: str) -> str:
	    """Return the Chinese rendering of an English word.

	    'crayon' / 'Crayon' are answered from a hard-coded value; every other
	    word is passed to ``translate.Translator`` (English -> Chinese).

	    :param word: English word to translate.
	    :return: the Chinese text.
	    """
	    if word in ('crayon', 'Crayon'):
	        # Fixed translation used instead of calling the translator.
	        return '蜡笔'
	    translator = Translator(from_lang='English', to_lang='Chinese')
	    return translator.translate(word)


	def delete_index(string: str) -> str:
	    """Drop a leading one-character label such as ``A.`` or ``1、``.

	    When the second character is one of ``. : 、 ： ) ）`` the first two
	    characters are removed; otherwise the text is returned untouched.
	    Strings shorter than two characters are never altered, so a single
	    letter used as an option survives.

	    :param string: option or answer text, possibly prefixed with a label.
	    :return: the text without the two-character label.
	    """
	    separators = ('.', ':', '、', '：', ')', '）')
	    labelled = len(string) > 1 and string[1] in separators
	    return string[2:] if labelled else string


	def normalize_options_and_answer(answer, options):
	    """Shuffle the options, relabel them, and label the answer to match.

	    With exactly one option nothing is shuffled or labelled: the answer is
	    returned unchanged together with that single option string.

	    Otherwise ``options`` is shuffled in place, any existing label is removed
	    from every option and from the answer (``delete_index``), and each entry
	    is prefixed with ``number_letter_dict[position] + ':'``. The options are
	    joined with commas.

	    :param answer: text of the correct option (may carry a label).
	    :param options: list of option strings; shuffled in place.
	    :return: ``(answer, options)`` where both are strings.
	    :raises ValueError: if the answer is not among the options.
	    """
	    if len(options) == 1:
	        return answer, options[0]

	    random.shuffle(options)
	    bare_options = [delete_index(item) for item in options]
	    bare_answer = delete_index(answer)
	    position = bare_options.index(bare_answer)
	    # number_letter_dict comes from config; presumably 0 -> 'A', 1 -> 'B', ...
	    # TODO confirm against config.
	    labelled_answer = number_letter_dict[position] + ':' + bare_answer
	    labelled_options = [
	        number_letter_dict[slot] + ':' + text
	        for slot, text in enumerate(bare_options)
	    ]
	    return labelled_answer, ','.join(labelled_options)


	def response_to_question(response):
	    """Parse a raw model reply into a ``(question, answer)`` pair.

	    Three layouts are tried in order:

	    1. ``Stem: ... Options: ... Answer: ...``
	    2. ``Stem: ... Options: ...`` where the right option is tagged with
	       ``correct answer`` / ``(correct answer)``
	    3. ``Question: ... Answer: ...``

	    For layouts 1 and 2 the options are split one per line, passed through
	    ``normalize_options_and_answer`` and appended to the stem on a new line.
	    For layout 3 the question text and answer text are returned directly.

	    :param response: text returned by the language model.
	    :return: ``(question, answer)``; ``('fail!', 'fail!')`` when the reply
	        cannot be parsed (the reason is printed).
	    """
	    # Label alternatives. The pipes must be unescaped: an escaped pipe is a
	    # literal '|' character and would prevent every label from matching.
	    stem_labels = r'stem: |stem:|Stem: |Stem:'
	    options_labels = r'options: |options:|Options: |Options:'
	    answer_labels = (r'Correct answer: |Correct answer:|correct answer: |correct answer:'
	                     r'|answer: |answer:|Answer: |Answer:')
	    question_labels = r'question: |question:|Question: |Question:'

	    try:
	        stem = options = question = None

	        parts = re.split(f'{stem_labels}|{options_labels}|{answer_labels}', response)
	        if len(parts) == 4:
	            _, stem, options, answer = parts
	        else:
	            parts = re.split(f'{stem_labels}|{options_labels}', response)
	            if len(parts) == 3:
	                _, stem, options = parts
	                answer = None
	            else:
	                # Unpacking raises ValueError when this last layout does not
	                # match either; the outer handler turns that into 'fail!'.
	                _, question, answer = re.split(f'{question_labels}|{answer_labels}', response)
	                question = question.rstrip('\n')
	                if not question:
	                    raise ValueError('question text is empty')

	        if question is None:
	            # Stem/options layouts: build the question from stem + options.
	            if stem:
	                stem = stem.rstrip('\n')
	                if not stem:
	                    raise ValueError('stem contains only newlines')
	            if not options:
	                # Without this check ``question`` would stay unset and the
	                # final return would raise outside the try block.
	                raise ValueError('no options found in response')

	            options = [line for line in options.split('\n') if line != '']

	            if answer is None:
	                answer = 'None'
	                for i, option in enumerate(options):
	                    if 'correct answer' in option:
	                        # Keep the text before the marker as both the answer
	                        # and the cleaned-up option.
	                        answer = re.split(r'\(correct answer\)|correct answer', option)[0]
	                        options[i] = answer

	            answer = answer.split('\n')[0]
	            answer, options = normalize_options_and_answer(answer, options)
	            question = stem + '\n' + options

	    except Exception as e:
	        print(f'fail!,reason:{e},response:{response}')
	        question = 'fail!'
	        answer = 'fail!'

	    return question, answer


	def response_to_options(response):
	    """Turn a multi-line model reply into a list of option strings.

	    The reply is split on newlines and each line has its leading label
	    removed by ``delete_index``. Empty lines are kept as empty strings.

	    :param response: text returned by the language model.
	    :return: list with one entry per line of the reply.
	    """
	    return [delete_index(line) for line in response.split('\n')]


	def _materials_select(materials: List[str], question_num: int, option_num: int = 1):
	    """Randomly pick the source material for each question.

	    The material list is repeated until it holds at least ``option_num``
	    entries, then ``option_num`` entries are sampled from it once per
	    question.

	    :param materials: candidate words / phrases / sentences (must not be
	        empty, otherwise the repeat count divides by zero).
	    :param question_num: number of questions to select material for.
	    :param option_num: number of entries needed per question.
	    :return: a list of length ``question_num``; each item is a single entry
	        when ``option_num`` is 1, otherwise a list of ``option_num`` entries.
	    """
	    repeats = math.ceil(option_num / len(materials))
	    pool = materials * repeats

	    picks = []
	    for _ in range(question_num):
	        picks.append(random.sample(pool, option_num))

	    if option_num == 1:
	        # Unwrap the one-element samples so callers get plain entries.
	        return [group[0] for group in picks]
	    return picks


	def _generate(prompt, model='openai_3'):
	    """Send a prompt to the selected language model and return its reply.

	    :param prompt: prompt text passed to the model object.
	    :param model: key into ``model_name``; the mapped string is evaluated to
	        obtain the model class, which is instantiated without arguments.
	    :return: whatever calling the model instance with the prompt returns.
	    """
	    # NOTE(review): eval() runs the string stored in config's model_name;
	    # confirm that mapping only ever contains trusted class names.
	    llm_class = eval(model_name[model])
	    llm = llm_class()
	    return llm(prompt)


	def _generate_fake_word(word):
	    """Return a misspelled variant of ``word`` for use as a wrong option.

	    Strategy, in order of preference:

	    1. If the word contains an entry of ``vowels_list``, one occurrence of a
	       randomly chosen entry is replaced by a different entry of the same
	       length.
	    2. Otherwise (or when no same-length replacement exists), one letter of
	       the word that appears in ``vowel_list`` is replaced by a different
	       entry of ``vowel_list``.
	    3. Otherwise one randomly chosen letter is doubled, so the result always
	       differs from the original word.

	    ``vowels_list`` / ``vowel_list`` come from config; presumably multi-letter
	    vowel groups and single vowels respectively - TODO confirm.

	    :param word: the correctly spelled word (non-empty).
	    :return: the misspelled word.
	    """
	    groups_in_word = [group for group in vowels_list if group in word]
	    vowel_positions = [pos for pos, letter in enumerate(word) if letter in vowel_list]

	    if groups_in_word:
	        true_letters = random.choice(groups_in_word)
	        replacements = [
	            group for group in vowels_list
	            if len(group) == len(true_letters) and group != true_letters
	        ]
	        # With no same-length replacement available, fall through to the
	        # single-letter strategies instead of failing on an empty choice.
	        if replacements:
	            fake_letters = random.choice(replacements)
	            pieces = word.split(true_letters)
	            # Only one of the occurrences is swapped; the rest are restored.
	            swap_at = random.randint(0, len(pieces) - 2)
	            rebuilt = [
	                piece + (fake_letters if pos == swap_at else true_letters)
	                for pos, piece in enumerate(pieces[:-1])
	            ]
	            rebuilt.append(pieces[-1])
	            return ''.join(rebuilt)

	    if vowel_positions:
	        pos = random.choice(vowel_positions)
	        alternatives = [vowel for vowel in vowel_list if vowel != word[pos]]
	        return word[:pos] + random.choice(alternatives) + word[pos + 1:]

	    # No vowel to swap: double one letter (e.g. 'fly' -> 'flly').
	    pos = random.choice(range(len(word)))
	    return word[:pos] + word[pos] + word[pos:]


	class WordCompleteChoice(object):
	    """Multiple-choice question: pick the letters that complete a word.

	    Part of the word is replaced by underscores and the learner chooses the
	    missing letters from the options.
	    """

	    title = '请将选择正确的拼写补全单词'

	    @staticmethod
	    def generate(words: List[str], question_num: int = 1, option_num: int = 3, model=None) -> List[Dict]:
	        """Build ``question_num`` word-completion questions.

	        For a word containing an entry of ``vowels_list`` (from config), one
	        occurrence of that entry is blanked and the distractors are other
	        entries of the same length. For any other word, one to three letters
	        are blanked and the distractors are other letter selections taken
	        from the same word.

	        :param words: candidate words.
	        :param question_num: number of questions to produce.
	        :param option_num: desired number of options per question; fewer are
	            produced when not enough distinct options exist.
	        :param model: unused; accepted for a uniform ``generate`` signature.
	        :return: list of ``{'question': ..., 'answer': ...}`` dicts.
	        """
	        # Upper bound on attempts to find a new distinct distractor, so short
	        # or repetitive words cannot make the search loop forever.
	        max_attempts = 100
	        responses = []

	        for word in _materials_select(words, question_num):
	            groups_in_word = [group for group in vowels_list if group in word]

	            if groups_in_word:
	                answer = random.choice(groups_in_word)

	                pieces = word.split(answer)
	                # Blank exactly one occurrence; restore the others.
	                blank_at = random.randint(0, len(pieces) - 2)
	                stem = ''
	                for pos, piece in enumerate(pieces[:-1]):
	                    stem += piece + ('_' * len(answer) if pos == blank_at else answer)
	                stem += pieces[-1]

	                options = [
	                    group for group in vowels_list
	                    if len(group) == len(answer) and group != answer
	                ]
	                if len(options) > option_num - 1:
	                    random.shuffle(options)
	                    options = options[:option_num - 1]
	                options.append(answer)

	            else:
	                positions = list(range(len(word)))
	                # Blank 2-3 letters but keep at least three visible when the
	                # word is long enough; always blank at least one letter.
	                blank_count = max(1, min(random.randint(2, 3), len(word) - 3))

	                random.shuffle(positions)
	                answer_positions = sorted(positions[:blank_count])
	                answer = ','.join(word[pos] for pos in answer_positions)
	                stem = ''.join(
	                    '_' if pos in answer_positions else letter
	                    for pos, letter in enumerate(word)
	                )

	                options = [answer]
	                for _ in range(max_attempts):
	                    if len(options) >= option_num:
	                        break
	                    random.shuffle(positions)
	                    option = ','.join(word[pos] for pos in positions[:blank_count])
	                    if option not in options:
	                        options.append(option)

	            answer, options = normalize_options_and_answer(answer, options)
	            responses.append({'question': stem + '\n' + options, 'answer': answer})

	        return responses


	class WordCompleteBlank(object):
	    """Fill-in-the-blank question: complete a word given its Chinese meaning."""

	    title = '根据汉语意思补全单词'

	    @classmethod
	    def generate(cls, words: List[str], question_num: int = 1, option_num: int = 2, model=None) -> List[Dict]:
	        """Build ``question_num`` questions, each containing ``option_num`` words.

	        Every word is shown as ``<chinese>:<word with blanks>``. If the word
	        contains an entry of ``vowels_list`` (from config), one occurrence of
	        that entry is blanked; otherwise a run of one to three consecutive
	        letters is blanked.

	        :param words: candidate words.
	        :param question_num: number of questions to produce.
	        :param option_num: number of words per question.
	        :param model: unused; accepted for a uniform ``generate`` signature.
	        :return: list of ``{'question': ..., 'answer': ...}`` dicts whose
	            values are the space-joined stems and ``<chinese>:<word>`` pairs.
	        """
	        responses = []

	        for group in _materials_select(words, question_num, option_num):
	            if isinstance(group, str):
	                # With option_num == 1 the selector returns bare words;
	                # wrap them so the loop below does not iterate characters.
	                group = [group]

	            stems = []
	            answers = []

	            for word in group:
	                groups_in_word = [vowel for vowel in vowels_list if vowel in word]
	                chinese = trans_en_to_cn(word)

	                if groups_in_word:
	                    letters = random.choice(groups_in_word)
	                    pieces = word.split(letters)
	                    # Blank exactly one occurrence; restore the others.
	                    blank_at = random.randint(0, len(pieces) - 2)
	                    body = ''
	                    for pos, piece in enumerate(pieces[:-1]):
	                        body += piece + ('_' * len(letters) if pos == blank_at else letters)
	                    body += pieces[-1]
	                else:
	                    # Blank 2-3 letters, leaving at least two visible when
	                    # possible, and never fewer than one blank.
	                    blank_count = max(1, min(random.randint(2, 3), len(word) - 2))
	                    # Last valid start keeps the whole run inside the word.
	                    start = random.randint(0, max(0, len(word) - blank_count))
	                    body = word[:start] + '_' * blank_count + word[start + blank_count:]

	                stems.append(chinese + ':' + body)
	                answers.append(chinese + ':' + word)

	            responses.append({'question': ' '.join(stems), 'answer': ' '.join(answers)})

	        return responses


	class WordRightSpellingChoice(object):
	    """Multiple-choice question: pick the correctly spelled word for a meaning."""

	    title = '根据汉语意思选出拼写正确的单词'

	    @classmethod
	    def generate(cls, words: List[str], question_num: int = 1, option_num: int = 3, model=None) -> List[Dict]:
	        """Build ``question_num`` spelling questions.

	        The stem is the Chinese translation of the word; the options are the
	        word itself plus misspellings produced by ``_generate_fake_word``.

	        :param words: candidate words.
	        :param question_num: number of questions to produce.
	        :param option_num: desired number of options per question; fewer are
	            produced when not enough distinct misspellings can be found.
	        :param model: unused; accepted for a uniform ``generate`` signature.
	        :return: list of ``{'question': ..., 'answer': ...}`` dicts.
	        """
	        responses = []
	        wanted = option_num - 1

	        for word in _materials_select(words, question_num):
	            stem = trans_en_to_cn(word)

	            # Collect distinct misspellings; the attempt cap keeps the loop
	            # finite for words with very few possible variants.
	            distractors = []
	            for _ in range(20 * max(wanted, 0)):
	                if len(distractors) >= wanted:
	                    break
	                fake = _generate_fake_word(word)
	                if fake != word and fake not in distractors:
	                    distractors.append(fake)

	            options = distractors + [word]
	            answer, options = normalize_options_and_answer(word, options)
	            responses.append({'question': stem + '\n' + options, 'answer': answer})

	        return responses


	class SentenceRecompositionBlank(object):
	    """Question type: reorder shuffled words into a complete sentence."""

	    title = '请将下列单词组成完整的一句话'

	    generate_similar_sentence_prompt = 'Please generate a sentence with the same sentence structure as ' \
	                                       '<{sentence}> using simple vocabulary'

	    @classmethod
	    def generate(cls, sentences: List[str], question_num: int = 1, option_num=None, model=None,
	                 generate_mode: bool = False) -> List[Dict]:
	        """Build ``question_num`` sentence-recomposition questions.

	        The question shows one ``______`` slot per word (keeping sentence-ending
	        punctuation next to its slot) followed by the words in shuffled order.

	        :param sentences: candidate sentences.
	        :param question_num: number of questions to produce.
	        :param option_num: unused; accepted for a uniform ``generate`` signature.
	        :param model: model key forwarded to ``_generate`` in generate mode.
	        :param generate_mode: when true, ask the model for a sentence with the
	            same structure and use its first sentence; on any failure the
	            original sentence is used instead.
	        :return: list of ``{'question': ..., 'answer': ...}`` dicts.
	        """
	        end_marks = ('.', '?', '!', '。', '？', '！')
	        responses = []

	        for sentence in _materials_select(sentences, question_num):
	            sent = sentence
	            if generate_mode:
	                try:
	                    prompt = cls.generate_similar_sentence_prompt.format(sentence=sentence)
	                    sent = re.split(r'[.?!]', _generate(prompt, model))[0]
	                except Exception:
	                    # Best effort: fall back to the source sentence.
	                    sent = sentence
	            answer = sent

	            # split() without arguments ignores repeated whitespace, so no
	            # empty tokens reach the punctuation check below.
	            words = sent.split()
	            stem = ['______'] * len(words)
	            for i, word in enumerate(words):
	                if word[-1] in end_marks:
	                    stem[i] = stem[i] + word[-1]
	                    words[i] = word[:-1]

	            original_order = words.copy()
	            # Reshuffle until the order changes; skipped when every word is
	            # the same (or there is only one), where no order can differ.
	            if len(set(words)) > 1:
	                while words == original_order:
	                    random.shuffle(words)

	            question = ' '.join(stem) + '\n' + ', '.join(words)
	            responses.append({'question': question, 'answer': answer})

	        return responses


	class PhraseTranslatingMatchChoice(object):
	    """Multiple-choice question: pick the phrase matching a Chinese meaning."""

	    title = '请根据汉语意思选择正确的短语'

	    generate_similar_phrase_prompt = 'Please generate {num} phrases with the same phrase structure as <{phrase}> by ' \
	                                     'replacing a single word in this phrase. Please return the answer in a list. ' \
	                                     'Try your best to use simple vocabulary when generating phrases.'

	    translate_chinese_phrase_prompt = 'Please translate the following phrase to chinese: <{phrase}>'

	    @classmethod
	    def generate(cls, phrases: List[str], question_num: int = 1, option_num: int = 2, model=None) -> List[Dict]:
	        """Build up to ``question_num`` phrase-matching questions.

	        For each phrase the model is asked for ``option_num - 1`` similar
	        phrases (the distractors) and for a Chinese translation (the stem).
	        A phrase whose generation fails is skipped and the error is printed.

	        :param phrases: candidate phrases.
	        :param question_num: number of phrases to try.
	        :param option_num: desired number of options per question.
	        :param model: model key forwarded to ``_generate``.
	        :return: list of ``{'question': ..., 'answer': ...}`` dicts.
	        """
	        responses = []
	        num = option_num - 1

	        for phrase in _materials_select(phrases, question_num):
	            try:
	                phrase = phrase[0].upper() + phrase[1:]

	                option_prompt = cls.generate_similar_phrase_prompt.format(num=num, phrase=phrase)
	                option_response = _generate(option_prompt, model)
	                # Drop blank lines and any copy of the answer itself so the
	                # option list has no empty or duplicated entries.
	                options = [
	                    option for option in response_to_options(option_response)
	                    if option.strip() and option.strip() != phrase
	                ]
	                options.append(phrase)
	                answer, options = normalize_options_and_answer(phrase, options)

	                stem_prompt = cls.translate_chinese_phrase_prompt.format(phrase=phrase)
	                stem_response = _generate(stem_prompt, model)

	                responses.append({'question': stem_response + '\n' + options, 'answer': answer})
	            except Exception as e:
	                print(e)

	        return responses


	class SentenceCompleteChoice(object):
	    """Multiple-choice question: pick the phrase that completes a sentence."""

	    title = '请选择正确的短语补全句子'

	    # The example keeps a space before 'Answer:' so the label is not fused
	    # to the last option.
	    generate_sentence_complete_question_prompt = (
	        'Assuming you are an English teacher and a question consists of three '
	        'parts: the stem, options, and answer. please provide a multiple-choice '
	        'question based on the sentence <{sentence}>.The specific steps are: '
	        'Randomly blank out a part of the sentence and replace it with ____ to '
	        'create the stem.Generate {num} new phrases/words that are '
	        'structurally consistent with the blanked-out part and include the new '
	        'phrases/words and blanked-out part as the options.Use the blanked-out '
	        'part as the answer. For example:'
	        'Stem: And now it is time to ____, we will leave our lovely school '
	        'Options: A: celebrate B: say goodbye C: take a break D: graduate '
	        'Answer: D: graduate '
	    )

	    @classmethod
	    def generate(cls, sentences: List[str], question_num: int = 1, option_num: int = 3, model=None) -> List[Dict]:
	        """Build up to ``question_num`` sentence-completion questions.

	        The model is prompted once per sentence and its reply is parsed with
	        ``response_to_question``. A sentence whose generation raises is
	        skipped and the error is printed.

	        :param sentences: candidate sentences.
	        :param question_num: number of sentences to try.
	        :param option_num: desired number of options; the prompt asks for
	            ``option_num - 1`` new phrases.
	        :param model: model key forwarded to ``_generate``.
	        :return: list of ``{'question': ..., 'answer': ...}`` dicts.
	        """
	        responses = []
	        num = option_num - 1
	        # Periods in the instructions become line breaks. This is done on the
	        # template, before the sentence is inserted, so punctuation inside
	        # the source sentence is left intact.
	        template = cls.generate_sentence_complete_question_prompt.replace('.', '\n')

	        for sentence in _materials_select(sentences, question_num):
	            try:
	                prompt = template.format(sentence=sentence, num=num)
	                response = _generate(prompt, model)
	                question, answer = response_to_question(response)
	                responses.append({'question': question, 'answer': answer})
	            except Exception as e:
	                print(e)

	        return responses


	class GrammarChoice(object):
	    """Multiple-choice question testing a grammar rule via sentence completion."""

	    title = '请根据语法知识选择正确的选项补全句子'

	    # The example answer is one of the listed options (it previously named
	    # a word that was not among them).
	    generate_grammar_question_prompt = (
	        'Assuming you are an English teacher and a question consists of three parts: the '
	        'stem, options, and answer. Please provide a sentence completion type '
	        'multiple-choice question to test students understanding of the grammar rule'
	        ' <{grammar}>.The specific steps are: Generate a sentence using this '
	        'grammar rule with simple vocabulary. Blank out the part related to the grammar '
	        'of this sentence. Use the sentence that has been blanked out as the stem.'
	        'Generate {num} phrases/words with the same meaning but different usage using '
	        'the blanked-out part and include the new phrases/words and blanked-out part as '
	        'the options.Use the blanked-out part as the correct answer.For example:'
	        'Stem: And now it is time to ____, we will leave our lovely school '
	        'Options: A: celebrate B: celebrating C: celebrated D: celebrates '
	        'Answer: A: celebrate'
	    )

	    @classmethod
	    def generate(cls, grammars: List[str], question_num: int = 1, option_num: int = 4, model=None) -> List[Dict]:
	        """Build up to ``question_num`` grammar questions.

	        The model is prompted once per grammar rule and its reply is parsed
	        with ``response_to_question``. A rule whose generation raises is
	        skipped and the error is printed.

	        :param grammars: candidate grammar rules.
	        :param question_num: number of rules to try.
	        :param option_num: desired number of options; the prompt asks for
	            ``option_num - 1`` new phrases.
	        :param model: model key forwarded to ``_generate``.
	        :return: list of ``{'question': ..., 'answer': ...}`` dicts.
	        """
	        responses = []
	        num = option_num - 1
	        # Periods in the instructions become line breaks. This is done on the
	        # template, before the rule is inserted, so periods inside the rule
	        # text (e.g. abbreviations) are left intact.
	        template = cls.generate_grammar_question_prompt.replace('.', '\n')

	        for grammar in _materials_select(grammars, question_num):
	            try:
	                prompt = template.format(grammar=grammar, num=num)
	                response = _generate(prompt, model)
	                question, answer = response_to_question(response)
	                responses.append({'question': question, 'answer': answer})
	            except Exception as e:
	                print(e)

	        return responses


	def txt_to_list(txt):
	    """Split a block of text into its non-empty lines.

	    Leading, trailing and interior blank lines are all dropped, so empty or
	    newline-only input yields an empty list instead of raising.

	    :param txt: text with one entry per line.
	    :return: list of the non-empty lines, in order.
	    """
	    return [line for line in txt.split('\n') if line]


	# def generate_scope(progress: str, q_type):
	# """
	# :param progress:
	# :param q_type:
	# :return:
	# """
	#
	# if progress == '期中':
	# progress = ['unit1', 'unit2', 'unit3', 'unit4', 'unit5', 'unit6', 'unit7']
	# elif progress == '期末':
	# progress = ['unit1', 'unit2', 'unit3', 'unit4', 'unit5', 'unit6', 'unit7', 'unit8', 'unit9',
	# 'unit10', 'unit11', 'unit12', 'unit13', 'unit14']
	# else:
	# progress = [progress]
	#
	# scope = {'word': [], 'phrase': [], 'sentence': [], 'grammar': []}
	#
	# root_path = Path(__file__).parent
	# for i in progress:
	# path = root_path.joinpath('material', i + '.txt')
	# with open(path, 'r', encoding='utf-8') as file:
	# content = file.read()
	# # scope = re.split(r'<word>\|<phrase>\|<sentence>\|<grammar>', content)
	# _, word, phrase, sentence, grammar = re.split(r'<word>\|<phrase>\|<sentence>\|<grammar>', content)
	# scope['word'].extend(txt_to_list(word))
	# scope['phrase'].extend(txt_to_list(phrase))
	# scope['sentence'].extend(txt_to_list(sentence))
	# scope['grammar'].extend(txt_to_list(grammar))
	# m_type = question_type_to_material_type[q_type]
	# return scope[m_type]


	# def generate(progress, q_type, q_num, **kwargs):
	# """
	# :param progress:
	# :param q_type:
	# :param q_num:
	# :param kwargs:
	# model is a must when use the llm, for example:
	# model = 'openai_3'
	# :return:
	# """
	# materials = generate_scope(progress, q_type)
	# return eval(q_type).generate(materials, q_num, **kwargs)

	def generate(materials, q_type, q_num, **kwargs):
	    """Generate questions of the requested type from the given materials.

	    :param materials: list of source words / phrases / sentences / rules.
	    :param q_type: question title; must be a key of
	        ``question_type_to_class_type``.
	    :param q_num: number of questions to generate.
	    :param kwargs: forwarded to the question class's ``generate`` method;
	        question types that call the language model need ``model`` (a key
	        that ``_generate`` looks up in ``model_name``).
	    :return: whatever the question class's ``generate`` returns (a list of
	        ``{'question': ..., 'answer': ...}`` dicts).
	    :raises KeyError: if ``q_type`` is not a known question title.
	    """
	    class_name = question_type_to_class_type[q_type]
	    # Resolve the class by name from this module rather than evaluating the
	    # string as code.
	    question_class = globals()[class_name]
	    return question_class.generate(materials, q_num, **kwargs)



	# ans1 = generate_word_complete_question(['word', 'manager', 'answer', 'fight', 'jump', 'hihhttgrh'])

	# ans2 = generate_phrase_select_question(['the Dragon Boat Festival'], 3)

	# ans3 = generate_sentence_recomposition_question(['You really require a lot of talent and hard work to succeed.',
	# 'I have a job interview tomorrow.], False)

	# ans4 = generate_sentence_complete_question(['You really require a lot of talent and hard work to succeed.',
	# 'I have a job interview tomorrow.'])

	# ans5 = generate_grammar_question(['be supposed to', 'It is + adj. + 动词不定式'])
	# print(ans5)