Spaces:

lingyu98
/

CiJiang

Sleeping

App Files Files Community

CiJiang / cijiang /utils.py

lingyu98

Update cijiang/utils.py

f2a3e0e verified 7 months ago

raw

history blame contribute delete

3.71 kB

	from collections import defaultdict
	import json
	from colorama import Fore, Style, init

	init(autoreset=True)

	# with open('rules/ALL_SYLLABLES.txt', 'r', encoding='utf-8') as f:
	# ALL_SYLLABLES = f.read().strip().split()
	# ALL_SYLLABLES = [syllable for syllable in ALL_SYLLABLES if syllable]

	# Finals ("yunmu") that get_yunmu may return, written with 'v' standing in for 'ü'.
	# NOTE(review): 'vuan' may be a typo for 'van' (üan) -- confirm before changing it,
	# since other code may key on the current spelling.
	# NOTE(review): 'er' is not listed, so get_yunmu('er') returns None -- confirm intended.
	YUNMU_LIST = [
	    # single vowels
	    'a', 'o', 'e', 'i', 'u', 'v',
	    # compound vowels
	    'ai', 'ei', 'ao', 'ou', 'ia', 'ie', 'iao', 'iu', 'ua', 'uo', 'uai', 'ui', 've',
	    # finals ending in -n
	    'an', 'en', 'in', 'un', 'vn', 'ian', 'uan', 'vuan',
	    # finals ending in -ng
	    'ang', 'eng', 'ing', 'ong',
	    # whole syllables that are returned unchanged as their own class
	    'zhi', 'chi', 'shi', 'ri', 'zi', 'ci', 'si',
	    'yi', 'wu', 'yu', 'yin', 'yun', 'ye', 'yue', 'yuan', 'ying',
	]

	# Initials ("shengmu"); two-letter ones come first so 'zh' is tried before 'z'.
	_SHENGMU_LIST = (
	    'zh', 'ch', 'sh', 'b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h',
	    'j', 'q', 'x', 'z', 'c', 's', 'r', 'y', 'w',
	)

	# After these initials a written 'u' is tried as 'ü' (spelled 'v' here) first.
	_U_AS_V_INITIALS = ('j', 'q', 'x', 'y')


	def get_yunmu(syllable):
	    """Return the final (yunmu) of a pinyin syllable, or None if none is found.

	    The syllable is lower-cased and 'ü' is rewritten as 'v' before matching.
	    Matching proceeds in this order:

	    1. A syllable that is itself an entry of YUNMU_LIST is returned unchanged
	       (e.g. 'shi', 'yu', 'an').
	    2. Otherwise each initial the syllable starts with is stripped in turn.
	       For the initials j/q/x/y, a remainder beginning with 'u' is first tried
	       with that 'u' replaced by 'v' ('ju' -> 'v', 'xun' -> 'vn',
	       'jue' -> 've'); the literal remainder is used only when the 'v' form
	       is not listed ('juan' -> 'uan').
	    3. As a last resort the longest entry of YUNMU_LIST that is a suffix of
	       the syllable is returned ('zhuang' -> 'ang').

	    Args:
	        syllable: pinyin syllable without tone marks or digits.

	    Returns:
	        An entry of YUNMU_LIST, or None when nothing matches.
	    """
	    syllable = syllable.lower().replace('ü', 'v')
	    finals = set(YUNMU_LIST)

	    if syllable in finals:
	        return syllable

	    for shengmu in _SHENGMU_LIST:
	        if not syllable.startswith(shengmu):
	            continue
	        remainder = syllable[len(shengmu):]

	        # This has to run before the literal lookup: 'u' and 'un' are listed
	        # finals too, so checking them first would hide the 'ü' reading.
	        if shengmu in _U_AS_V_INITIALS and remainder.startswith('u'):
	            as_v = 'v' + remainder[1:]
	            if as_v in finals:
	                return as_v

	        if remainder in finals:
	            return remainder

	    # Fallback: longest listed final that the syllable ends with.
	    suffixes = (yunmu for yunmu in YUNMU_LIST if syllable.endswith(yunmu))
	    return max(suffixes, key=len, default=None)


	def print_results(rhymer, text, target_rhyme, top_results=8, beam_width=20, num_candidates=5000):
	    """Print the top rhyme completions for ``text``, with the filled-in part in red.

	    Args:
	        rhymer: object exposing ``get_rhymes(text, target_rhyme, beam_width=...,
	            num_candidates=...)``; its result is sliced and unpacked as
	            ``(seq, log_prob)`` pairs.
	        text: prompt containing one ``[M]`` marker per position to fill.
	        target_rhyme: rhyme label passed to the rhymer and shown in the header.
	        top_results: maximum number of results to print.
	        beam_width: forwarded to ``rhymer.get_rhymes``.
	        num_candidates: forwarded to ``rhymer.get_rhymes``.

	    Returns:
	        None. Output goes to stdout.
	    """
	    out = rhymer.get_rhymes(text, target_rhyme, beam_width=beam_width, num_candidates=num_candidates)
	    mask_count = text.count("[M]")
	    # Everything before the first mask is echoed unchanged in front of each result.
	    context = text.split('[M]')[0]

	    # Plain '|' delimiters: the previous '\|' was an invalid escape sequence
	    # that also printed a stray backslash.
	    print(f"======= 韵脚: |{target_rhyme}|")
	    for rank, (seq, log_prob) in enumerate(out[:top_results], start=1):
	        # seq[-0:] would be the whole sequence, so treat "no masks" explicitly.
	        # presumably seq is a string whose last mask_count characters are the
	        # generated part -- TODO confirm against the rhymer's output format.
	        tail = seq[-mask_count:] if mask_count else ''
	        colored_rhymes = ''.join(
	            Fore.RED + part + Style.RESET_ALL for part in tail.split()
	        )
	        print(f"{rank}. {context}{colored_rhymes} (score: {log_prob:.3f})")
	    print("=" + "=" * 40)


	if __name__ == "__main__":
	    # Regenerate rules/syllable_to_yunmu.json from the syllable inventory.
	    # ALL_SYLLABLES is loaded here because nothing at module level defines it
	    # (the loader near the top of the file is commented out); the path is the
	    # one that loader used.
	    with open('rules/ALL_SYLLABLES.txt', 'r', encoding='utf-8') as f:
	        # split() with no argument drops all whitespace and never yields ''.
	        ALL_SYLLABLES = f.read().split()

	    # Syllables for which get_yunmu finds no final are left out of the mapping.
	    syllable_to_yunmu = {}
	    for syllable in ALL_SYLLABLES:
	        yunmu = get_yunmu(syllable)
	        if yunmu:
	            syllable_to_yunmu[syllable] = yunmu

	    with open('rules/syllable_to_yunmu.json', 'w', encoding='utf-8') as f:
	        json.dump(syllable_to_yunmu, f, ensure_ascii=False, indent=4)