Spaces: Running on Zero
# Copyright (c) 2024 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import re

"""
Text clean time
"""
# Substitution table used by replace_punctuation(): normalizes CJK/fullwidth
# punctuation to its ASCII equivalent and deletes quote/bracket characters.
# Keys are matched literally (multi-char keys such as "..." included).
rep_map = {
    # Sentence punctuation -> ASCII equivalents.
    ":": ",",
    ";": ",",
    ",": ",",
    "。": ".",
    "!": "!",
    "?": "?",
    "\n": ".",
    "·": ",",
    "、": ",",
    "...": ".",
    "…": ".",
    "$": ".",
    # Quotation marks are removed entirely.
    "“": "",
    "”": "",
    "‘": "",
    "’": "",
    # Brackets and parentheses (fullwidth and ASCII) are removed.
    "(": "",
    ")": "",
    "(": "",
    ")": "",
    "《": "",
    "》": "",
    "【": "",
    "】": "",
    "[": "",
    "]": "",
    "—": "",
    # Both tilde forms become a hyphen.
    "~": "-",
    "~": "-",
    "「": "",
    "」": "",
    # Inverted Spanish punctuation is removed.
    "¿": "",
    "¡": "",
}
def collapse_whitespace(text):
    """Squeeze every run of whitespace into a single space and trim the ends."""
    return re.sub(r"\s+", " ", text).strip()
def remove_punctuation_at_begin(text):
    """Strip any leading run of sentence punctuation (comma, period, !, ?)."""
    leading_punct = re.compile(r"^[,.!?]+")
    return leading_punct.sub("", text)
def remove_aux_symbols(text):
    """Delete auxiliary symbols: angle brackets, parentheses, square brackets,
    double quotes and guillemets."""
    return re.sub(r"[\<\>\(\)\[\]\"\«\»]+", "", text)
def replace_symbols(text):
    """Map a few ASCII symbols to speakable forms: ';'->',', '-'->' ', ':'->','."""
    for symbol, substitute in ((";", ","), ("-", " "), (":", ",")):
        text = text.replace(symbol, substitute)
    return text
def replace_punctuation(text):
    """Apply every rep_map substitution to *text* in one regex pass."""
    alternation = "|".join(map(re.escape, rep_map))
    return re.compile(alternation).sub(lambda match: rep_map[match.group()], text)
def text_normalize(text):
    """Normalize raw text for phonemization.

    Pipeline: map punctuation via rep_map, replace ;/-/: symbols, drop
    auxiliary bracket/quote symbols, strip leading punctuation, collapse
    whitespace, then ensure the text ends with a punctuation mark.
    """
    text = replace_punctuation(text)
    text = replace_symbols(text)
    text = remove_aux_symbols(text)
    text = remove_punctuation_at_begin(text)
    text = collapse_whitespace(text)
    # Bug fix: the replacement used to be r"\1", which rewrote the final
    # character with itself (a no-op). The evident intent is to append a
    # period when the text does not already end in punctuation.
    text = re.sub(r"([^\.,!\?\-…])$", r"\1.", text)
    return text
def german_to_ipa(text, text_tokenizer):
    """Normalize German text and convert it to phonemes.

    Args:
        text: a single string, or a list of strings.
        text_tokenizer: callable mapping normalized text (str or list of str)
            to phoneme output.

    Returns:
        The tokenizer's output for the normalized input.
    """
    # isinstance() is the idiomatic type check (handles str subclasses too,
    # unlike the original `type(text) == str`).
    if isinstance(text, str):
        return text_tokenizer(text_normalize(text))
    # Build a new list instead of mutating the caller's list in place.
    normalized = [text_normalize(t) for t in text]
    return text_tokenizer(normalized)