# ==================================================================================================
# DEEPFAKE AUDIO - synthesizer/utils/text.py (Linguistic Tokenization Engine)
# ==================================================================================================
#
# 📝 DESCRIPTION
# This module implements the text-to-sequence transformation logic. It handles
# text cleaning, inline ARPAbet parsing (via curly-brace detection), and
# symbol-to-ID mapping, converting human-readable text into the integer token
# sequences that the synthesizer embeds and consumes (see the usage sketch
# below this header).
#
# 👤 AUTHORS
# - Amey Thakur (https://github.com/Amey-Thakur)
# - Mega Satish (https://github.com/msatmod)
#
# 🤝🏻 CREDITS
# Original Real-Time Voice Cloning methodology by CorentinJ
# Repository: https://github.com/CorentinJ/Real-Time-Voice-Cloning
#
# 🔗 PROJECT LINKS
# Repository: https://github.com/Amey-Thakur/DEEPFAKE-AUDIO
# Video Demo: https://youtu.be/i3wnBcbHDbs
# Research: https://github.com/Amey-Thakur/DEEPFAKE-AUDIO/blob/main/DEEPFAKE-AUDIO.ipynb
#
# 📜 LICENSE
# Released under the MIT License
# Release Date: 2021-02-06
# ==================================================================================================
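# USAGE SKETCH (illustrative comment; assumes an "english_cleaners" pipeline exists in
# synthesizer.utils.cleaners):
#
#     ids = text_to_sequence("Turn left on {HH AW1 S S T AH0 N} Street.", ["english_cleaners"])
#     txt = sequence_to_text(ids)
#
# The curly-brace span is treated as ARPAbet; its phonemes survive only if the symbols table
# contains the corresponding "@"-prefixed entries, otherwise they are silently filtered out.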
from synthesizer.utils.symbols import symbols
from synthesizer.utils import cleaners
import re
# Symbol Mapping: lookup tables for symbol-to-ID and ID-to-symbol conversion
_symbol_to_id = {s: i for i, s in enumerate(symbols)}
_id_to_symbol = {i: s for i, s in enumerate(symbols)}
# ARPAbet Detection: Identifies phonetic sequences in curly braces
_curly_re = re.compile(r"(.*?)\{(.+?)\}(.*)")
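# e.g. _curly_re.match("hi {AH0 Z} there") captures ("hi ", "AH0 Z", " there"):
# plain text before the braces, the ARPAbet body, and the remaining text.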
def text_to_sequence(text, cleaner_names):
    """
    Symbolic Ingestion:
    Converts raw text into a sequence of integer token IDs, running each plain-text
    segment through the cleaners named in cleaner_names (looked up in
    synthesizer.utils.cleaners). ARPAbet spans may be embedded in curly braces,
    e.g. "Turn left on {HH AW1 S S T AH0 N} Street.". Returns the ID list
    terminated by the EOS token.
    """
sequence = []
# Sequential Parsing Segment by Segment
while len(text):
m = _curly_re.match(text)
if not m:
sequence += _symbols_to_sequence(_clean_text(text, cleaner_names))
break
sequence += _symbols_to_sequence(_clean_text(m.group(1), cleaner_names))
sequence += _arpabet_to_sequence(m.group(2))
text = m.group(3)
# Termination: Append EOS token
sequence.append(_symbol_to_id["~"])
return sequence
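# Parsing walkthrough (illustrative): for "hi {AH0} yo" the loop first matches the braces,
# encoding the cleaned "hi " plus the ARPAbet token "@AH0", then continues with the remainder
# " yo"; with no braces left, the tail is encoded, the loop breaks, and the EOS token "~" is appended.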
def sequence_to_text(sequence):
"""Linguistic Restoration: Decodes token ID sequences back into human-readable text."""
result = ""
for symbol_id in sequence:
if symbol_id in _id_to_symbol:
s = _id_to_symbol[symbol_id]
# Handle ARPAbet re-bracketing
if len(s) > 1 and s[0] == "@":
s = "{%s}" % s[1:]
result += s
return result.replace("}{", " ")
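# Decoding note: consecutive ARPAbet IDs first decode to "{HH}{AH0}"; the final "}{" -> " "
# replacement merges them into a single "{HH AH0}" span.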
def _clean_text(text, cleaner_names):
"""Pipeline Orchestration: Runs text through specified normalization filters."""
for name in cleaner_names:
        # Look up the cleaner by name; default to None so a missing name hits the explicit error below
        cleaner = getattr(cleaners, name, None)
if not cleaner:
raise Exception("Unknown cleaner: %s" % name)
text = cleaner(text)
return text
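# e.g., assuming the upstream "english_cleaners" is available, _clean_text("Dr. Smith",
# ["english_cleaners"]) would lowercase the text and expand the abbreviation to "doctor smith".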
def _symbols_to_sequence(syms):
    """ID Conversion: Maps a list of character tokens to their numeric analogues."""
    return [_symbol_to_id[s] for s in syms if _should_keep_symbol(s)]
def _arpabet_to_sequence(text):
"""Phonetic Encoding: Converts space-delimited ARPAbet codes into ID sequences."""
return _symbols_to_sequence(["@" + s for s in text.split()])
def _should_keep_symbol(s):
    """Filter Logic: Keeps only tokens present in the symbols table, excluding padding '_' and EOS '~'."""
    return s in _symbol_to_id and s not in ("_", "~")
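

if __name__ == "__main__":
    # Illustrative smoke test (a sketch): assumes "basic_cleaners" exists in
    # synthesizer.utils.cleaners and that the symbols table contains lowercase letters and spaces.
    demo_ids = text_to_sequence("hello world", ["basic_cleaners"])
    print(demo_ids)                    # integer IDs, terminated by the EOS token for "~"
    print(sequence_to_text(demo_ids))  # round trip back to the cleaned text plus the trailing "~"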