ELEVENLAB / app.py
mr-don88's picture
Update app.py
baa7af5 verified
raw
history blame
108 kB
import spaces
from vansarah import KModel, KPipeline
import gradio as gr
import torch
import numpy as np
import wave
import io
import time
import re
import json
from typing import List, Tuple, Optional, Dict
from pydub import AudioSegment
from pydub.effects import normalize, compress_dynamic_range, low_pass_filter, high_pass_filter
import os
import random
from phonemizer import backend
from datetime import timedelta
# Tokenizer class with enhanced text processing
class Tokenizer:
def __init__(self):
    """Prepare the lookup tables and phonemizer backends used by the tokenizer.

    Populates, in this order: ``VOCAB`` (symbol-to-index table),
    ``special_cases``, ``special_regex`` and ``abbreviation_patterns`` from
    their respective builder methods, followed by ``phonemizers``, which
    holds one espeak backend for each of ``'en-us'`` and ``'en-gb'``.
    """
    self.VOCAB = self._get_vocab()
    # NOTE(review): builder order is kept as in the original; presumably the
    # regex builder reads ``self.special_cases`` — confirm before reordering.
    self.special_cases = self._build_special_cases()
    self.special_regex = self._build_special_regex()
    self.abbreviation_patterns = self._build_abbreviation_patterns()

    # Both accents share the same backend options: punctuation is preserved
    # and stress marks are requested.
    espeak_options = {'preserve_punctuation': True, 'with_stress': True}
    self.phonemizers = {
        accent: backend.EspeakBackend(language=accent, **espeak_options)
        for accent in ('en-us', 'en-gb')
    }
@staticmethod
def _get_vocab():
_pad = "$"
_punctuation = ';:,.!?¡¿—…"«»“” '
_letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
_letters_ipa = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ"
symbols = [_pad] + list(_punctuation) + list(_letters) + list(_letters_ipa)
return {symbol: index for index, symbol in enumerate(symbols)}
def _build_special_cases(self) -> Dict[str, str]:
"""Từ điển các từ đặc biệt cần chuẩn hóa cách đọc"""
cases = {
# 1. Đại từ nhân xưng
"I": "I", "me": "me", "my": "my", "mine": "mine", "myself": "myself",
"you": "you", "your": "your", "yours": "yours", "yourself": "yourself",
"he": "he", "him": "him", "his": "his", "himself": "himself",
"she": "she", "her": "her", "hers": "hers", "herself": "herself",
"we": "we", "us": "us", "our": "our", "ours": "ours", "ourselves": "ourselves",
"they": "they", "them": "them", "their": "their", "theirs": "theirs",
"it": "it", "its": "its", "itself": "itself",
"yourselves": "yourselves", "oneself": "oneself",
# 2. Động từ to be/trợ động từ
"is": "is", "are": "are", "am": "am", "was": "was", "were": "were",
"be": "be", "been": "been", "being": "being", "have": "have",
"has": "has", "had": "had", "do": "do", "does": "does", "did": "did",
"can": "can", "could": "could", "will": "will", "would": "would",
"shall": "shall", "should": "should", "may": "may", "might": "might",
"must": "must", "ought": "ought", "dare": "dare", "need": "need",
"used": "used", "going": "going",
# 3. Giới từ/Liên từ
"and": "AND", "or": "or", "but": "but", "for": "for", "nor": "nor",
"yet": "yet", "so": "so", "the": "the", "a": "a", "an": "an",
"this": "this", "that": "that", "these": "these", "those": "those",
"in": "in", "on": "on", "at": "at", "by": "by", "with": "with",
"without": "without", "to": "to", "from": "from", "of": "of",
"about": "about", "as": "as", "like": "like", "up": "up", "down": "down",
"over": "over", "under": "under", "between": "between", "among": "among",
"through": "through", "during": "during", "before": "before",
"after": "after", "above": "above", "below": "below", "near": "near",
"since": "since", "until": "until", "while": "while", "although": "although",
"though": "though", "whether": "whether", "unless": "unless",
"because": "because", "if": "if", "than": "than", "once": "once",
# 4. Trạng từ/Phó từ
"not": "not", "also": "also", "very": "very", "too": "too",
"just": "just", "only": "only", "even": "even", "now": "now",
"then": "then", "here": "here", "there": "there", "always": "always",
"never": "never", "sometimes": "sometimes", "usually": "usually",
"often": ", often", "rarely": "rarely", "seldom": "seldom",
"already": "already", "yet": "yet", "still": "still", "almost": "almost",
"enough": "enough", "quite": "quite", "rather": "rather",
"maybe": "maybe", "perhaps": "perhaps", "probably": "probably",
"absolutely": "absolutely", "completely": "completely", "totally": "totally",
"utterly": "utterly", "literally": "literally", "basically": "basically",
"actually": "actually", "certainly": "certainly", "clearly": "clearly",
"obviously": "obviously", "possibly": "possibly", "surely": "surely",
# 5. Từ để hỏi
"what": "what", "when": "when", "where": "where", "why": "why",
"how": "how", "who": "who", "whom": "whom", "whose": "whose",
"which": "which", "whether": "whether",
# 6. Từ cảm thán
"oh": "oh", "wow": "wow", "ouch": "ouch", "oops": "oops",
"hey": "hey", "hi": "hi", "hello": "hello", "goodbye": "goodbye",
"please": "please", "thanks": "thanks", "thank you": "thank you",
"sorry": "sorry", "yes": "yes", "no": "no", "okay": "okay",
# 9. Từ thông dụng khác
"course": ", course", "keep": "KEEP", "Welcome": "WELCOM",
"really": "REALLY", "ok": "okay", "okay": "okay", "good": "good",
"bad": "bad", "new": "new", "old": "old", "first": "first",
"last": "last", "great": "great", "small": "small", "big": "big",
"high": "high", "low": "low", "next": "next", "same": "same",
"different": "different", "another": "another", "each": "each",
"every": "every", "few": "few", "many": "many", "most": "most",
"much": "much", "some": "some", "such": "such", "all": "all",
"any": "any", "both": "both", "either": "either", "neither": "neither",
"none": "none", "own": "own", "other": "other", "rather": "rather",
"several": "several", "half": "half", "whole": "whole",
"true": "true", "false": "false", "right": "right", "wrong": "wrong",
"important": "important", "interesting": "interesting",
"necessary": "necessary", "possible": "possible",
"beautiful": "beautiful", "wonderful": "wonderful",
"excellent": "excellent", "perfect": "perfect",
"awesome": "awesome", "terrible": "terrible",
"horrible": "horrible", "delicious": "delicious",
"important": "important", "special": "special",
"amazing": "amazing", "incredible": "incredible",
"unbelievable": "unbelievable", "serious": "serious",
"funny": "funny", "happy": "happy", "sad": "sad",
"angry": "angry", "surprised": "surprised",
"excited": "excited", "bored": "bored", "tired": "tired",
"hungry": "hungry", "thirsty": "thirsty", "ready": "ready",
"busy": "busy", "free": "free", "open": "open", "closed": "closed",
"easy": "easy", "difficult": "difficult", "simple": "simple",
"complex": "complex", "clean": "clean", "dirty": "dirty",
"early": "early", "late": "late", "fast": "fast", "slow": "slow",
"hot": "hot", "cold": "cold", "warm": "warm", "cool": "cool",
"light": "light", "dark": "dark", "noisy": "noisy", "quiet": "quiet",
"expensive": "expensive", "cheap": "cheap", "rich": "rich",
"poor": "poor", "strong": "strong", "weak": "weak", "healthy": "healthy",
"sick": "sick", "dead": "dead", "alive": "alive", "real": "real",
"fake": "fake", "modern": "modern", "ancient": "ancient",
"young": "young", "future": "future", "past": "past",
"present": "present", "current": "current", "recent": "recent",
"immediate": "immediate", "sudden": "sudden", "final": "final",
"initial": "initial", "primary": "primary", "secondary": "secondary",
"main": "main", "major": "major", "minor": "minor",
"average": "average", "normal": "normal", "usual": "usual",
"unusual": "unusual", "common": "common", "rare": "rare",
"unique": "unique", "strange": "strange", "odd": "odd",
"typical": "typical", "traditional": "traditional",
"original": "original", "basic": "basic", "advanced": "advanced",
"professional": "professional", "personal": "personal",
"private": "private", "public": "public", "national": "national",
"international": "international", "local": "local",
"global": "global", "general": "general", "specific": "specific",
"particular": "particular", "special": "special",
"especial": "especial", "various": "various", "several": "several",
"numerous": "numerous", "countless": "countless", "endless": "endless",
"limited": "limited", "unlimited": "unlimited", "enough": "enough",
"plenty": "plenty", "extra": "extra", "additional": "additional",
"further": "further", "following": "following", "previous": "previous",
"former": "former", "latter": "latter", "upper": "upper",
"lower": "lower", "inner": "inner", "outer": "outer",
"external": "external", "internal": "internal", "central": "central",
"middle": "middle", "intermediate": "intermediate", "direct": "direct",
"indirect": "indirect", "positive": "positive", "negative": "negative",
"neutral": "neutral", "active": "active", "passive": "passive",
"constant": "constant", "variable": "variable", "regular": "regular",
"irregular": "irregular", "proper": "proper", "improper": "improper",
"correct": "correct", "incorrect": "incorrect", "accurate": "accurate",
"inaccurate": "inaccurate", "exact": "exact", "approximate": "approximate",
"precise": "precise", "vague": "vague", "clear": "clear",
"unclear": "unclear", "obvious": "obvious", "apparent": "apparent",
"transparent": "transparent", "opaque": "opaque", "visible": "visible",
"invisible": "invisible", "audible": "audible", "inaudible": "inaudible",
"tangible": "tangible", "intangible": "intangible", "physical": "physical",
"mental": "mental", "emotional": "emotional", "spiritual": "spiritual",
"intellectual": "intellectual", "psychological": "psychological",
"social": "social", "cultural": "cultural", "political": "political",
"economic": "economic", "financial": "financial", "commercial": "commercial",
"industrial": "industrial", "technological": "technological",
"scientific": "scientific", "medical": "medical", "legal": "legal",
"official": "official", "unofficial": "unofficial", "formal": "formal",
"informal": "informal", "casual": "casual", "natural": "natural",
"artificial": "artificial", "synthetic": "synthetic", "organic": "organic",
"inorganic": "inorganic", "genuine": "genuine", "fake": "fake",
"authentic": "authentic", "counterfeit": "counterfeit", "valid": "valid",
"invalid": "invalid", "void": "void", "null": "null", "empty": "empty",
"full": "full", "complete": "complete", "incomplete": "incomplete",
"partial": "partial", "total": "total", "entire": "entire",
"whole": "whole", "half": "half", "quarter": "quarter",
"third": "third", "fourth": "fourth", "fifth": "fifth",
"sixth": "sixth", "seventh": "seventh", "eighth": "eighth",
"ninth": "ninth", "tenth": "tenth", "hundredth": "hundredth",
"thousandth": "thousandth", "millionth": "millionth",
"billionth": "billionth", "trillionth": "trillionth",
"single": "single", "double": "double", "triple": "triple",
"quadruple": "quadruple", "quintuple": "quintuple",
"sextuple": "sextuple", "septuple": "septuple", "octuple": "octuple",
"nonuple": "nonuple", "decuple": "decuple", "multiple": "multiple",
"manifold": "manifold", "various": "various", "diverse": "diverse",
"different": "different", "similar": "similar", "alike": "alike",
"identical": "identical", "same": "same", "equal": "equal",
"equivalent": "equivalent", "parallel": "parallel", "perpendicular": "perpendicular",
"horizontal": "horizontal", "vertical": "vertical", "diagonal": "diagonal",
"straight": "straight", "curved": "curved", "circular": "circular",
"spherical": "spherical", "square": "square", "rectangular": "rectangular",
"triangular": "triangular", "round": "round", "oval": "oval",
"flat": "flat", "smooth": "smooth", "rough": "rough", "soft": "soft",
"hard": "hard", "flexible": "flexible", "rigid": "rigid",
"elastic": "elastic", "plastic": "plastic", "liquid": "liquid",
"solid": "solid", "gaseous": "gaseous", "fluid": "fluid",
"viscous": "viscous", "dense": "dense", "thin": "thin",
"thick": "thick", "shallow": "shallow", "deep": "deep",
"narrow": "narrow", "wide": "wide", "broad": "broad",
"long": "long", "short": "short", "tall": "tall", "huge": "huge",
"tiny": "tiny", "giant": "giant", "gigantic": "gigantic",
"enormous": "enormous", "immense": "immense", "vast": "vast",
"colossal": "colossal", "massive": "massive", "monumental": "monumental",
"tremendous": "tremendous", "stupendous": "stupendous",
"prodigious": "prodigious", "marvelous": "marvelous",
"wonderful": "wonderful", "fantastic": "fantastic",
"fabulous": "fabulous", "terrific": "terrific", "superb": "superb",
"splendid": "splendid", "magnificent": "magnificent",
"glorious": "glorious", "divine": "divine", "heavenly": "heavenly",
"celestial": "celestial", "terrestrial": "terrestrial",
"earthly": "earthly", "mundane": "mundane", "worldly": "worldly",
"human": "human", "humane": "humane", "animal": "animal",
"vegetable": "vegetable", "mineral": "mineral", "metallic": "metallic",
"wooden": "wooden", "glass": "glass", "plastic": "plastic",
"rubber": "rubber", "cloth": "cloth", "paper": "paper",
"leather": "leather", "fur": "fur", "wool": "wool", "silk": "silk",
"cotton": "cotton", "linen": "linen", "nylon": "nylon",
"polyester": "polyester", "rayon": "rayon", "acrylic": "acrylic",
"spandex": "spandex", "denim": "denim", "velvet": "velvet",
"satin": "satin", "lace": "lace", "tulle": "tulle",
"chiffon": "chiffon", "taffeta": "taffeta", "organza": "organza",
"georgette": "georgette", "crepe": "crepe", "jersey": "jersey",
"tweed": "tweed", "corduroy": "corduroy", "flannel": "flannel",
"cashmere": "cashmere", "angora": "angora", "mohair": "mohair",
"alpaca": "alpaca", "llama": "llama", "camel": "camel",
"vicuna": "vicuna", "guanaco": "guanaco", "bison": "bison",
"buffalo": "buffalo", "yak": "yak", "muskox": "muskox",
"qiviut": "qiviut", "chiengora": "chiengora", "dog": "dog",
"cat": "cat", "horse": "horse", "cow": "cow", "pig": "pig",
"sheep": "sheep", "goat": "goat", "chicken": "chicken",
"duck": "duck", "goose": "goose", "turkey": "turkey",
"rabbit": "rabbit", "deer": "deer", "moose": "moose",
"elk": "elk", "caribou": "caribou", "reindeer": "reindeer",
"antelope": "antelope", "gazelle": "gazelle", "impala": "impala",
"springbok": "springbok", "oryx": "oryx", "addax": "addax",
"kudu": "kudu", "eland": "eland", "nyala": "nyala", "bongo": "bongo",
"sable": "sable", "roan": "roan", "waterbuck": "waterbuck",
"lechwe": "lechwe", "puku": "puku", "sitatunga": "sitatunga",
"bushbuck": "bushbuck", "duiker": "duiker", "klipspringer": "klipspringer",
"steenbok": "steenbok", "grysbok": "grysbok", "dikdik": "dikdik",
"sun": "sun", "moon": "moon", "star": "star", "planet": "planet",
"comet": "comet", "asteroid": "asteroid", "meteor": "meteor",
"meteorite": "meteorite", "meteoroid": "meteoroid", "galaxy": "galaxy",
"universe": "universe", "cosmos": "cosmos", "space": "space",
"time": "time", "light": "light", "dark": "dark", "energy": "energy",
"matter": "matter", "atom": "atom", "molecule": "molecule",
"element": "element", "compound": "compound", "mixture": "mixture",
"solution": "solution", "suspension": "suspension",
"colloid": "colloid", "emulsion": "emulsion", "foam": "foam",
"aerosol": "aerosol", "gel": "gel", "sol": "sol", "solid": "solid",
"liquid": "liquid", "gas": "gas", "plasma": "plasma",
"crystal": "crystal", "powder": "powder", "granule": "granule",
"particle": "particle", "grain": "grain", "pebble": "pebble",
"stone": "stone", "rock": "rock", "boulder": "boulder",
"sand": "sand", "soil": "soil", "clay": "clay", "mud": "mud",
"dust": "dust", "dirt": "dirt", "earth": "earth", "ground": "ground",
"land": "land", "terrain": "terrain", "topography": "topography",
"geography": "geography", "geology": "geology", "ocean": "ocean",
"sea": "sea", "lake": "lake", "river": "river", "stream": "stream",
"creek": "creek", "brook": "brook", "pond": "pond", "pool": "pool",
"puddle": "puddle", "swamp": "swamp", "marsh": "marsh",
"bog": "bog", "fen": "fen", "wetland": "wetland", "delta": "delta",
"estuary": "estuary", "lagoon": "lagoon", "fjord": "fjord",
"bay": "bay", "gulf": "gulf", "harbor": "harbor", "port": "port",
"dock": "dock", "pier": "pier", "wharf": "wharf", "quay": "quay",
"marina": "marina", "canal": "canal", "channel": "channel",
"strait": "strait", "sound": "sound", "inlet": "inlet",
"cove": "cove", "beach": "beach", "shore": "shore",
"coast": "coast", "bank": "bank", "cliff": "cliff",
"bluff": "bluff", "dune": "dune", "hill": "hill",
"mountain": "mountain", "valley": "valley", "canyon": "canyon",
"gorge": "gorge", "ravine": "ravine", "glen": "glen",
"dale": "dale", "meadow": "meadow", "field": "field",
"pasture": "pasture", "prairie": "prairie", "savanna": "savanna",
"steppe": "steppe", "tundra": "tundra", "desert": "desert",
"jungle": "jungle", "forest": "forest", "woodland": "woodland",
"grove": "grove", "orchard": "orchard", "vineyard": "vineyard",
"park": "park", "garden": "garden", "lawn": "lawn",
"yard": "yard", "courtyard": "courtyard", "plaza": "plaza",
"square": "square", "circle": "circle", "triangle": "triangle",
"rectangle": "rectangle", "pentagon": "pentagon", "hexagon": "hexagon",
"octagon": "octagon", "oval": "oval", "ellipse": "ellipse",
"cube": "cube", "sphere": "sphere", "cylinder": "cylinder",
"cone": "cone", "pyramid": "pyramid", "prism": "prism",
"tetrahedron": "tetrahedron", "dodecahedron": "dodecahedron",
"icosahedron": "icosahedron", "torus": "torus", "helix": "helix",
"spiral": "spiral", "fractal": "fractal", "dimension": "dimension",
"point": "point", "line": "line", "plane": "plane",
"surface": "surface", "volume": "volume", "angle": "angle",
"degree": "degree", "radian": "radian", "gradian": "gradian",
"minute": "minute", "second": "second", "hour": "hour",
"day": "day", "week": "week", "month": "month", "year": "year",
"decade": "decade", "century": "century", "millennium": "millennium",
"eon": "eon", "era": "era", "epoch": "epoch", "age": "age",
"generation": "generation", "lifetime": "lifetime",
"moment": "moment", "instant": "instant", "period": "period",
"duration": "duration", "interval": "interval", "span": "span",
"term": "term", "session": "session", "meeting": "meeting",
"conference": "conference", "convention": "convention",
"symposium": "symposium", "seminar": "seminar", "workshop": "workshop",
"lecture": "lecture", "presentation": "presentation",
"demonstration": "demonstration", "exhibition": "exhibition",
"fair": "fair", "show": "show", "performance": "performance",
"concert": "concert", "recital": "recital", "play": "play",
"drama": "drama", "opera": "opera", "ballet": "ballet",
"musical": "musical", "movie": "movie", "film": "film",
"cinema": "cinema", "theater": "theater", "television": "television",
"radio": "radio", "broadcast": "broadcast", "podcast": "podcast",
"webcast": "webcast", "stream": "stream", "download": "download",
"upload": "upload", "file": "file", "document": "document",
"record": "record", "archive": "archive", "database": "database",
"library": "library", "catalog": "catalog", "index": "index",
"directory": "directory", "folder": "folder", "album": "album",
"collection": "collection", "set": "set", "series": "series",
"sequence": "sequence", "array": "array", "matrix": "matrix",
"grid": "grid", "table": "table", "chart": "chart",
"graph": "graph", "diagram": "diagram", "map": "map",
"plan": "plan", "scheme": "scheme", "blueprint": "blueprint",
"design": "design", "model": "model", "prototype": "prototype",
"template": "template", "pattern": "pattern", "sample": "sample",
"example": "example", "instance": "instance", "case": "case",
"item": "item", "unit": "unit", "piece": "piece", "part": "part",
"section": "section", "segment": "segment", "portion": "portion",
"fraction": "fraction", "percentage": "percentage", "ratio": "ratio",
"proportion": "proportion", "rate": "rate", "speed": "speed",
"velocity": "velocity", "acceleration": "acceleration",
"momentum": "momentum", "force": "force", "pressure": "pressure",
"stress": "stress", "strain": "strain", "tension": "tension",
"compression": "compression", "shear": "shear", "torque": "torque",
"energy": "energy", "work": "work", "power": "power",
"efficiency": "efficiency", "capacity": "capacity",
"potential": "potential", "kinetic": "kinetic", "thermal": "thermal",
"electrical": "electrical", "magnetic": "magnetic",
"nuclear": "nuclear", "chemical": "chemical", "atomic": "atomic",
"molecular": "molecular", "quantum": "quantum", "relativistic": "relativistic",
"gravitational": "gravitational", "electromagnetic": "electromagnetic",
"weak": "weak", "strong": "strong", "fundamental": "fundamental",
"basic": "basic", "advanced": "advanced", "complex": "complex",
"simple": "simple", "difficult": "difficult", "easy": "easy",
"hard": "hard", "soft": "soft", "rigid": "rigid", "flexible": "flexible",
"elastic": "elastic", "plastic": "plastic", "ductile": "ductile",
"malleable": "malleable", "brittle": "brittle", "tough": "tough",
"hardy": "hardy", "resilient": "resilient", "fragile": "fragile",
"delicate": "delicate", "sturdy": "sturdy", "robust": "robust",
"durable": "durable", "stable": "stable", "unstable": "unstable",
"volatile": "volatile", "constant": "constant", "variable": "variable",
"random": "random", "chaotic": "chaotic", "ordered": "ordered",
"disordered": "disordered", "organized": "organized",
"disorganized": "disorganized", "systematic": "systematic",
"methodical": "methodical", "logical": "logical", "illogical": "illogical",
"rational": "rational", "irrational": "irrational", "reasonable": "reasonable",
"unreasonable": "unreasonable", "sensible": "sensible", "nonsensical": "nonsensical",
"absurd": "absurd", "ridiculous": "ridiculous", "ludicrous": "ludicrous",
"preposterous": "preposterous", "outrageous": "outrageous", "scandalous": "scandalous",
"shocking": "shocking", "astonishing": "astonishing", "amazing": "amazing",
"astounding": "astounding", "staggering": "staggering", "stunning": "stunning",
"breathtaking": "breathtaking", "mindblowing": "mindblowing", "jawdropping": "jawdropping",
"eyeopening": "eyeopening", "earthshaking": "earthshaking", "groundbreaking": "groundbreaking",
"revolutionary": "revolutionary", "innovative": "innovative", "original": "original",
"creative": "creative", "imaginative": "imaginative", "inventive": "inventive",
"resourceful": "resourceful", "ingenious": "ingenious", "clever": "clever",
"smart": "smart", "intelligent": "intelligent", "brilliant": "brilliant",
"genius": "genius", "gifted": "gifted", "talented": "talented",
"skilled": "skilled", "adept": "adept", "proficient": "proficient",
"competent": "competent", "capable": "capable", "able": "able",
"qualified": "qualified", "experienced": "experienced", "practiced": "practiced",
"seasoned": "seasoned", "veteran": "veteran", "expert": "expert",
"master": "master", "professional": "professional", "specialist": "specialist",
"authority": "authority", "scholar": "scholar", "academic": "academic",
"scientist": "scientist", "researcher": "researcher", "investigator": "investigator",
"analyst": "analyst", "theorist": "theorist", "philosopher": "philosopher",
"thinker": "thinker", "intellectual": "intellectual", "sage": "sage",
"wise": "wise", "knowledgeable": "knowledgeable", "learned": "learned",
"educated": "educated", "literate": "literate", "illiterate": "illiterate",
"ignorant": "ignorant", "uninformed": "uninformed", "naive": "naive",
"innocent": "innocent", "simple": "simple", "unsophisticated": "unsophisticated",
"gullible": "gullible", "credulous": "credulous", "trusting": "trusting",
"skeptical": "skeptical", "doubtful": "doubtful", "suspicious": "suspicious",
"cynical": "cynical", "pessimistic": "pessimistic", "optimistic": "optimistic",
"hopeful": "hopeful", "confident": "confident", "certain": "certain",
"sure": "sure", "positive": "positive", "negative": "negative",
"neutral": "neutral", "indifferent": "indifferent", "apathetic": "apathetic",
"unconcerned": "unconcerned", "detached": "detached", "disinterested": "disinterested",
"objective": "objective", "subjective": "subjective", "biased": "biased",
"prejudiced": "prejudiced", "partial": "partial", "impartial": "impartial",
"fair": "fair", "just": "just", "unjust": "unjust", "righteous": "righteous",
"virtuous": "virtuous", "moral": "moral", "ethical": "ethical",
"principled": "principled", "honorable": "honorable", "noble": "noble",
"upstanding": "upstanding", "respectable": "respectable", "decent": "decent",
"good": "good", "evil": "evil", "wicked": "wicked", "sinful": "sinful",
"immoral": "immoral", "unethical": "unethical", "corrupt": "corrupt",
"dishonest": "dishonest", "fraudulent": "fraudulent", "deceitful": "deceitful",
"lying": "lying", "false": "false", "untrue": "untrue", "truthful": "truthful",
"honest": "honest", "sincere": "sincere", "genuine": "genuine",
"authentic": "authentic", "real": "real", "fake": "fake",
"counterfeit": "counterfeit", "forged": "forged", "sham": "sham",
"bogus": "bogus", "phony": "phony", "artificial": "artificial",
"synthetic": "synthetic", "manmade": "manmade", "natural": "natural",
"organic": "organic", "pure": "pure", "impure": "impure",
"contaminated": "contaminated", "polluted": "polluted", "toxic": "toxic",
"poisonous": "poisonous", "hazardous": "hazardous", "dangerous": "dangerous",
"risky": "risky", "perilous": "perilous", "precarious": "precarious",
"uncertain": "uncertain", "unpredictable": "unpredictable", "volatile": "volatile",
"unstable": "unstable", "insecure": "insecure", "safe": "safe",
"secure": "secure", "protected": "protected", "guarded": "guarded",
"defended": "defended", "shielded": "shielded", "sheltered": "sheltered",
"immune": "immune", "resistant": "resistant", "vulnerable": "vulnerable",
"susceptible": "susceptible", "exposed": "exposed", "open": "open",
"closed": "closed", "shut": "shut", "locked": "locked",
"unlocked": "unlocked", "sealed": "sealed", "unsealed": "unsealed",
"tight": "tight", "loose": "loose", "firm": "firm",
"stable": "stable", "steady": "steady", "unsteady": "unsteady",
"shaky": "shaky", "wobbly": "wobbly", "rocky": "rocky",
"balanced": "balanced", "unbalanced": "unbalanced", "level": "level",
"even": "even", "uneven": "uneven", "flat": "flat",
"smooth": "smooth", "rough": "rough", "bumpy": "bumpy",
"lumpy": "lumpy", "jagged": "jagged", "pointed": "pointed",
"sharp": "sharp", "blunt": "blunt", "dull": "dull",
"rounded": "rounded", "curved": "curved", "straight": "straight",
"bent": "bent", "twisted": "twisted", "coiled": "coiled",
"spiral": "spiral", "helical": "helical", "circular": "circular",
"round": "round", "oval": "oval", "elliptical": "elliptical",
"spherical": "spherical", "globular": "globular", "cylindrical": "cylindrical",
"conical": "conical", "pyramidal": "pyramidal", "cubical": "cubical",
"rectangular": "rectangular", "triangular": "triangular", "square": "square",
"pentagonal": "pentagonal", "hexagonal": "hexagonal", "octagonal": "octagonal",
"polygonal": "polygonal", "symmetrical": "symmetrical", "asymmetrical": "asymmetrical",
"regular": "regular", "irregular": "irregular", "uniform": "uniform",
"nonuniform": "nonuniform", "consistent": "consistent", "inconsistent": "inconsistent",
"coherent": "coherent", "incoherent": "incoherent", "logical": "logical",
"illogical": "illogical", "rational": "rational", "irrational": "irrational",
"reasonable": "reasonable", "unreasonable": "unreasonable", "sensible": "sensible",
"nonsensical": "nonsensical", "absurd": "absurd", "ridiculous": "ridiculous",
"ludicrous": "ludicrous", "preposterous": "preposterous", "outrageous": "outrageous",
"scandalous": "scandalous", "shocking": "shocking", "astonishing": "astonishing",
"amazing": "amazing", "astounding": "astounding", "staggering": "staggering",
"stunning": "stunning", "breathtaking": "breathtaking", "mindblowing": "mindblowing",
"jawdropping": "jawdropping", "eyeopening": "eyeopening", "earthshaking": "earthshaking",
"groundbreaking": "groundbreaking", "revolutionary": "revolutionary", "innovative": "innovative",
"original": "original", "creative": "creative", "imaginative": "imaginative",
"inventive": "inventive", "resourceful": "resourceful", "ingenious": "ingenious",
"clever": "clever", "smart": "smart", "intelligent": "intelligent",
"brilliant": "brilliant", "genius": "genius", "gifted": "gifted",
"talented": "talented", "skilled": "skilled", "adept": "adept",
"proficient": "proficient", "competent": "competent", "capable": "capable",
"able": "able", "qualified": "qualified", "experienced": "experienced",
"practiced": "practiced", "seasoned": "seasoned", "veteran": "veteran",
"expert": "expert", "master": "master", "professional": "professional",
"specialist": "specialist", "authority": "authority", "scholar": "scholar",
"academic": "academic", "scientist": "scientist", "researcher": "researcher",
"investigator": "investigator", "analyst": "analyst", "theorist": "theorist",
"philosopher": "philosopher", "thinker": "thinker", "intellectual": "intellectual",
"sage": "sage", "wise": "wise", "knowledgeable": "knowledgeable",
"learned": "learned", "educated": "educated", "literate": "literate",
"illiterate": "illiterate", "ignorant": "ignorant", "uninformed": "uninformed",
"naive": "naive", "innocent": "innocent", "simple": "simple",
"unsophisticated": "unsophisticated", "gullible": "gullible", "credulous": "credulous",
"trusting": "trusting", "skeptical": "skeptical", "doubtful": "doubtful",
"suspicious": "suspicious", "cynical": "cynical", "pessimistic": "pessimistic",
"optimistic": "optimistic", "hopeful": "hopeful", "confident": "confident",
"certain": "certain", "sure": "sure", "positive": "positive",
"negative": "negative", "neutral": "neutral", "indifferent": "indifferent",
"apathetic": "apathetic", "unconcerned": "unconcerned", "detached": "detached",
"disinterested": "disinterested", "objective": "objective", "subjective": "subjective",
"biased": "biased", "prejudiced": "prejudiced", "partial": "partial",
"impartial": "impartial", "fair": "fair", "just": "just",
"unjust": "unjust", "righteous": "righteous", "virtuous": "virtuous",
"moral": "moral", "ethical": "ethical", "principled": "principled",
"honorable": "honorable", "noble": "noble", "upstanding": "upstanding",
"respectable": "respectable", "decent": "decent", "good": "good",
"evil": "evil", "wicked": "wicked", "sinful": "sinful",
"immoral": "immoral", "unethical": "unethical", "corrupt": "corrupt",
"dishonest": "dishonest", "fraudulent": "fraudulent", "deceitful": "deceitful",
"lying": "lying", "false": "false", "untrue": "untrue",
"truthful": "truthful", "honest": "honest", "sincere": "sincere",
"genuine": "genuine", "authentic": "authentic", "real": "real",
"fake": "fake", "counterfeit": "counterfeit", "forged": "forged",
"sham": "sham", "bogus": "bogus", "phony": "phony",
"artificial": "artificial", "synthetic": "synthetic", "manmade": "manmade",
"natural": "natural", "organic": "organic", "pure": "pure",
"impure": "impure", "contaminated": "contaminated", "polluted": "polluted",
"toxic": "toxic", "poisonous": "poisonous", "hazardous": "hazardous",
"dangerous": "dangerous", "risky": "risky", "perilous": "perilous",
"precarious": "precarious", "uncertain": "uncertain", "unpredictable": "unpredictable",
"volatile": "volatile", "unstable": "unstable", "insecure": "insecure",
"safe": "safe", "secure": "secure", "protected": "protected",
"guarded": "guarded", "defended": "defended", "shielded": "shielded",
"sheltered": "sheltered", "immune": "immune", "resistant": "resistant",
"vulnerable": "vulnerable", "susceptible": "susceptible", "exposed": "exposed",
"open": "open", "closed": "closed", "shut": "shut",
"locked": "locked", "unlocked": "unlocked", "sealed": "sealed",
"unsealed": "unsealed", "tight": "tight", "loose": "loose",
"firm": "firm", "stable": "stable", "steady": "steady",
"unsteady": "unsteady", "shaky": "shaky", "wobbly": "wobbly",
"rocky": "rocky", "balanced": "balanced", "unbalanced": "unbalanced",
"level": "level", "even": "even", "uneven": "uneven",
"flat": "flat", "smooth": "smooth", "rough": "rough",
"bumpy": "bumpy", "lumpy": "lumpy", "jagged": "jagged",
"pointed": "pointed", "sharp": "sharp", "blunt": "blunt",
"dull": "dull", "rounded": "rounded", "curved": "curved",
"straight": "straight", "bent": "bent", "twisted": "twisted",
"coiled": "coiled", "spiral": "spiral", "helical": "helical",
"circular": "circular", "round": "round", "oval": "oval",
"elliptical": "elliptical", "spherical": "spherical", "globular": "globular",
"cylindrical": "cylindrical", "conical": "conical", "pyramidal": "pyramidal",
"cubical": "cubical", "rectangular": "rectangular", "triangular": "triangular",
"square": "square", "pentagonal": "pentagonal", "hexagonal": "hexagonal",
"octagonal": "octagonal", "polygonal": "polygonal", "symmetrical": "symmetrical",
"asymmetrical": "asymmetrical", "regular": "regular", "irregular": "irregular",
"uniform": "uniform", "nonuniform": "nonuniform", "consistent": "consistent",
"inconsistent": "inconsistent", "coherent": "coherent", "incoherent": "incoherent",
"logical": "logical", "illogical": "illogical", "rational": "rational",
"irrational": "irrational", "reasonable": "reasonable", "unreasonable": "unreasonable",
"sensible": "sensible", "nonsensical": "nonsensical", "absurd": "absurd",
"ridiculous": "ridiculous", "ludicrous": "ludicrous", "preposterous": "preposterous",
"outrageous": "outrageous", "scandalous": "scandalous", "shocking": "shocking",
"astonishing": "astonishing", "amazing": "amazing", "astounding": "astounding",
"staggering": "staggering", "stunning": "stunning", "breathtaking": "breathtaking",
"mindblowing": "mindblowing", "jawdropping": "jawdropping", "eyeopening": "eyeopening",
"earthshaking": "earthshaking", "groundbreaking": "groundbreaking", "revolutionary": "revolutionary",
"innovative": "innovative", "original": "original", "creative": "creative",
"imaginative": "imaginative", "inventive": "inventive", "resourceful": "resourceful",
"ingenious": "ingenious", "clever": "clever", "smart": "smart",
"intelligent": "intelligent", "brilliant": "brilliant", "genius": "genius",
"gifted": "gifted", "talented": "talented", "skilled": "skilled",
"adept": "adept", "proficient": "proficient", "competent": "competent",
"capable": "capable", "able": "able", "qualified": "qualified",
"experienced": "experienced", "practiced": "practiced", "seasoned": "seasoned",
"veteran": "veteran", "expert": "expert", "master": "master",
"professional": "professional", "specialist": "specialist", "authority": "authority",
"scholar": "scholar", "academic": "academic", "scientist": "scientist",
"researcher": "researcher", "investigator": "investigator", "analyst": "analyst",
"theorist": "theorist", "philosopher": "philosopher", "thinker": "thinker",
"intellectual": "intellectual", "sage": "sage", "wise": "wise",
"knowledgeable": "knowledgeable", "learned": "learned", "educated": "educated",
"literate": "literate", "illiterate": "illiterate", "ignorant": "ignorant",
"uninformed": "uninformed", "naive": "naive", "innocent": "innocent",
"simple": "simple", "unsophisticated": "unsophisticated", "gullible": "gullible",
"credulous": "credulous", "trusting": "trusting", "skeptical": "skeptical",
"doubtful": "doubtful", "suspicious": "suspicious", "cynical": "cynical",
"pessimistic": "pessimistic", "optimistic": "optimistic", "hopeful": "hopeful",
"confident": "confident", "certain": "certain", "sure": "sure",
"positive": "positive", "negative": "negative", "neutral": "neutral",
"indifferent": "indifferent", "apathetic": "apathetic", "unconcerned": "unconcerned",
"detached": "detached", "disinterested": "disinterested", "objective": "objective",
"subjective": "subjective", "biased": "biased", "prejudiced": "prejudiced",
"partial": "partial", "impartial": "impartial", "fair": "fair",
"just": "just", "unjust": "unjust", "righteous": "righteous",
"virtuous": "virtuous", "moral": "moral", "ethical": "ethical",
"principled": "principled", "honorable": "honorable", "noble": "noble",
"upstanding": "upstanding", "respectable": "respectable", "decent": "decent",
"good": "good", "evil": "evil", "wicked": "wicked",
"sinful": "sinful", "immoral": "immoral", "unethical": "unethical",
"corrupt": "corrupt", "dishonest": "dishonest", "fraudulent": "fraudulent",
"deceitful": "deceitful", "lying": "lying", "false": "false",
"untrue": "untrue", "truthful": "truthful", "honest": "honest",
"sincere": "sincere", "genuine": "genuine", "authentic": "authentic",
"real": "real", "fake": "fake", "counterfeit": "counterfeit",
"forged": "forged", "sham": "sham", "bogus": "bogus",
"phony": "phony", "artificial": "artificial", "synthetic": "synthetic",
"manmade": "manmade", "natural": "natural", "organic": "organic",
"pure": "pure", "impure": "impure", "contaminated": "contaminated",
"polluted": "polluted", "toxic": "toxic", "poisonous": "poisonous",
"hazardous": "hazardous", "dangerous": "dangerous", "risky": "risky",
"perilous": "perilous", "precarious": "precarious", "uncertain": "uncertain",
"unpredictable": "unpredictable", "volatile": "volatile", "unstable": "unstable",
"insecure": "insecure", "safe": "safe", "secure": "secure",
"protected": "protected", "guarded": "guarded", "defended": "defended",
"shielded": "shielded", "sheltered": "sheltered", "immune": "immune",
"resistant": "resistant", "vulnerable": "vulnerable", "susceptible": "susceptible",
"exposed": "exposed", "open": "open", "closed": "closed",
"shut": "shut", "locked": "locked", "unlocked": "unlocked",
"sealed": "sealed", "unsealed": "unsealed", "tight": "tight",
"loose": "loose", "firm": "firm", "stable": "stable",
"steady": "steady", "unsteady": "unsteady", "shaky": "shaky",
"wobbly": "wobbly", "rocky": "rocky", "balanced": "balanced",
"unbalanced": "unbalanced", "level": "level", "even": "even",
"uneven": "uneven", "flat": "flat", "smooth": "smooth",
"rough": "rough", "bumpy": "bumpy", "lumpy": "lumpy",
"jagged": "jagged", "pointed": "pointed", "sharp": "sharp",
"blunt": "blunt", "dull": "dull", "rounded": "rounded",
"curved": "curved", "straight": "straight", "bent": "bent",
"twisted": "twisted", "coiled": "coiled", "spiral": "spiral",
"helical": "helical", "circular": "circular", "round": "round",
"oval": "oval", "elliptical": "elliptical", "spherical": "spherical",
"globular": "globular", "cylindrical": "cylindrical", "conical": "conical",
"pyramidal": "pyramidal", "cubical": "cubical", "rectangular": "rectangular",
"triangular": "triangular", "square": "square", "pentagonal": "pentagonal",
"hexagonal": "hexagonal", "octagonal": "octagonal", "polygonal": "polygonal",
"symmetrical": "symmetrical", "asymmetrical": "asymmetrical", "regular": "regular",
"irregular": "irregular", "uniform": "uniform", "nonuniform": "nonuniform",
"consistent": "consistent", "inconsistent": "inconsistent", "coherent": "coherent",
"incoherent": "incoherent", "logical": "logical", "illogical": "illogical",
"rational": "rational", "irrational": "irrational", "reasonable": "reasonable",
"unreasonable": "unreasonable", "sensible": "sensible", "nonsensical": "nonsensical",
"absurd": "absurd", "ridiculous": "ridiculous", "ludicrous": "ludicrous",
"preposterous": "preposterous", "outrageous": "outrageous", "scandalous": "scandalous",
"shocking": "shocking", "astonishing": "astonishing", "amazing": "amazing",
"astounding": "astounding", "staggering": "staggering", "stunning": "stunning",
"breathtaking": "breathtaking", "mindblowing": "mindblowing", "jawdropping": "jawdropping",
"eyeopening": "eyeopening", "earthshaking": "earthshaking", "groundbreaking": "groundbreaking",
"revolutionary": "revolutionary", "innovative": "innovative", "original": "original",
"creative": "creative", "imaginative": "imaginative", "inventive": "inventive",
"resourceful": "resourceful", "ingenious": "ingenious", "clever": "clever",
"smart": "smart", "intelligent": "intelligent", "brilliant": "brilliant",
"genius": "genius", "gifted": "gifted", "talented": "talented",
"skilled": "skilled", "adept": "adept", "proficient": "proficient",
"competent": "competent", "capable": "capable", "able": "able",
"qualified": "qualified", "experienced": "experienced", "practiced": "practiced",
"seasoned": "seasoned", "veteran": "veteran", "expert": "expert",
"master": "master", "professional": "professional", "specialist": "specialist",
"authority": "authority", "scholar": "scholar", "academic": "academic",
"scientist": "scientist", "researcher": "researcher", "investigator": "investigator",
"analyst": "analyst", "theorist": "theorist", "philosopher": "philosopher",
"thinker": "thinker", "intellectual": "intellectual", "sage": "sage",
"wise": "wise", "knowledgeable": "knowledgeable", "learned": "learned",
"educated": "educated", "literate": "literate", "illiterate": "illiterate",
"ignorant": "ignorant", "uninformed": "uninformed", "naive": "naive",
"innocent": "innocent", "simple": "simple", "unsophisticated": "unsophisticated",
"gullible": "gullible", "credulous": "credulous", "trusting": "trusting",
"skeptical": "skeptical", "doubtful": "doubtful", "suspicious": "suspicious",
"cynical": "cynical", "pessimistic": "pessimistic", "optimistic": "optimistic",
"hopeful": "hopeful", "confident": "confident", "certain": "certain",
"sure": "sure", "positive": "positive", "negative": "negative",
"neutral": "neutral", "indifferent": "indifferent", "apathetic": "apathetic",
"unconcerned": "unconcerned", "detached": "detached", "disinterested": "disinterested",
"objective": "objective", "subjective": "subjective", "biased": "biased",
"prejudiced": "prejudiced", "partial": "partial", "impartial": "impartial",
"fair": "fair", "just": "just", "unjust": "unjust",
"righteous": "righteous", "virtuous": "virtuous", "moral": "moral",
"ethical": "ethical", "principled": "principled", "honorable": "honorable",
"noble": "noble", "upstanding": "upstanding", "respectable": "respectable",
"decent": "decent", "good": "good", "evil": "evil",
"wicked": "wicked", "sinful": "sinful", "immoral": "immoral",
"unethical": "unethical", "corrupt": "corrupt", "dishonest": "dishonest",
"fraudulent": "fraudulent", "deceitful": "deceitful", "lying": "lying",
"false": "false", "untrue": "untrue", "truthful": "truthful",
"honest": "honest", "sincere": "sincere", "genuine": "genuine",
"authentic": "authentic", "real": "real", "fake": "fake",
"counterfeit": "counterfeit", "forged": "forged", "sham": "sham",
"bogus": "bogus", "phony": "phony", "artificial": "artificial",
"synthetic": "synthetic", "manmade": "manmade", "natural": "natural",
"organic": "organic", "pure": "pure", "impure": "impure",
"contaminated": "contaminated", "polluted": "polluted", "toxic": "toxic",
"poisonous": "poisonous", "hazardous": "hazardous", "dangerous": "dangerous",
"risky": "risky", "perilous": "perilous", "precarious": "precarious",
"uncertain": "uncertain", "unpredictable": "unpredictable", "volatile": "volatile",
"unstable": "unstable", "insecure": "insecure", "safe": "safe",
"secure": "secure", "protected": "protected", "guarded": "guarded",
"defended": "defended", "shielded": "shielded", "sheltered": "sheltered",
"immune": "immune", "resistant": "resistant", "vulnerable": "vulnerable",
"susceptible": "susceptible", "exposed": "exposed", "open": "open",
"closed": "closed", "shut": "shut", "locked": "locked",
"unlocked": "unlocked", "sealed": "sealed", "unsealed": "unsealed",
"tight": "tight", "loose": "loose", "firm": "firm",
"stable": "stable", "steady": "steady", "unsteady": "unsteady",
"shaky": "shaky", "wobbly": "wobbly", "rocky": "rocky",
"balanced": "balanced", "unbalanced": "unbalanced", "level": "level",
"even": "even", "uneven": "uneven", "flat": "flat",
"smooth": "smooth", "rough": "rough", "bumpy": "bumpy",
"lumpy": "lumpy", "jagged": "jagged", "pointed": "pointed",
"sharp": "sharp", "blunt": "blunt", "dull": "dull",
"rounded": "rounded", "curved": "curved", "straight": "straight",
"bent": "bent", "twisted": "twisted", "coiled": "coiled",
"spiral": "spiral", "helical": "helical", "circular": "circular",
"round": "round", "oval": "oval", "elliptical": "elliptical",
"spherical": "spherical", "globular": "globular", "cylindrical": "cylindrical",
"conical": "conical", "pyramidal": "pyramidal", "cubical": "cubical",
"rectangular": "rectangular", "triangular": "triangular", "square": "square",
"pentagonal": "pentagonal", "hexagonal": "hexagonal", "octagonal": "octagonal",
"polygonal": "polygonal", "symmetrical": "symmetrical", "asymmetrical": "asymmetrical",
"regular": "regular", "irregular": "irregular", "uniform": "uniform",
"nonuniform": "nonuniform", "consistent": "consistent", "inconsistent": "inconsistent",
"coherent": "coherent", "incoherent": "incoherent", "logical": "logical",
"illogical": "illogical", "rational": "rational", "irrational": "irrational",
"reasonable": "reasonable", "unreasonable": "unreasonable", "sensible": "sensible",
"nonsensical": "nonsensical", "absurd": "absurd", "ridiculous": "ridiculous",
"ludicrous": "ludicrous", "preposterous": "preposterous", "outrageous": "outrageous",
"scandalous": "scandalous", "shocking": "shocking", "astonishing": "astonishing",
"amazing": "amazing", "astounding": "astounding", "staggering": "staggering",
"stunning": "stunning", "breathtaking": "breathtaking", "mindblowing": "mindblowing",
"jawdropping": "jawdropping", "eyeopening": "eyeopening", "earthshaking": "earthshaking",
"groundbreaking": "groundbreaking", "revolutionary": "revolutionary", "innovative": "innovative",
"original": "original", "creative": "creative", "imaginative": "imaginative",
"inventive": "inventive", "resourceful": "resourceful", "ingenious": "ingenious",
"clever": "clever", "smart": "smart", "intelligent": "intelligent",
"brilliant": "brilliant", "genius": "genius", "gifted": "gifted",
"talented": "talented", "skilled": "skilled", "adept": "adept",
"proficient": "proficient", "competent": "competent", "capable": "capable",
"able": "able", "qualified": "qualified", "experienced": "experienced",
"practiced": "practiced", "seasoned": "seasoned", "veteran": "veteran",
"expert": "expert", "master": "master", "professional": "professional",
"specialist": "specialist", "authority": "authority", "scholar": "scholar",
"academic": "academic", "scientist": "scientist", "researcher": "researcher",
"investigator": "investigator", "analyst": "analyst", "theorist": "theorist",
"philosopher": "philosopher", "thinker": "thinker", "intellectual": "intellectual",
"sage": "sage", "wise": "wise", "knowledgeable": "knowledgeable",
"learned": "learned", "educated": "educated", "literate": "literate",
"illiterate": "illiterate"
}
return cases
def _build_special_regex(self):
"""Regex để nhận diện các từ đặc biệt"""
words = sorted(self.special_cases.keys(), key=len, reverse=True)
return re.compile(r'\b(' + '|'.join(map(re.escape, words)) + r')\b', flags=re.IGNORECASE)
def _build_abbreviation_patterns(self):
"""Xử lý các từ viết tắt"""
return {
# Viết tắt chung
r"\betc\.?\b": "et cetera ",
r"\bvs\.?\b": "versus ",
r"\be\.g\.\b": "for example ",
r"\bi\.e\.\b": "that is ",
r"\bDr\.?\b": "Doctor ",
r"\bMr\.?\b": "Mister ",
r"\bMrs\.?\b": "Misses ",
r"\bMs\.?\b": "Miss ",
r"\bProf\.?\b": "Professor ",
r"\bSt\.?\b": "Saint ",
r"\bAve\.?\b": "Avenue ",
r"\bBlvd\.?\b": "Boulevard ",
r"\bRd\.?\b": "Road ",
r"\bJan\.?\b": "January ",
r"\bFeb\.?\b": "February ",
r"\bMar\.?\b": "March ",
r"\bApr\.?\b": "April ",
r"\bJun\.?\b": "June ",
r"\bJul\.?\b": "July ",
r"\bAug\.?\b": "August ",
r"\bSep(?:t)?\.?\b": "September ",
r"\bOct\.?\b": "October ",
r"\bNov\.?\b": "November ",
r"\bDec\.?\b": "December ",
r"\bapprox\.?\b": "approximately ",
r"\bmin\.?\b": "minute ",
r"\bmax\.?\b": "maximum ",
r"\bdept\.?\b": "department ",
# Số thứ tự
r"\b1st\b": "first ",
r"\b2nd\b": "second ",
r"\b3rd\b": "third ",
r"\b4th\b": "fourth ",
r"\b5th\b": "fifth ",
r"\b6th\b": "sixth ",
r"\b7th\b": "seventh ",
r"\b8th\b": "eighth ",
r"\b9th\b": "ninth ",
r"\b10th\b": "tenth ",
r"\b11th\b": "eleventh ",
r"\b12th\b": "twelfth ",
r"\b13th\b": "thirteenth ",
r"\b14th\b": "fourteenth ",
r"\b15th\b": "fifteenth ",
r"\b20th\b": "twentieth ",
r"\b21st\b": "twenty first ",
r"\b22nd\b": "twenty second ",
r"\b23rd\b": "twenty third ",
r"\b30th\b": "thirtieth ",
r"\b31st\b": "thirty first ",
# Số đếm
r"\b0\b": "zero ",
r"\b1\b": "one ",
r"\b2\b": "two ",
r"\b3\b": "three ",
r"\b4\b": "four ",
r"\b5\b": "five ",
r"\b6\b": "six ",
r"\b7\b": "seven ",
r"\b8\b": "eight ",
r"\b9\b": "nine ",
r"\b10\b": "ten ",
r"\b11\b": "eleven ",
r"\b12\b": "twelve ",
r"\b13\b": "thirteen ",
r"\b14\b": "fourteen ",
r"\b15\b": "fifteen ",
r"\b16\b": "sixteen ",
r"\b17\b": "seventeen ",
r"\b18\b": "eighteen ",
r"\b19\b": "nineteen ",
r"\b20\b": "twenty ",
r"\b30\b": "thirty ",
r"\b40\b": "forty ",
r"\b50\b": "fifty ",
r"\b60\b": "sixty ",
r"\b70\b": "seventy ",
r"\b80\b": "eighty ",
r"\b90\b": "ninety ",
r"\b100\b": "one hundred ",
# World Wars
r"\bWorld War I\b": "World War One",
r"\bWorld War II\b": "World War Two",
r"\bWorld War III\b": "World War Three", # Optional
r"\bWorld War IV\b": "World War Four", # Optional
# Roman Emperors & Byzantine rulers
r"\bTheodosius I\b": "Theodosius the First",
r"\bTheodosius II\b": "Theodosius the Second",
r"\bConstantine I\b": "Constantine the First",
r"\bConstantine II\b": "Constantine the Second",
r"\bConstantine III\b": "Constantine the Third",
r"\bJustinian I\b": "Justinian the First",
r"\bJustinian II\b": "Justinian the Second",
r"\bAlexander I\b": "Alexander the First",
r"\bAlexander II\b": "Alexander the Second",
r"\bAlexander III\b": "Alexander the Third",
# English monarchs
r"\bHenry I\b": "Henry the First",
r"\bHenry II\b": "Henry the Second",
r"\bHenry III\b": "Henry the Third",
r"\bHenry IV\b": "Henry the Fourth",
r"\bHenry V\b": "Henry the Fifth",
r"\bHenry VI\b": "Henry the Sixth",
r"\bHenry VII\b": "Henry the Seventh",
r"\bHenry VIII\b": "Henry the Eighth",
r"\bEdward I\b": "Edward the First",
r"\bEdward II\b": "Edward the Second",
r"\bEdward III\b": "Edward the Third",
r"\bEdward IV\b": "Edward the Fourth",
r"\bEdward V\b": "Edward the Fifth",
r"\bEdward VI\b": "Edward the Sixth",
r"\bEdward VII\b": "Edward the Seventh",
r"\bEdward VIII\b": "Edward the Eighth",
r"\bCharles I\b": "Charles the First",
r"\bCharles II\b": "Charles the Second",
r"\bCharles III\b": "Charles the Third",
r"\bJames I\b": "James the First",
r"\bJames II\b": "James the Second",
r"\bWilliam I\b": "William the First",
r"\bWilliam II\b": "William the Second",
r"\bElizabeth I\b": "Elizabeth the First",
r"\bElizabeth II\b": "Elizabeth the Second",
# French monarchs
r"\bLouis I\b": "Louis the First",
r"\bLouis II\b": "Louis the Second",
r"\bLouis III\b": "Louis the Third",
r"\bLouis IV\b": "Louis the Fourth",
r"\bLouis V\b": "Louis the Fifth",
r"\bLouis VI\b": "Louis the Sixth",
r"\bLouis VII\b": "Louis the Seventh",
r"\bLouis VIII\b": "Louis the Eighth",
r"\bLouis IX\b": "Louis the Ninth",
r"\bLouis X\b": "Louis the Tenth",
r"\bLouis XI\b": "Louis the Eleventh",
r"\bLouis XII\b": "Louis the Twelfth",
r"\bLouis XIII\b": "Louis the Thirteenth",
r"\bLouis XIV\b": "Louis the Fourteenth",
r"\bLouis XV\b": "Louis the Fifteenth",
r"\bLouis XVI\b": "Louis the Sixteenth",
r"\bPhilip II\b": "Philip the Second",
r"\bPhilip IV\b": "Philip the Fourth",
# Russian monarchs
r"\bNicholas I\b": "Nicholas the First",
r"\bNicholas II\b": "Nicholas the Second",
r"\bPeter I\b": "Peter the First",
r"\bPeter II\b": "Peter the Second",
r"\bPeter III\b": "Peter the Third",
r"\bCatherine II\b": "Catherine the Second",
# Popes
r"\bPope John Paul I\b": "Pope John Paul the First",
r"\bPope John Paul II\b": "Pope John Paul the Second",
r"\bPope Benedict XVI\b": "Pope Benedict the Sixteenth",
r"\bPope Pius XII\b": "Pope Pius the Twelfth",
r"\bPope Leo XIII\b": "Pope Leo the Thirteenth",
r"\bPope Innocent III\b": "Pope Innocent the Third",
r"\bPope Gregory XIII\b": "Pope Gregory the Thirteenth",
# Other famous Roman numeral events/titles
r"\bSuper Bowl I\b": "Super Bowl One",
r"\bSuper Bowl II\b": "Super Bowl Two",
r"\bSuper Bowl III\b": "Super Bowl Three",
r"\bSuper Bowl IV\b": "Super Bowl Four",
r"\bSuper Bowl V\b": "Super Bowl Five",
r"\bSuper Bowl X\b": "Super Bowl Ten",
r"\bSuper Bowl XX\b": "Super Bowl Twenty",
r"\bFinal Fantasy VII\b": "Final Fantasy Seven",
r"\bFinal Fantasy VIII\b": "Final Fantasy Eight",
r"\bFinal Fantasy IX\b": "Final Fantasy Nine",
r"\bFinal Fantasy X\b": "Final Fantasy Ten"
}
def process_text(self, text: str) -> str:
    """Expand abbreviations, canonicalise special-case words, tidy whitespace.

    Args:
        text: Raw input text.

    Returns:
        The text after three passes: every entry of
        ``self.abbreviation_patterns`` applied in order (case-insensitively),
        every ``self.special_regex`` match replaced by the
        ``self.special_cases`` entry for its lower-cased spelling (the match
        is kept as-is when there is no such entry), and all whitespace runs,
        including newlines, collapsed to single spaces.
    """
    # Pass 1: abbreviations, in table order.
    expanded = text
    for regex, spoken in self.abbreviation_patterns.items():
        expanded = re.sub(regex, spoken, expanded, flags=re.IGNORECASE)

    # Pass 2: special-case words, looked up by their lower-cased form.
    canonical = self.special_regex.sub(
        lambda hit: self.special_cases.get(hit.group(0).lower(), hit.group(0)),
        expanded,
    )

    # Pass 3: collapse whitespace and trim both ends.
    return ' '.join(canonical.split())
@staticmethod
def split_num(num: re.Match) -> str:
num = num.group()
if ':' in num:
hours, minutes = map(int, num.split(':'))
if minutes == 0:
return f"{hours} o'clock"
elif minutes < 10:
return f'{hours} oh {minutes}'
return f'{hours} {minutes}'
year = int(num[:4])
if year < 1100 or year % 1000 < 10:
return num
left, right = num[:2], int(num[2:4])
suffix = 's' if num.endswith('s') else ''
if 100 <= year % 1000 <= 999:
if right == 0:
return f'{left} hundred{suffix}'
elif right < 10:
return f'{left} oh {right}{suffix}'
return f'{left} {right}{suffix}'
@staticmethod
def flip_money(match: re.Match) -> str:
m = match.group()
currency = 'dollar' if m[0] == '$' else 'pound'
if '.' not in m:
singular = '' if m[1:] == '1' else 's'
return f'{m[1:]} {currency}{singular}'
whole, cents = m[1:].split('.')
singular = '' if whole == '1' else 's'
cents = int(cents.ljust(2, '0'))
coins = f"cent{'' if cents == 1 else 's'}" if m[0] == '$' else ('penny' if cents == 1 else 'pence')
return f'{whole} {currency}{singular} and {cents} {coins}'
@staticmethod
def point_num(match):
whole, fractional = match.group().split('.')
return ' point '.join([whole, ' '.join(fractional)])
def normalize_text(self, text: str) -> str:
    r"""Normalise punctuation, titles, numbers and initialisms before phonemizing.

    The steps run in this order, and later rules rely on earlier ones:

    1. Curly quotes become straight quotes, guillemets become ``"`` and
       parentheses become guillemets.
    2. Ideographic and full-width punctuation is mapped to its ASCII form
       followed by a space. ASCII punctuation is deliberately left alone:
       adding a space after an ASCII ``,`` or ``:`` would split ``1,000``
       and ``12:30`` before the number and time rules below can see them.
    3. Non-newline whitespace is collapsed.
    4. ``Dr.``/``Mr.``/``Ms.``/``Mrs.``/``etc.``/``yeah`` are rewritten.
    5. Years and clock times go through ``self.split_num``, thousands
       separators are dropped, money goes through ``self.flip_money`` and
       decimals through ``self.point_num``.
    6. Digit ranges (``3-5``), possessives after capital consonants and
       dotted initialisms are adjusted.

    Args:
        text: Raw input text.

    Returns:
        The normalised text with leading/trailing whitespace removed.
    """
    replacements = {
        chr(8216): "'",
        chr(8217): "'",
        '«': chr(8220),
        '»': chr(8221),
        chr(8220): '"',
        chr(8221): '"',
        '(': '«',
        ')': '»'
    }
    for old, new in replacements.items():
        text = text.replace(old, new)
    # Keys are written as escapes so the full-width characters cannot be
    # silently folded to their ASCII look-alikes by an editor or tool.
    punctuation_replacements = {
        '\u3001': ',',  # ideographic comma
        '\u3002': '.',  # ideographic full stop
        '\uff01': '!',  # full-width exclamation mark
        '\uff0c': ',',  # full-width comma
        '\uff1a': ':',  # full-width colon
        '\uff1b': ';',  # full-width semicolon
        '\uff1f': '?',  # full-width question mark
    }
    for old, new in punctuation_replacements.items():
        text = text.replace(old, new + ' ')
    text = re.sub(r'[^\S\n]', ' ', text)
    text = re.sub(r' +', ' ', text)
    text = re.sub(r'(?<=\n) +(?=\n)', '', text)
    abbreviation_patterns = [
        (r'\bD[Rr]\.(?= [A-Z])', 'Doctor'),
        (r'\b(?:Mr\.|MR\.(?= [A-Z]))', 'Mister'),
        (r'\b(?:Ms\.|MS\.(?= [A-Z]))', 'Miss'),
        (r'\b(?:Mrs\.|MRS\.(?= [A-Z]))', 'Mrs'),
        (r'\betc\.(?! [A-Z])', 'etc'),
        (r'(?i)\b(y)eah?\b', r"\1e'a"),
    ]
    for pattern, replacement in abbreviation_patterns:
        text = re.sub(pattern, replacement, text)
    text = re.sub(r'\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)', self.split_num, text)
    text = re.sub(r'(?<=\d),(?=\d)', '', text)
    text = re.sub(
        r'(?i)[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b',
        self.flip_money,
        text
    )
    text = re.sub(r'\d*\.\d+', self.point_num, text)
    text = re.sub(r'(?<=\d)-(?=\d)', ' to ', text)
    text = re.sub(r'(?<=\d)S', ' S', text)
    text = re.sub(r"(?<=[BCDFGHJ-NP-TV-Z])'?s\b", "'S", text)
    text = re.sub(r"(?<=X')S\b", 's', text)
    text = re.sub(r'(?:[A-Za-z]\.){2,} [a-z]', lambda m: m.group().replace('.', '-'), text)
    text = re.sub(r'(?i)(?<=[A-Z])\.(?=[A-Z])', '-', text)
    return text.strip()
def tokenize(self, phonemes: str) -> List[int]:
    """Map each phoneme character to its ``self.VOCAB`` id.

    Characters that are not keys of ``self.VOCAB`` are skipped, so the result
    may be shorter than the input.
    """
    vocab = self.VOCAB
    ids = []
    for symbol in phonemes:
        if symbol in vocab:
            ids.append(vocab[symbol])
    return ids
def phonemize(self, text: str, lang: str = 'en-us', normalize: bool = True) -> str:
    """Convert text to a phoneme string restricted to ``self.VOCAB``.

    Args:
        text: Input text.
        lang: Key into ``self.phonemizers``. An unknown key prints a notice
            and falls back to ``'en-us'``.
        normalize: When true, ``text`` first goes through
            ``self.normalize_text``.

    Returns:
        The first phonemized string returned by the backend (empty string if
        it returns nothing), after fixed symbol substitutions and spacing
        fixes, with every character not in ``self.VOCAB`` removed and the
        ends stripped.
    """
    if normalize:
        text = self.normalize_text(text)
    if lang not in self.phonemizers:
        print(f"Language '{lang}' not supported. Defaulting to 'en-us'.")
        lang = 'en-us'
    phonemes = self.phonemizers[lang].phonemize([text])
    phonemes = phonemes[0] if phonemes else ''
    replacements = {
        'kəkˈoːɹoʊ': 'kˈoʊkəɹoʊ',
        'kəkˈɔːɹəʊ': 'kˈəʊkəɹəʊ',
        'ʲ': 'j',
        'r': 'ɹ',
        'x': 'k',
        'ɬ': 'l',
    }
    for old, new in replacements.items():
        phonemes = phonemes.replace(old, new)
    # Separate "hundred" from the word glued in front of it.
    phonemes = re.sub(r'(?<=[a-zɹː])(?=hˈʌndɹɪd)', ' ', phonemes)
    # Re-attach a detached final "z" to the preceding word.
    phonemes = re.sub(r' z(?=[;:,.!?¡¿—…"«»“” ]|$)', 'z', phonemes)
    # American "ninety" is voiced as "nine-dy". The check uses the resolved
    # 'en-us' key: a test against the code 'a' could never succeed, because
    # unknown codes have already been replaced by 'en-us' above.
    if lang == 'en-us':
        phonemes = re.sub(r'(?<=nˈaɪn)ti(?!ː)', 'di', phonemes)
    phonemes = ''.join(filter(lambda p: p in self.VOCAB, phonemes))
    return phonemes.strip()
# Environment setup: prefer the GPU. True when torch reports a usable CUDA device.
CUDA_AVAILABLE = torch.cuda.is_available()
class TTSModel:
    """Holds the loaded synthesis models, tokenizer, pipelines and voice files.

    Attributes set by ``__init__``:
        use_cuda: Copy of the module-level ``CUDA_AVAILABLE`` flag.
        models: Device name (``'cuda'`` / ``'cpu'``) -> ``KModel`` in eval mode.
        tokenizer: A ``Tokenizer`` instance.
        voice_cache: Dict, created empty here.
        voice_files: Voice name -> path of its ``.pt`` file under ``voices/``.
        pipelines: Language code (``'a'`` / ``'b'``) -> ``KPipeline`` built
            with ``model=False``.
    """

    def __init__(self):
        self.use_cuda = CUDA_AVAILABLE
        self.models = {}
        self.tokenizer = Tokenizer()
        self.voice_cache = {}
        self.voice_files = self._discover_voices()
        try:
            if self.use_cuda:
                self.models['cuda'] = torch.compile(KModel().to('cuda').eval(), mode='max-autotune')
                # One throw-away call on random tensors with gradients off;
                # presumably a warm-up so compilation happens at start-up
                # (TODO confirm the input shapes the model expects).
                with torch.no_grad():
                    self.models['cuda'](torch.randn(1, 64).cuda(), torch.randn(1, 80, 100).cuda(), 1.0)
            self.models['cpu'] = KModel().to('cpu').eval()
        except Exception as err:
            # Any failure above discards what was loaded and keeps a CPU model only.
            print(f"Error loading model: {err}")
            self.models = {'cpu': KModel().to('cpu').eval()}
        self.pipelines = {
            code: KPipeline(lang_code=code, model=False) for code in ('a', 'b')
        }

    def _discover_voices(self):
        """Return ``{voice name: path}`` for every ``.pt`` file in ``voices/``.

        When the directory does not exist it is created and an empty dict is
        returned.
        """
        voices_dir = "voices"
        found = {}
        if os.path.exists(voices_dir):
            for entry in os.listdir(voices_dir):
                if not entry.endswith(".pt"):
                    continue
                stem = os.path.splitext(entry)[0]
                found[stem] = os.path.join(voices_dir, entry)
                print(f"Found voice: {stem}")
        else:
            os.makedirs(voices_dir)
            print(f"Created voices directory at {os.path.abspath(voices_dir)}")
        return found

    def get_voice_list(self):
        """Return the discovered voice names for the UI, warning when there are none."""
        names = list(self.voice_files)
        if not names:
            print("Warning: No voice files found in voices folder")
        return names
# Shared TTSModel instance, constructed when this module is executed.
model_manager = TTSModel()
class TextProcessor:
@staticmethod
def clean_text(text: str) -> str:
    """Run the special-case pipeline, then tidy whitespace and punctuation spacing.

    After ``TextProcessor._process_special_cases`` the text has carriage
    returns and tabs turned into spaces, runs of spaces collapsed, the single
    whitespace character directly before ``,`` ``.`` ``!`` or ``?`` removed,
    and both ends stripped.
    """
    spoken = TextProcessor._process_special_cases(text)
    spoken = re.sub(r'[\r\t]', ' ', spoken)
    spoken = re.sub(r' +', ' ', spoken)
    # Drops only the one whitespace character right before the mark.
    spoken = re.sub(r'(\s)([,.!?])', r'\2', spoken)
    return spoken.strip()
@staticmethod
def _process_special_cases(text: str) -> str:
    """Apply every special-case converter to ``text``, in a fixed order.

    The order is: emails, websites, phone numbers, temperatures,
    measurements, currency, percentages, math operations, times, years and
    finally the remaining special symbols.
    """
    stages = (
        TextProcessor._process_emails,
        TextProcessor._process_websites,
        TextProcessor._process_phone_numbers,
        TextProcessor._process_temperatures,
        TextProcessor._process_measurements,
        TextProcessor._process_currency,
        TextProcessor._process_percentages,
        TextProcessor._process_math_operations,
        TextProcessor._process_times,
        TextProcessor._process_years,
        TextProcessor._process_special_symbols,
    )
    for stage in stages:
        text = stage(text)
    return text
@staticmethod
def _process_emails(text: str) -> str:
"""Process emails with correct English pronunciation for all special characters"""
def convert_email(match):
full_email = match.group(0)
# Replace each special character with its English pronunciation
processed = (full_email
.replace('@', ' at ')
.replace('.', ' dot ')
.replace('-', ' dash ')
.replace('_', ' underscore ')
.replace('+', ' plus ')
.replace('/', ' slash ')
.replace('=', ' equals '))
return processed
# Regex to match all email formats
email_pattern = r'\b[\w.+-]+@[\w.-]+\.[a-zA-Z]{2,}\b'
return re.sub(email_pattern, convert_email, text)
@staticmethod
def _process_websites(text: str) -> str:
"""Process websites with correct English pronunciation for special characters"""
def convert_website(match):
url = match.group(1)
# Replace each special character with its English pronunciation
return (url.replace('.', ' dot ')
.replace('-', ' dash ')
.replace('_', ' underscore ')
.replace('/', ' slash ')
.replace('?', ' question mark ')
.replace('=', ' equals ')
.replace('&', ' ampersand '))
# Only process websites that don't contain @ (to avoid conflict with emails)
website_pattern = r'\b(?![\w.-]*@)((?:https?://)?(?:www\.)?[\w.-]+\.[a-z]{2,}(?:[/?=&#][\w.-]*)*)\b'
return re.sub(website_pattern, convert_website, text, flags=re.IGNORECASE)
@staticmethod
def _process_temperatures(text: str) -> str:
"""Process temperatures and cardinal directions with degree symbols"""
def temp_to_words(temp, unit):
temp_text = TextProcessor._number_to_words(temp)
unit = unit.upper() if unit else ''
unit_map = {
'C': 'degrees Celsius',
'F': 'degrees Fahrenheit',
'N': 'degrees north',
'S': 'degrees south',
'E': 'degrees east',
'W': 'degrees west',
'': 'degrees' # Default case for just number with degree symbol
}
unit_text = unit_map.get(unit, f'degrees {unit}')
return f"{temp_text} {unit_text}"
# Process formats like 75°F, 100°C, 15°N, 120°E
text = re.sub(
r'(-?\d+)°([NSEWCFnsewcf]?)',
lambda m: temp_to_words(m.group(1), m.group(2)),
text,
flags=re.IGNORECASE
)
# Add degree symbol pronunciation when standalone
text = re.sub(r'°', ' degrees ', text)
return text
@staticmethod
def _process_measurements(text: str) -> str:
"""Xử lý các đơn vị đo lường với số thập phân"""
units_map = {
'km/h': 'kilometers per hour',
'mph': 'miles per hour',
'kg': 'kilograms',
'g': 'grams',
'cm': 'centimeters',
'm': 'meters',
'mm': 'millimeters',
'L': 'liters',
'l': 'liters',
'ml': 'milliliters',
'mL': 'milliliters',
'h': 'hours',
'min': 'minutes',
's': 'seconds'
}
plural_units = {'L', 'l', 'mL', 'ml'} # Các đơn vị không thêm 's' khi số nhiều
def measurement_to_words(value, unit):
try:
# Xử lý số thập phân
if '.' in value:
integer_part, decimal_part = value.split('.')
value_text = (f"{TextProcessor._number_to_words(integer_part)} "
f"point {' '.join(TextProcessor._digit_to_word(d) for d in decimal_part)}")
else:
value_text = TextProcessor._number_to_words(value)
# Xử lý đơn vị
unit_lower = unit.lower()
unit_text = units_map.get(unit, units_map.get(unit_lower, unit))
# Xử lý số nhiều (trừ các đơn vị đặc biệt)
if (float(value) != 1 and unit_lower in units_map
and unit not in plural_units):
unit_text += 's'
return f"{value_text} {unit_text}"
except:
return f"{value}{unit}"
# Xử lý định dạng 180 km/h, 60 kg, 3.5 L, 250 ml
text = re.sub(
r'(-?\d+\.?\d*)\s*({})\b'.format('|'.join(
re.escape(key) for key in units_map.keys()
)),
lambda m: measurement_to_words(m.group(1), m.group(2)),
text,
flags=re.IGNORECASE
)
return text
@staticmethod
def _process_currency(text: str) -> str:
"""Xử lý các loại tiền tệ"""
currency_map = {
'$': 'dollars',
'€': 'euros',
'£': 'pounds',
'¥': 'yen',
'₩': 'won',
'₽': 'rubles'
}
def currency_to_words(value, symbol):
value_text = TextProcessor._number_to_words(value)
currency_text = currency_map.get(symbol, '')
return f"{value_text} {currency_text}" if currency_text else f"{symbol}{value}"
# Xử lý định dạng $10, €50
text = re.sub(
r'([$€£¥₩₽])(\d+\.?\d*)',
lambda m: currency_to_words(m.group(2), m.group(1)),
text
)
return text
@staticmethod
def _process_percentages(text: str) -> str:
"""Xử lý phần trăm"""
text = re.sub(
r'(\d+\.?\d*)%',
lambda m: f"{TextProcessor._number_to_words(m.group(1))} percent",
text
)
return text
@staticmethod
def _process_math_operations(text: str) -> str:
"""Xử lý các phép toán"""
math_map = {
'+': 'plus',
'-': 'minus',
'×': 'times',
'*': 'times',
'÷': 'divided by',
'/': 'divided by',
'=': 'equals',
'>': 'is greater than',
'<': 'is less than'
}
# Xử lý phép toán đơn giản 2+2=4
text = re.sub(
r'(\d+)\s*([+×*÷/=><-])\s*(\d+)',
lambda m: (f"{TextProcessor._number_to_words(m.group(1))} "
f"{math_map.get(m.group(2), m.group(2))} "
f"{TextProcessor._number_to_words(m.group(3))}"),
text
)
# Xử lý phân số 4/5
text = re.sub(
r'(\d+)/(\d+)',
lambda m: (f"{TextProcessor._number_to_words(m.group(1))} "
f"divided by {TextProcessor._number_to_words(m.group(2))}"),
text
)
return text
@staticmethod
def _process_special_symbols(text: str) -> str:
"""Xử lý các ký hiệu đặc biệt"""
symbol_map = {
'@': 'at',
'#': 'number',
'&': 'and',
'_': 'underscore'
}
# Xử lý @home → at home
text = re.sub(
r'@(\w+)',
lambda m: f"at {m.group(1)}",
text
)
# Xử lý #1 → number one
text = re.sub(
r'#(\d+)',
lambda m: f"number {TextProcessor._number_to_words(m.group(1))}",
text
)
# Xử lý các ký hiệu đơn lẻ
for symbol, replacement in symbol_map.items():
text = text.replace(symbol, f' {replacement} ')
return text
@staticmethod
def _process_times(text: str) -> str:
    """Convert clock times such as ``12:30`` or ``12:30 PM`` into words.

    Args:
        text: Text that may contain ``H:MM`` / ``HH:MM`` times, optionally
            followed by AM/PM.

    Returns:
        The text with every time replaced by the result of
        ``TextProcessor._time_to_words``.
    """
    # The whitespace before AM/PM belongs to the optional group. If it sat
    # outside, a time followed by an ordinary word ("12:30 tomorrow") would
    # have its trailing space consumed and the words glued together.
    return re.sub(
        r'\b(\d{1,2}):(\d{2})(?:\s*(AM|PM|am|pm))?\b',
        lambda m: TextProcessor._time_to_words(m.group(1), m.group(2), m.group(3)),
        text,
    )
@staticmethod
def _time_to_words(hour: str, minute: str, period: str = None) -> str:
"""Chuyển thời gian thành cách đọc tự nhiên"""
hour_int = int(hour)
minute_int = int(minute)
# Xử lý AM/PM
period_text = ""
if period:
period_text = " AM" if period.upper() == "AM" else " PM"
# Xử lý giờ
if hour_int == 0:
hour_text = "twelve"
elif hour_int <= 12:
hour_text = TextProcessor._number_to_words(str(hour_int))
else:
hour_text = TextProcessor._number_to_words(str(hour_int - 12))
# Xử lý phút
if minute_int == 0:
minute_text = "o'clock"
elif minute_int < 10:
minute_text = "oh " + TextProcessor._number_to_words(str(minute_int))
else:
minute_text = TextProcessor._number_to_words(str(minute_int))
# Kết hợp thành câu
if minute_int == 0:
return f"{hour_text}{period_text}"
return f"{hour_text} {minute_text}{period_text}"
@staticmethod
def _process_years(text: str) -> str:
"""Xử lý các năm trong văn bản"""
# Xử lý năm 4 chữ số từ 1000-2999 (phổ biến nhất)
text = re.sub(
r'\b(1[0-9]{3}|2[0-9]{3})\b',
lambda m: TextProcessor._year_to_words(m.group(1)),
text
)
# Xử lý năm 2 chữ số (nếu cần)
text = re.sub(
r'\b([0-9]{2})\b',
lambda m: TextProcessor._two_digit_year_to_words(m.group(1)),
text
)
return text
@staticmethod
def _year_to_words(year: str) -> str:
    """Convert a four-digit year string into words.

    Args:
        year: The year as a string; anything that is not exactly four
            characters long is returned unchanged.

    Returns:
        * 2000-2009: the plain cardinal number ("two thousand",
          "two thousand five"). Splitting these into "twenty" plus a
          two-digit part would read 2000 as "twenty zero zero".
        * 2010-2099: "twenty" followed by the two-digit reading.
        * every other year: the plain cardinal number.
    """
    if len(year) != 4:
        return year
    if year.startswith('200'):
        return TextProcessor._number_to_words(year)
    if year.startswith('20'):
        return f"twenty {TextProcessor._two_digit_year_to_words(year[2:])}"
    return TextProcessor._number_to_words(year)
@staticmethod
def _two_digit_year_to_words(num: str) -> str:
"""Chuyển số 2 chữ số thành chữ (cho năm)"""
if len(num) != 2:
return num
num_int = int(num)
if num_int == 0:
return "zero zero"
if num_int < 10:
return f"oh {TextProcessor._digit_to_word(num[1])}"
ones = ['', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine',
'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen',
'seventeen', 'eighteen', 'nineteen']
tens = ['', '', 'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy',
'eighty', 'ninety']
if num_int < 20:
return ones[num_int]
ten, one = divmod(num_int, 10)
if one == 0:
return tens[ten]
return f"{tens[ten]} {ones[one]}"
@staticmethod
def _process_phone_numbers(text: str) -> str:
"""Xử lý số điện thoại với regex chính xác hơn"""
# Pattern mới tránh xung đột với số La Mã
phone_pattern = r'\b(\d{3})[-. ]?(\d{3})[-. ]?(\d{4})\b'
def phone_to_words(match):
groups = match.groups()
# Đọc từng số trong từng nhóm và thêm dấu phẩy (,) để tạo ngắt nghỉ
parts = []
for part in groups:
digits = ' '.join([TextProcessor._digit_to_word(d) for d in part])
parts.append(digits)
return ', '.join(parts) # Thêm dấu phẩy để tạo ngắt nghỉ khi đọc
return re.sub(phone_pattern, phone_to_words, text)
@staticmethod
def _process_currency_numbers(text: str) -> str:
    """Spell out numbers, appending "dollars" when they carry a ``$`` prefix.

    Handles comma-grouped numbers (``1,234.56``) as well as plain digit
    runs of any length (``12345``).

    Args:
        text: Text that may contain numbers or dollar amounts.

    Returns:
        The text with every matched number replaced by words.
    """
    def number_to_spoken(match):
        amount = match.group(1)
        spoken = TextProcessor._number_to_words(amount)
        if match.group(0).startswith('$'):
            unit = 'dollar' if amount == '1' else 'dollars'
            return f"{spoken} {unit}"
        return spoken

    # The (?<!\d) guard stops a match from starting inside a digit run.
    # Without it an ungrouped number such as "12345" is only matched from
    # its last three digits and the leading digits stay unconverted.
    return re.sub(
        r'(?<!\d)\$?(\d{1,3}(?:,\d{3})+(?!\d)(?:\.\d+)?|\d+(?:\.\d+)?)\b',
        number_to_spoken,
        text,
    )
@staticmethod
def _digit_to_word(digit: str) -> str:
digit_map = {
'0': 'zero', '1': 'one', '2': 'two', '3': 'three', '4': 'four',
'5': 'five', '6': 'six', '7': 'seven', '8': 'eight', '9': 'nine'
}
return digit_map.get(digit, digit)
@staticmethod
def _number_to_words(number: str) -> str:
    """Convert a numeric string into English words.

    Comma thousands separators are ignored. A decimal fraction is read
    digit by digit after the word "point". A trailing decimal point with no
    digits after it (``"10."``) is read as the integer alone.

    Args:
        number: The number as text, e.g. ``"1,234.56"``.

    Returns:
        The spoken form, or ``number`` unchanged when it cannot be parsed
        (non-numeric text, more than one decimal point) or is too large for
        the integer converter.
    """
    num_str = number.replace(',', '')
    try:
        if '.' not in num_str:
            return TextProcessor._int_to_words(num_str)
        # Raises ValueError when there is more than one decimal point.
        integer_part, decimal_part = num_str.split('.')
        integer_text = TextProcessor._int_to_words(integer_part)
        if not decimal_part:
            return integer_text
        decimal_text = ' '.join(TextProcessor._digit_to_word(d) for d in decimal_part)
        return f"{integer_text} point {decimal_text}"
    except (ValueError, IndexError):
        # ValueError: int() could not parse the text.
        # IndexError: the integer converter ran out of magnitude names.
        return number
@staticmethod
def _digits_to_words(digits: str) -> str:
return ' '.join([TextProcessor._digit_to_word(d) for d in digits])
@staticmethod
def _int_to_words(num_str: str) -> str:
num = int(num_str)
if num == 0:
return 'zero'
units = ['', 'thousand', 'million', 'billion', 'trillion']
words = []
level = 0
while num > 0:
chunk = num % 1000
if chunk != 0:
words.append(TextProcessor._convert_less_than_thousand(chunk) + ' ' + units[level])
num = num // 1000
level += 1
return ' '.join(reversed(words)).strip()
@staticmethod
def _convert_less_than_thousand(num: int) -> str:
ones = ['', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine',
'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen',
'seventeen', 'eighteen', 'nineteen']
tens = ['', '', 'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy',
'eighty', 'ninety']
if num == 0:
return ''
if num < 20:
return ones[num]
if num < 100:
return tens[num // 10] + (' ' + ones[num % 10] if num % 10 != 0 else '')
return ones[num // 100] + ' hundred' + (' ' + TextProcessor._convert_less_than_thousand(num % 100) if num % 100 != 0 else '')
@staticmethod
def split_sentences(text: str) -> List[str]:
re_special_cases = re.compile(r'(?<!\w)([A-Z][a-z]*\.)(?=\s)')
re_sentence_split = re.compile(r'(?<=[.!?])\s+')
sentences = []
for line in text.split('\n'):
stripped = line.strip()
if stripped:
stripped = re_special_cases.sub(r'\1Ⓝ', stripped)
parts = re_sentence_split.split(stripped)
for part in parts:
part = part.replace('Ⓝ', '')
if part:
sentences.append(part)
return sentences
@staticmethod
def parse_dialogues(text: str, prefixes: List[str]) -> List[Tuple[str, str]]:
"""Phân tích nội dung hội thoại với các prefix chỉ định"""
dialogues = []
current = None
for line in text.split('\n'):
line = line.strip()
if not line:
continue
# Kiểm tra xem dòng có bắt đầu bằng bất kỳ prefix nào không
found_prefix = None
for prefix in prefixes:
if line.lower().startswith(prefix.lower() + ':'):
found_prefix = prefix
break
if found_prefix:
if current:
# Xử lý các trường hợp đặc biệt trước khi thêm vào dialogues
processed_content = TextProcessor._process_special_cases(current[1])
dialogues.append((current[0], processed_content))
speaker = found_prefix
content = line[len(found_prefix)+1:].strip()
current = (speaker, content)
elif current:
current = (current[0], current[1] + ' ' + line)
if current:
# Xử lý các trường hợp đặc biệt cho dòng cuối cùng
processed_content = TextProcessor._process_special_cases(current[1])
dialogues.append((current[0], processed_content))
return dialogues
class AudioProcessor:
    """Helpers for post-processing synthesized audio.

    Covers enhancement of a single waveform, choosing the pause that follows
    a sentence, and joining per-sentence segments into one track.
    """

    # Final character of a sentence -> name of the pause setting used for it.
    _PAUSE_KEY_BY_CHAR = {
        '.': 'dot_pause',
        '?': 'ques_pause',
        '!': 'excl_pause',
        ',': 'comma_pause',
        ':': 'colon_pause',
        ';': 'semi_pause',
        '-': 'dash_pause',
        '–': 'dash_pause',
        '—': 'dash_pause',
    }

    @staticmethod
    def enhance_audio(audio: np.ndarray, volume: float = 1.0, pitch: float = 1.0) -> np.ndarray:
        """Normalize, soft-clip, optionally pitch-shift, compress and filter audio.

        Args:
            audio: Waveform samples as a float array; treated as mono audio
                at 24000 Hz.
            volume: Gain factor applied after peak normalization.
            pitch: Playback-rate factor; 1.0 leaves the audio untouched.

        Returns:
            The processed samples as floats (16-bit values divided by 32768).
        """
        # 1. Peak-normalize with 10% headroom; the epsilon avoids division
        #    by zero on silent input.
        max_sample = np.max(np.abs(audio)) + 1e-8
        audio = (audio / max_sample) * 0.9 * volume
        # 2. Soft clipping with tanh to avoid harsh distortion.
        audio = np.tanh(audio * 1.5) / 1.5
        # 3. Wrap the samples as 16-bit mono PCM.
        audio_seg = AudioSegment(
            (audio * 32767).astype(np.int16).tobytes(),
            frame_rate=24000,
            sample_width=2,
            channels=1
        )
        # 4. Pitch: reinterpret the data at a scaled frame rate, resample
        #    back to 24 kHz, and add short fades at both ends.
        if pitch != 1.0:
            audio_seg = audio_seg._spawn(
                audio_seg.raw_data,
                overrides={"frame_rate": int(audio_seg.frame_rate * pitch)}
            ).set_frame_rate(24000).fade_in(10).fade_out(10)
        # 5. Dynamic range compression and band limiting.
        audio_seg = compress_dynamic_range(
            audio_seg,
            threshold=-12.0,
            ratio=3.5,
            attack=5,
            release=50
        )
        audio_seg = audio_seg.low_pass_filter(11000).high_pass_filter(200)
        # 6. Final level adjustment.
        # NOTE(review): max_dBFS should never be above 0, so this gain is
        # never negative; when the branch runs it raises the level slightly
        # instead of lowering it. Confirm the intended target level.
        if audio_seg.max_dBFS > -1.0:
            audio_seg = audio_seg.apply_gain(-audio_seg.max_dBFS * 0.8)
        return np.array(audio_seg.get_array_of_samples()) / 32768.0

    @staticmethod
    def calculate_pause(text: str, pause_settings: Dict[str, int]) -> int:
        """Return the pause in milliseconds that should follow ``text``.

        Args:
            text: The sentence that was just spoken.
            pause_settings: Pause lengths keyed by setting name
                (``'default_pause'``, ``'dot_pause'``, ``'ques_pause'``, ...).
                ``'default_pause'`` is required. A key equal to the final
                character itself is also honoured and takes precedence.

        Returns:
            0 for empty text or text ending in a known abbreviation, the
            ``'time_colon_pause'`` value (default 50) when the text contains
            a clock time, otherwise the pause configured for the final
            character, falling back to ``'default_pause'``.
        """
        text = text.strip()
        if not text:
            return 0
        # Abbreviations that end in a period must not trigger a pause.
        if re.search(r'(?:^|\s)(?:Mr|Mrs|Ms|Dr|Prof|St|A\.M|P\.M|etc|e\.g|i\.e)\.$', text, re.IGNORECASE):
            return 0
        # NOTE(review): this matches a clock time anywhere in the text, not
        # only at its end — confirm that is intended.
        if re.search(r'\b\d{1,2}:\d{2}\b', text):
            return pause_settings.get('time_colon_pause', 50)
        last_char = text[-1]
        default_pause = pause_settings['default_pause']
        if last_char in pause_settings:
            return pause_settings[last_char]
        # The settings are keyed by name, not by punctuation mark, so the
        # final character has to be translated to its setting name first.
        setting_name = AudioProcessor._PAUSE_KEY_BY_CHAR.get(last_char)
        if setting_name is None:
            return default_pause
        return pause_settings.get(setting_name, default_pause)

    @staticmethod
    def combine_segments(segments: "List[AudioSegment]", pauses: List[int]) -> "AudioSegment":
        """Join segments, inserting silence of at least 50 ms between them.

        Args:
            segments: Audio segments in playback order.
            pauses: Pause in milliseconds after each segment; the pause
                after the last segment is not used.

        Returns:
            The combined audio. Each segment gets a 10 ms fade-in and
            fade-out.
        """
        combined = AudioSegment.silent(duration=0)
        last_index = len(segments) - 1
        for i, (seg, pause) in enumerate(zip(segments, pauses)):
            combined += seg.fade_in(10).fade_out(10)
            if i < last_index:
                combined += AudioSegment.silent(duration=max(50, pause))
        return combined

    @staticmethod
    def combine_with_pauses(segments: "List[AudioSegment]", pauses: List[int]) -> "AudioSegment":
        """Join segments with exactly the given pauses and 50 ms fades.

        Args:
            segments: Audio segments in playback order.
            pauses: Pause in milliseconds after each segment; the pause
                after the last segment is not used.

        Returns:
            The combined audio.
        """
        combined = AudioSegment.empty()
        last_index = len(segments) - 1
        for i, (seg, pause) in enumerate(zip(segments, pauses)):
            combined += seg.fade_in(50).fade_out(50)
            if i < last_index:
                combined += AudioSegment.silent(duration=pause)
        return combined
class SubtitleGenerator:
    """Builds SRT subtitles for a sequence of synthesized sentences."""

    @staticmethod
    def clean_subtitle_text(text: str) -> str:
        """Strip a leading ``Q:``, ``A:`` or ``CHAR<n>:`` speaker label.

        Args:
            text: One subtitle line, possibly with a speaker label.

        Returns:
            The stripped text without the label.
        """
        return re.sub(r'^(Q|A|CHAR\d+):\s*', '', text.strip())

    @staticmethod
    def split_long_sentences(text: str, max_length: int = 120) -> List[str]:
        """Split text into chunks of at most ``max_length`` characters.

        Splits happen only after ``.``, ``!`` or ``?``; a single sentence
        that is longer than ``max_length`` is kept whole.

        Args:
            text: The text to split.
            max_length: Preferred maximum chunk length in characters.

        Returns:
            The chunks in order. Text after the last punctuation mark (or
            the whole text when it has none) is included.
        """
        sentences = []
        current = ""
        # With a capturing group the result alternates text and punctuation
        # and always ends with a (possibly empty) text piece.
        parts = re.split(r'([.!?])', text)
        # Walk up to and including the final piece so trailing text without
        # closing punctuation is not lost.
        for i in range(0, len(parts), 2):
            part = parts[i] + (parts[i + 1] if i + 1 < len(parts) else "")
            if not part.strip():
                continue
            if len(current + part) <= max_length:
                current += part
            else:
                if current:
                    sentences.append(current)
                current = part
        if current:
            sentences.append(current)
        return sentences

    @staticmethod
    def _format_timestamp(milliseconds: int) -> str:
        """Format a millisecond offset as an SRT timestamp ``HH:MM:SS,mmm``."""
        hours, remainder = divmod(int(milliseconds), 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        seconds, millis = divmod(remainder, 1000)
        return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}"

    @staticmethod
    def generate_srt(audio_segments: "List[AudioSegment]", sentences: List[str], pause_settings: Dict[str, int]) -> str:
        """Generate SRT subtitles for the given segments.

        Args:
            audio_segments: One segment per sentence; only ``len(segment)``
                (its duration in milliseconds) is used.
            sentences: The sentence spoken in each segment, in the same order.
            pause_settings: Settings passed to
                ``AudioProcessor.calculate_pause`` for the gap after each
                sentence except the last.

        Returns:
            The subtitles in SRT format. Long sentences are split into
            several entries that share the segment's duration equally.
        """
        subtitles = []
        # NOTE(review): the 150 ms start offset and the 100 ms minimum pause
        # differ from AudioProcessor.combine_segments (no leading silence,
        # 50 ms minimum) — confirm which timing is intended.
        current_time = 150
        max_subtitle_length = 120
        last_index = len(audio_segments) - 1
        for i, (seg, sentence) in enumerate(zip(audio_segments, sentences)):
            # Speaker labels are not shown in subtitles.
            cleaned_sentence = SubtitleGenerator.clean_subtitle_text(sentence)
            sentence_chunks = SubtitleGenerator.split_long_sentences(cleaned_sentence, max_subtitle_length)
            # Equal share of the segment per chunk.
            chunk_duration = len(seg) / max(1, len(sentence_chunks))
            for j, chunk in enumerate(sentence_chunks):
                start_time = current_time + (j * chunk_duration)
                end_time = start_time + chunk_duration
                subtitles.append({
                    'start': int(start_time),
                    'end': int(end_time),
                    'text': chunk.strip()
                })
            current_time += len(seg)
            if i < last_index:
                pause = AudioProcessor.calculate_pause(sentence, pause_settings)
                current_time += max(100, pause)

        srt_content = []
        for idx, sub in enumerate(subtitles, 1):
            start_str = SubtitleGenerator._format_timestamp(sub['start'])
            end_str = SubtitleGenerator._format_timestamp(sub['end'])
            srt_content.append(
                f"{idx}\n"
                f"{start_str} --> {end_str}\n"
                f"{sub['text']}\n"
            )
        return "\n".join(srt_content)
class TTSGenerator:
    """Turns text into speech audio plus matching SRT subtitles.

    Relies on the module-level ``model_manager`` for voices, pipelines and
    models.
    """

    def __init__(self):
        self.text_processor = TextProcessor()
        self.audio_processor = AudioProcessor()
        self.tokenizer = Tokenizer()
        self.subtitle_generator = SubtitleGenerator()

    def generate_sentence_audio(self, sentence: str, voice: str, speed: float,
                                device: str, volume: float = 1.0, pitch: float = 1.0) -> Optional[Tuple[int, np.ndarray]]:
        """Synthesize one sentence.

        Args:
            sentence: Text to speak.
            voice: Key into ``model_manager.voice_files``.
            speed: Speaking-rate factor passed to the pipeline and model.
            device: ``'cuda'`` or ``'cpu'``; selects the model and enables
                autocast on CUDA.
            volume: Forwarded to ``AudioProcessor.enhance_audio``.
            pitch: Forwarded to ``AudioProcessor.enhance_audio``.

        Returns:
            ``(24000, samples)`` for the first chunk the pipeline yields, or
            ``None`` when the voice is unknown, loading or synthesis fails,
            or the pipeline yields nothing. Errors are printed, not raised.
        """
        try:
            if voice not in model_manager.voice_files:
                print(f"Voice {voice} not found in voices folder")
                return None
            # Load the voice on first use and cache it with its pipeline.
            if voice not in model_manager.voice_cache:
                voice_path = model_manager.voice_files[voice]
                try:
                    voice_data = torch.load(voice_path, map_location='cpu')
                    # NOTE(review): pipeline 'a' is used for every voice —
                    # confirm whether some voices need a different one.
                    pipeline = model_manager.pipelines['a']
                    pack = voice_data
                    model_manager.voice_cache[voice] = (pipeline, pack)
                except Exception as e:
                    print(f"Error loading voice {voice}: {e}")
                    return None
            else:
                pipeline, pack = model_manager.voice_cache[voice]
            processed_text = self.tokenizer.process_text(sentence)
            # Only the first chunk yielded by the pipeline is synthesized.
            for _, ps, _ in pipeline(processed_text, voice, speed):
                ref_s = pack[len(ps)-1]
                if device == 'cuda':
                    ps = ps.cuda()
                    ref_s = ref_s.cuda()
                with torch.cuda.amp.autocast(enabled=(device=='cuda')):
                    audio = model_manager.models[device](ps, ref_s, speed).cpu().numpy()
                return (24000, self.audio_processor.enhance_audio(audio, volume, pitch))
            return None
        except Exception as e:
            print(f"Error generating audio: {e}")
            return None

    def generate_story_audio(self, text: str, voice: str, speed: float, device: str,
                             pause_settings: Dict[str, int], volume: float = 1.0, pitch: float = 1.0) -> Tuple[Tuple[int, np.ndarray], str, str]:
        """Synthesize a whole text sentence by sentence.

        Args:
            text: Raw input text.
            voice: Voice key, see ``generate_sentence_audio``.
            speed: Speaking-rate factor; pauses are divided by it after
                clamping it to the range 0.5-2.0.
            device: ``'cuda'`` or ``'cpu'``.
            pause_settings: Must contain ``'default_pause'``,
                ``'dot_pause'``, ``'ques_pause'``, ``'comma_pause'`` and
                ``'colon_pause'`` in milliseconds.
            volume: Forwarded to ``generate_sentence_audio``.
            pitch: Forwarded to ``generate_sentence_audio``.

        Returns:
            ``((frame_rate, mp3_bytes_as_uint8_array), stats, srt_text)``.
            The first element is ``None`` when there is nothing to read or
            no sentence could be synthesized.
        """
        start_time = time.time()
        clean_text = self.text_processor.clean_text(text)
        sentences = self.text_processor.split_sentences(clean_text)
        if not sentences:
            return None, "No content to read", ""
        audio_segments = []
        pause_durations = []
        # Sentences that actually produced audio, index-aligned with
        # audio_segments. A failed sentence is skipped here as well so the
        # subtitles are not paired with the wrong segment.
        spoken_sentences = []
        # Faster speech gets proportionally shorter pauses.
        speed_factor = max(0.5, min(2.0, speed))
        adjusted_pause_settings = {
            'default_pause': int(pause_settings['default_pause'] / speed_factor),
            'dot_pause': int(pause_settings['dot_pause'] / speed_factor),
            'ques_pause': int(pause_settings['ques_pause'] / speed_factor),
            'comma_pause': int(pause_settings['comma_pause'] / speed_factor),
            'colon_pause': int(pause_settings['colon_pause'] / speed_factor),
            'excl_pause': int(pause_settings['dot_pause'] / speed_factor),
            'semi_pause': int(pause_settings['colon_pause'] / speed_factor),
            'dash_pause': int(pause_settings['comma_pause'] / speed_factor),
            'time_colon_pause': 50  # fixed short pause for clock times
        }
        for sentence in sentences:
            result = self.generate_sentence_audio(sentence, voice, speed, device, volume, pitch)
            if not result:
                continue
            sample_rate, audio_data = result
            audio_seg = AudioSegment(
                (audio_data * 32767).astype(np.int16).tobytes(),
                frame_rate=sample_rate,
                sample_width=2,
                channels=1
            )
            audio_segments.append(audio_seg)
            spoken_sentences.append(sentence)
            pause_durations.append(
                self.audio_processor.calculate_pause(sentence, adjusted_pause_settings)
            )
        if not audio_segments:
            return None, "Failed to generate audio", ""
        combined_audio = self.audio_processor.combine_segments(audio_segments, pause_durations)
        # Encode to MP3 in memory; the caller receives the encoded bytes.
        with io.BytesIO() as buffer:
            combined_audio.export(buffer, format="mp3", bitrate="256k", parameters=["-ar", str(combined_audio.frame_rate)])
            buffer.seek(0)
            audio_data = np.frombuffer(buffer.read(), dtype=np.uint8)
        subtitles = self.subtitle_generator.generate_srt(audio_segments, spoken_sentences, adjusted_pause_settings)
        stats = (f"Processed {len(clean_text)} chars, {len(clean_text.split())} words\n"
                 f"Audio duration: {len(combined_audio)/1000:.2f}s\n"
                 f"Time: {time.time() - start_time:.2f}s\n"
                 f"Device: {device.upper()}")
        return (combined_audio.frame_rate, audio_data), stats, subtitles
def create_interface():
    """Build the Gradio interface for the TTS application.

    Returns:
        The ``gr.Blocks`` app with a single "Standard Mode" tab: text and
        voice controls on the left; audio, stats and subtitles on the right.
    """
    import tempfile

    css = """
.gradio-container { max-width: 1000px !important; }
.audio-output { height: 300px !important; }
.advanced-settings { background-color: #f5f5f5; padding: 15px; border-radius: 5px; }
.tab { min-height: 500px; }
.qa-example { font-family: monospace; white-space: pre; background-color: #f8f9fa; padding: 10px; border-radius: 5px; }
.char-settings { display: grid; grid-template-columns: repeat(4, 1fr); gap: 10px; margin-bottom: 15px; }
.char-settings .voice-select { min-width: 150px; }
.char-settings .speed-slider { min-width: 120px; }
.subtitle-output { max-height: 300px; overflow-y: auto; }
.download-btn { margin-top: 10px; }
"""
    voice_choices = model_manager.get_voice_list()

    with gr.Blocks(title="Advanced Multi-Character TTS", css=css) as app:
        gr.Markdown("## 🎙️ Advanced TTS with Multi-Character Dialogue & Subtitles")
        with gr.Tabs():
            with gr.TabItem("Standard Mode", elem_classes="tab"):
                with gr.Row():
                    with gr.Column():
                        # NOTE(review): this toggle is displayed but not
                        # passed to any handler in this function.
                        phonemization_toggle = gr.Checkbox(
                            label="Enable Advanced Phonemization",
                            value=False,
                            info="Improve pronunciation but may affect performance"
                        )
                        text_input = gr.Textbox(
                            label="Input Text",
                            value="Contact us at info@example.com or call 012-345-6789. Our website is https://www.example.com",
                            lines=7
                        )
                        with gr.Accordion("Voice Settings", open=True):
                            voice = gr.Dropdown(
                                label="Select Voice",
                                choices=voice_choices,
                                value=voice_choices[0] if voice_choices else None
                            )
                            with gr.Row():
                                speed = gr.Slider(
                                    label="Speed",
                                    minimum=0.7,
                                    maximum=1.3,
                                    value=1.0,
                                    step=0.05
                                )
                                volume = gr.Slider(
                                    label="Volume",
                                    minimum=0.5,
                                    maximum=2.0,
                                    value=2.0,
                                    step=0.1
                                )
                                pitch = gr.Slider(
                                    label="Pitch",
                                    minimum=0.8,
                                    maximum=1.2,
                                    value=1.0,
                                    step=0.05
                                )
                            device = gr.Radio(
                                label="Processing Device",
                                choices=["GPU 🚀" if CUDA_AVAILABLE else "GPU (Not Available)", "CPU"],
                                value="GPU 🚀" if CUDA_AVAILABLE else "CPU"
                            )
                        with gr.Accordion("Pause Settings (ms)", open=False):
                            with gr.Row():
                                default_pause = gr.Slider(0, 2000, 200, label="Default")
                                dot_pause = gr.Slider(0, 3000, 600, label="Period (.)")
                                ques_pause = gr.Slider(0, 3000, 800, label="Question (?)")
                            with gr.Row():
                                comma_pause = gr.Slider(0, 1500, 300, label="Comma (,)")
                                colon_pause = gr.Slider(0, 2000, 400, label="Colon (:)")
                        generate_btn = gr.Button("Generate Speech", variant="primary")
                    with gr.Column():
                        audio_output = gr.Audio(label="Output Audio", elem_classes="audio-output")
                        stats_output = gr.Textbox(label="Processing Stats", lines=4)
                        subtitle_output = gr.Textbox(label="Subtitles (.srt format)", lines=10, elem_classes="subtitle-output")
                        download_btn = gr.Button("Download SRT File", elem_classes="download-btn")
                gr.Examples(
                    examples=[
                        ["Call 123-456-7890 for support"],
                        ["Email me at john.doe@company.com"],
                        ["Visit https://example.org for more info"],
                        ["The price is $1,234.56"]
                    ],
                    inputs=text_input,
                    label="Special Format Examples"
                )
                generator = TTSGenerator()

                def generate(text, voice, speed, volume, pitch, device, default_pause, dot_pause, ques_pause, comma_pause, colon_pause):
                    """Run synthesis and write the audio and subtitles to files."""
                    device = "cuda" if "GPU" in device and CUDA_AVAILABLE else "cpu"
                    pause_settings = {
                        'default_pause': default_pause,
                        'dot_pause': dot_pause,
                        'ques_pause': ques_pause,
                        'comma_pause': comma_pause,
                        'colon_pause': colon_pause,
                        'excl_pause': dot_pause,
                        'semi_pause': colon_pause,
                        'dash_pause': comma_pause,
                        'time_colon_pause': 0
                    }
                    result, stats, subtitles = generator.generate_story_audio(
                        text, voice, speed, device, pause_settings, volume, pitch
                    )
                    if not result:
                        return None, stats, "", None
                    _, audio_data = result
                    # Each request gets its own files so that concurrent
                    # users cannot overwrite each other's output. The files
                    # are not deleted here because Gradio serves them after
                    # this handler returns.
                    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
                        audio_file.write(audio_data.tobytes())
                        filepath = audio_file.name
                    with tempfile.NamedTemporaryFile(
                        "w", suffix=".srt", delete=False, encoding="utf-8"
                    ) as srt_handle:
                        srt_handle.write(subtitles)
                        srt_path = srt_handle.name
                    return filepath, stats, subtitles, srt_path

                # Holds the path of the most recently generated SRT file.
                srt_file = gr.State()
                generate_btn.click(
                    fn=generate,
                    inputs=[text_input, voice, speed, volume, pitch, device, default_pause, dot_pause, ques_pause, comma_pause, colon_pause],
                    outputs=[audio_output, stats_output, subtitle_output, srt_file]
                )
                # The download button exposes the stored SRT path as a file.
                download_btn.click(
                    fn=lambda srt_path: srt_path if srt_path else None,
                    inputs=[srt_file],
                    outputs=gr.File(label="Download SRT File")
                )
    return app
if __name__ == "__main__":
    # Script entry point: build the UI and serve it on all interfaces.
    app = create_interface()
    app.launch(
        server_port=7860,
        server_name="0.0.0.0",
    )