tts_project / hing_bert_module /transliteration.py
PraveenSharma08's picture
Initial project upload: Hindi/English Text-to-Speech pipeline
8a02978
import os
from typing import Dict, Union
from hindi_xlit import HindiTransliterator
# Absolute directory that contains this module; used to locate data files
# shipped next to the code regardless of the current working directory.
BASE_DIR = os.path.split(os.path.abspath(__file__))[0]

# Default location of the dictionary file read by load_dictionary().
DICTIONARY_PATH = os.path.join(BASE_DIR, "dictionary.txt")
def load_dictionary(filename: Union[str, None] = None) -> Dict[str, str]:
    """Load the mythology dictionary (English→Hindi) from a text file.

    The file is scanned line by line for a ``MYTHOLOGY_DICTIONARY = {``
    header.  The ``key: value`` pairs that follow are collected until the
    first closing ``}``; nothing after that brace is read as an entry.
    Blank lines and lines starting with ``#`` are ignored.  Keys are
    lower-cased, and surrounding quotes are removed from keys and values.

    The parser is purely textual: entries are separated on ``,`` and split
    on the first ``:``, so keys or values that themselves contain ``,``,
    ``:`` or ``}`` are not supported.

    Args:
        filename: Path of the dictionary file.  Falls back to
            ``DICTIONARY_PATH`` when omitted or empty.

    Returns:
        Mapping of lower-cased key to value.  An empty dict is returned
        (after printing a warning) when the file is missing or any other
        error occurs while loading it.
    """
    filename = filename or DICTIONARY_PATH
    dictionary = {}
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            in_dict = False
            for line in f:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                if 'MYTHOLOGY_DICTIONARY = {' in line:
                    in_dict = True
                    # Entries may start on the header line itself.
                    line = line.split('{', 1)[1].strip()
                    if not line:
                        continue
                if not in_dict:
                    continue
                if ':' in line:
                    # An entry may be wrapped over several physical lines.
                    # Keep pulling lines until the entry is terminated by a
                    # trailing comma or the dictionary's closing brace.
                    while '}' not in line and not line.endswith(','):
                        next_line = next(f, '').strip()
                        if not next_line:
                            # Blank line or end of file ends the entry.
                            break
                        if next_line.startswith('#'):
                            # Comments must not leak into the value.
                            continue
                        line += ' ' + next_line
                # Remember the closing brace BEFORE cutting it off, so the
                # loop stops even when the brace was pulled in above.
                closed = '}' in line
                body = line.split('}', 1)[0]
                for entry in body.split(','):
                    key_part, sep, value_part = entry.partition(':')
                    if not sep:
                        continue
                    key = key_part.strip().strip("'\"")
                    value = value_part.strip().strip("'\"")
                    if key and value:
                        dictionary[key.lower()] = value
                if closed:
                    break
        print(f"✓ Dictionary loaded successfully: {len(dictionary)} words")
        return dictionary
    except FileNotFoundError:
        print(f"Warning: Dictionary file '{filename}' not found.")
        return {}
    except Exception as e:
        # Best-effort loader: callers fall back to the model when the
        # dictionary cannot be read, so report and return an empty mapping.
        print(f"Warning: Error loading dictionary: {str(e)}")
        return {}
def get_transliteration(word: str, dictionary: Dict[str, str], transliterator: HindiTransliterator, show_source: bool = False) -> Union[str, tuple]:
    """Transliterate *word*, preferring the dictionary over the model.

    The word is lower-cased and stripped for the dictionary lookup.  On a
    miss, ``transliterator.transliterate(word)`` is called with the word
    as given; if it returns a list, its first element is used.  If the
    model call raises, the original word is returned unchanged.

    Args:
        word: Word to transliterate.
        dictionary: Mapping of lower-cased words to their transliteration.
        transliterator: Object exposing ``transliterate(word)``.
        show_source: When true, also report where the result came from.

    Returns:
        The transliteration, or a ``(transliteration, source)`` tuple when
        ``show_source`` is true, where ``source`` is one of
        ``"dictionary"``, ``"model"`` or ``"error"``.
    """
    lookup_key = word.lower().strip()
    if lookup_key in dictionary:
        text, origin = dictionary[lookup_key], "dictionary"
    else:
        try:
            candidates = transliterator.transliterate(word)
            if isinstance(candidates, list):
                text = candidates[0]
            else:
                text = candidates
            origin = "model"
        except Exception:
            # Any model failure (including an empty candidate list) falls
            # back to echoing the input word.
            text, origin = word, "error"

    if show_source:
        return (text, origin)
    return text