tts_project / imp_scripts /translitor.py
PraveenSharma08's picture
Initial project upload: Hindi/English Text-to-Speech pipeline
8a02978
import sys
import io
import ast
# On Windows, replace sys.stdout with a text wrapper over the same underlying
# byte buffer that encodes output as UTF-8. Characters that cannot be encoded
# are substituted (errors='replace') instead of raising an encoding error.
if sys.platform == 'win32':
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
from hindi_xlit import HindiTransliterator
# Name of the assignment that holds the word list, and the literal text that
# opens it when the file has to be scanned line by line.
_DICT_NAME = 'MYTHOLOGY_DICTIONARY'
_DICT_MARKER = 'MYTHOLOGY_DICTIONARY = {'


def _literal_dictionary(text):
    """Return the MYTHOLOGY_DICTIONARY dict if *text* is valid Python, else None."""
    try:
        tree = ast.parse(text)
    except (SyntaxError, ValueError):
        return None
    for node in tree.body:
        if not isinstance(node, ast.Assign):
            continue
        target_names = [t.id for t in node.targets if isinstance(t, ast.Name)]
        if _DICT_NAME not in target_names:
            continue
        try:
            # literal_eval only accepts literals, so nothing in the file is executed.
            value = ast.literal_eval(node.value)
        except (ValueError, TypeError, SyntaxError):
            return None
        return value if isinstance(value, dict) else None
    return None


def _scan_dictionary_lines(lines):
    """Tolerantly extract (key, value) pairs from lines that are not valid Python."""
    pairs = []
    source = iter(lines)
    in_dict = False
    for raw in source:
        line = raw.strip()
        # Skip empty lines and full-line comments.
        if not line or line.startswith('#'):
            continue
        if not in_dict:
            if _DICT_MARKER not in line:
                continue
            in_dict = True
            line = line.split('{', 1)[1].strip()
            if not line:  # Nothing after the opening brace on this line.
                continue
        if ':' in line:
            # Join continuation lines until the entry is terminated by a comma
            # or the dictionary is closed; never read past the closing brace.
            while not line.endswith(',') and '}' not in line:
                extra = next(source, '').strip()
                if not extra:
                    break
                line += ' ' + extra
        # Remember the closing brace BEFORE cutting it off, so the end of the
        # dictionary is still detected when it shares a line with an entry.
        closed = '}' in line
        body = line.split('}', 1)[0]
        for entry in body.split(','):
            key_part, sep, value_part = entry.partition(':')
            if not sep:
                continue
            key = key_part.strip().strip("'\"")
            value = value_part.strip().strip("'\",}")
            if key and value:
                pairs.append((key, value))
        if closed:
            break
    return pairs


def load_dictionary(filename='dictionary.txt'):
    """
    Load the mythology dictionary from file.

    The file is expected to contain an assignment of the form
    ``MYTHOLOGY_DICTIONARY = { 'english': 'hindi', ... }``. When the file is
    valid Python the dict literal is read with ``ast.literal_eval`` (so values
    containing commas and trailing ``#`` comments are handled correctly).
    Otherwise a tolerant line-by-line scan is used, which stops at the first
    closing brace.

    Args:
        filename: Path of the dictionary file (read as UTF-8).

    Returns:
        A dict mapping lower-cased English words to Hindi transliterations.
        Entries whose key or value is empty or not a string are skipped.
        An empty dict is returned (after printing a warning) when the file
        is missing or cannot be read/parsed.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            text = f.read()

        parsed = _literal_dictionary(text)
        if parsed is not None:
            pairs = parsed.items()
        else:
            pairs = _scan_dictionary_lines(text.split('\n'))

        dictionary = {}
        for key, value in pairs:
            if not (isinstance(key, str) and isinstance(value, str)):
                continue
            key = key.strip().lower()
            value = value.strip()
            if key and value:
                dictionary[key] = value

        print(f"✓ Dictionary loaded successfully: {len(dictionary)} words")
        return dictionary
    except FileNotFoundError:
        print(f"Warning: Dictionary file '{filename}' not found.")
        print("Proceeding with model-only transliteration.")
        return {}
    except Exception as e:
        # Best-effort loader: any other failure degrades to model-only mode.
        print(f"Warning: Error loading dictionary: {str(e)}")
        print("Proceeding with model-only transliteration.")
        return {}
def get_transliteration(word, dictionary, transliterator, show_source=False):
    """
    Get transliteration for a word.

    The dictionary is consulted first (case-insensitively, ignoring
    surrounding whitespace); the model is used only when the word is absent.

    Args:
        word: English word to transliterate
        dictionary: Dictionary mapping lower-cased English words to Hindi
        transliterator: Object with a ``transliterate(word)`` method; only
            called when the word is not in the dictionary
        show_source: If True, returns (transliteration, source)

    Returns:
        Transliteration string, or tuple (transliteration, source) if
        show_source=True, where source is "dictionary" or "model".

    Raises:
        Exception: If the model call fails or yields no candidates. The
            original model error, if any, is attached as ``__cause__``.
    """
    cleaned = word.strip()
    lookup_key = cleaned.lower()

    if lookup_key in dictionary:
        result, source = dictionary[lookup_key], "dictionary"
    else:
        try:
            # Pass the stripped word so the model sees the same text that was
            # used for the dictionary lookup.
            model_result = transliterator.transliterate(cleaned)
        except Exception as e:
            raise Exception(f"Transliteration failed: {str(e)}") from e

        # The model may return a ranked sequence of candidates or a single value.
        if isinstance(model_result, (list, tuple)):
            if not model_result:
                raise Exception("Transliteration failed: model returned no candidates")
            result = model_result[0]  # Take first (best) result
        else:
            result = model_result
        source = "model"

    if show_source:
        return result, source
    return result
def main():
    """
    Run the interactive transliteration prompt.

    Loads 'dictionary.txt', creates a HindiTransliterator, then repeatedly
    reads a line from standard input. Recognised commands are 'exit' / 'quit'
    / 'q', 'stats' and 'check <word>'; any other non-empty input is
    transliterated, printed, and appended to 'output_trans.txt'.

    The loop ends on an exit command, on end of input (EOF), or on Ctrl-C.
    """
    print("=" * 50)
    print("Hindi Transliterator with Mythology Dictionary")
    print("=" * 50)

    # Load dictionary
    print("\nLoading dictionary...")
    dictionary = load_dictionary('dictionary.txt')

    # Initialize transliterator
    print("Initializing transliterator model...")
    transliterator = HindiTransliterator()
    print("✓ Model loaded successfully")

    print("\nCommands:")
    print(" 'exit' or 'quit' - Exit the program")
    print(" 'stats' - Show statistics")
    print(" 'check <word>' - Check if word is in dictionary")
    print("-" * 50)

    while True:
        # Get user input; closed/exhausted stdin or Ctrl-C ends the session
        # cleanly instead of crashing with a traceback.
        try:
            user_input = input("\nEnter English word to transliterate: ").strip()
        except (EOFError, KeyboardInterrupt):
            print("\nExiting transliterator...")
            break

        command = user_input.lower()

        # Check for exit condition
        if command in ('exit', 'quit', 'q'):
            print("Exiting transliterator...")
            break

        # Check for stats command
        if command == 'stats':
            print("\nDictionary Statistics:")
            print(f" Total words in dictionary: {len(dictionary)}")
            print(" Coverage: Mythology, Vedic texts, Sanskrit terms")
            continue

        # Check for 'check' command
        if command.startswith('check '):
            word_to_check = user_input[6:].strip().lower()
            if word_to_check in dictionary:
                print(f"✓ '{word_to_check}' is in dictionary: {dictionary[word_to_check]}")
            else:
                print(f"✗ '{word_to_check}' is NOT in dictionary (will use model)")
            continue

        if not user_input:
            print("Please enter a word to transliterate.")
            continue

        try:
            # Get transliteration with source info
            transliteration, source = get_transliteration(
                user_input,
                dictionary,
                transliterator,
                show_source=True,
            )

            # Display the result
            print("\nTransliteration:")
            print(f"English: {user_input}")
            print(f"Hindi: {transliteration}")
            print(f"Source: {source.upper()}")

            # Add indicator for dictionary matches
            if source == "dictionary":
                print(" ✓ Verified from mythology dictionary")
            else:
                print(" ⚠ Generated by model (not in dictionary)")

            # Append the result to the running output log
            with open('output_trans.txt', 'a', encoding='utf-8') as f:
                f.write(f"{user_input} -> {transliteration} [{source}]\n")
        except Exception as e:
            # Top-level boundary: report the problem and keep the prompt alive.
            print(f"Error: {str(e)}")
# Start the interactive prompt only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()