# File size: 6,970 Bytes
# 8a02978 (file-viewer gutter for source lines 1-196 removed)
import sys
import io
import ast
# Force UTF-8 console output on Windows so Devanagari text and the status
# glyphs printed by this script do not raise UnicodeEncodeError on legacy
# code pages; characters that still cannot be encoded are replaced.
if sys.platform == 'win32':
    if hasattr(sys.stdout, 'reconfigure'):
        # Preferred: switch the encoding of the existing stream in place
        # instead of rebinding sys.stdout to a second wrapper.
        sys.stdout.reconfigure(encoding='utf-8', errors='replace')
    elif hasattr(sys.stdout, 'buffer'):
        # Fallback for stream objects without reconfigure(): wrap the
        # underlying binary buffer in a new UTF-8 text layer.
        sys.stdout = io.TextIOWrapper(
            sys.stdout.buffer, encoding='utf-8', errors='replace'
        )
from hindi_xlit import HindiTransliterator
def load_dictionary(filename='dictionary.txt'):
    """
    Load the mythology dictionary from file.

    The file is scanned line by line for the literal text
    ``MYTHOLOGY_DICTIONARY = {``. Everything from that point up to the first
    ``}`` is read as comma-separated ``key: value`` entries. Surrounding
    quotes are stripped from keys and values, keys are lower-cased, and
    blank lines or lines starting with ``#`` are skipped. Because entries
    are split on ``,`` and the block ends at the first ``}``, values that
    contain either character are not supported.

    Args:
        filename: Path of the dictionary file (read as UTF-8).

    Returns:
        A dict mapping lower-cased English words to Hindi transliterations.
        An empty dict is returned (after printing a warning) when the file
        is missing or any other error occurs while reading it.
    """
    dictionary = {}
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            in_dict = False
            for line in f:
                line = line.strip()
                # Skip empty lines and comments
                if not line or line.startswith('#'):
                    continue
                # The opening marker may be followed by entries on the same
                # line; keep only what comes after the '{'.
                if 'MYTHOLOGY_DICTIONARY = {' in line:
                    in_dict = True
                    line = line.split('{', 1)[1].strip()
                    if not line:  # Nothing after '{' on this line
                        continue
                if not in_dict:
                    continue
                if ':' in line:
                    # An entry may be wrapped over several lines: keep
                    # joining until it ends with ',' or the closing brace
                    # shows up. Never read past '}', otherwise lines that
                    # belong to whatever follows the dictionary are consumed.
                    while '}' not in line and not line.endswith(','):
                        next_line = next(f, '').strip()
                        if not next_line:
                            break
                        line += ' ' + next_line
                # Remember the closing brace *before* cutting the line at it,
                # so the end of the dictionary is detected even when '}'
                # shares a line with the final entry.
                closes_dict = '}' in line
                body = line.split('}', 1)[0]
                for entry in body.split(','):
                    if ':' not in entry:
                        continue
                    key_part, value_part = entry.split(':', 1)
                    key = key_part.strip().strip("'\"")
                    value = value_part.strip().strip("'\",}")
                    if key and value:
                        dictionary[key.lower()] = value
                if closes_dict:
                    break
        print(f"✓ Dictionary loaded successfully: {len(dictionary)} words")
        return dictionary
    except FileNotFoundError:
        print(f"Warning: Dictionary file '{filename}' not found.")
        print("Proceeding with model-only transliteration.")
        return {}
    except Exception as e:
        # Deliberate best-effort: any read/decode problem degrades to
        # model-only operation instead of aborting the program.
        print(f"Warning: Error loading dictionary: {str(e)}")
        print("Proceeding with model-only transliteration.")
        return {}
def get_transliteration(word, dictionary, transliterator, show_source=False):
    """
    Get transliteration for a word.

    The word is stripped of surrounding whitespace once; its lower-cased
    form is looked up in the dictionary, and only on a miss is the stripped
    word handed to the model. When the model returns a list, its first
    element is used.

    Args:
        word: English word to transliterate
        dictionary: Dictionary mapping lower-cased English words to Hindi
        transliterator: Object exposing ``transliterate(word)``
        show_source: If True, returns (transliteration, source)

    Returns:
        Transliteration string, or tuple (transliteration, source) if
        show_source=True, where source is "dictionary" or "model".

    Raises:
        RuntimeError: If the model call fails or returns an empty list. The
            message starts with "Transliteration failed: " and the original
            error is attached as the cause.
    """
    # Normalise once so the dictionary lookup and the model see the same word.
    cleaned = word.strip()
    lookup_key = cleaned.lower()

    # Check dictionary first
    if lookup_key in dictionary:
        result = dictionary[lookup_key]
        source = "dictionary"
    else:
        # Fall back to model
        try:
            model_result = transliterator.transliterate(cleaned)
            if isinstance(model_result, list):
                if not model_result:
                    # Report an empty candidate list explicitly rather than
                    # as an opaque "list index out of range".
                    raise ValueError("model returned no candidates")
                result = model_result[0]  # Take first result
            else:
                result = model_result
        except Exception as e:
            # RuntimeError is still caught by callers using `except Exception`.
            raise RuntimeError(f"Transliteration failed: {str(e)}") from e
        source = "model"

    if show_source:
        return result, source
    return result
def main():
    """
    Run the interactive transliteration prompt.

    Loads the dictionary from 'dictionary.txt', creates a
    HindiTransliterator, then repeatedly reads a line from standard input:

    * 'exit', 'quit' or 'q' ends the session;
    * 'stats' prints the number of dictionary entries;
    * 'check <word>' reports whether <word> is in the dictionary;
    * anything else is transliterated, printed together with its source,
      and appended to 'output_trans.txt'.

    End of input (closed or exhausted stdin) and Ctrl+C at the prompt also
    end the session cleanly. Errors raised while transliterating or saving a
    single word are printed and the loop continues.
    """
    print("=" * 50)
    print("Hindi Transliterator with Mythology Dictionary")
    print("=" * 50)

    # Load dictionary
    print("\nLoading dictionary...")
    dictionary = load_dictionary('dictionary.txt')

    # Initialize transliterator
    print("Initializing transliterator model...")
    transliterator = HindiTransliterator()
    print("✓ Model loaded successfully")

    print("\nCommands:")
    print(" 'exit' or 'quit' - Exit the program")
    print(" 'stats' - Show statistics")
    print(" 'check <word>' - Check if word is in dictionary")
    print("-" * 50)

    while True:
        # Get user input; input() raises EOFError when stdin is exhausted
        # and KeyboardInterrupt on Ctrl+C, neither of which should end the
        # program with a traceback.
        try:
            user_input = input("\nEnter English word to transliterate: ").strip()
        except (EOFError, KeyboardInterrupt):
            print("\nExiting transliterator...")
            break

        command = user_input.lower()

        # Check for exit condition
        if command in ('exit', 'quit', 'q'):
            print("Exiting transliterator...")
            break

        # Check for stats command
        if command == 'stats':
            print("\nDictionary Statistics:")
            print(f" Total words in dictionary: {len(dictionary)}")
            print(" Coverage: Mythology, Vedic texts, Sanskrit terms")
            continue

        # Check for 'check' command
        if command.startswith('check '):
            word_to_check = user_input[6:].strip().lower()
            if word_to_check in dictionary:
                print(f"✓ '{word_to_check}' is in dictionary: {dictionary[word_to_check]}")
            else:
                print(f"✗ '{word_to_check}' is NOT in dictionary (will use model)")
            continue

        if not user_input:
            print("Please enter a word to transliterate.")
            continue

        try:
            # Get transliteration with source info
            transliteration, source = get_transliteration(
                user_input,
                dictionary,
                transliterator,
                show_source=True,
            )

            # Display the result
            print("\nTransliteration:")
            print(f"English: {user_input}")
            print(f"Hindi: {transliteration}")
            print(f"Source: {source.upper()}")

            # Add indicator for dictionary matches
            if source == "dictionary":
                print(" ✓ Verified from mythology dictionary")
            else:
                print(" ⚠ Generated by model (not in dictionary)")

            # Save to file (appended, so earlier sessions are kept)
            with open('output_trans.txt', 'a', encoding='utf-8') as f:
                f.write(f"{user_input} -> {transliteration} [{source}]\n")
        except Exception as e:
            # Top-level boundary of the loop: report and keep prompting.
            print(f"Error: {str(e)}")
# Start the interactive session only when executed as a script, not on import.
if __name__ == "__main__":
    main()