# File size: 6,970 Bytes
# 8a02978
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
import sys
import io
import ast

# Switch stdout to UTF-8 on Windows so the Devanagari output and the status
# glyphs printed by this script can be encoded; characters that still cannot
# be encoded are replaced instead of raising.
if sys.platform == 'win32':
    if hasattr(sys.stdout, 'reconfigure'):
        # Python 3.7+: change the encoding in place. Settings that are not
        # passed (such as line buffering) keep their current values, so
        # progress messages still appear as soon as they are printed.
        sys.stdout.reconfigure(encoding='utf-8', errors='replace')
    else:
        # Older interpreter or a replaced stdout object: wrap the raw buffer.
        # line_buffering=True keeps each printed line visible immediately
        # rather than waiting for the next explicit flush.
        sys.stdout = io.TextIOWrapper(
            sys.stdout.buffer,
            encoding='utf-8',
            errors='replace',
            line_buffering=True,
        )

from hindi_xlit import HindiTransliterator

# Text that opens the dictionary block inside the dictionary file.
_DICT_MARKER = 'MYTHOLOGY_DICTIONARY = {'


def _parse_dictionary_literal(lines):
    """Parse the dictionary block as a Python dict literal.

    Returns the parsed dict, or None when there is no marker line or the
    block is not a valid literal (e.g. unquoted legacy entries).
    """
    start_line = None
    for index, raw in enumerate(lines):
        stripped = raw.strip()
        # A commented-out marker does not open the dictionary.
        if not stripped.startswith('#') and _DICT_MARKER in stripped:
            start_line = index
            break
    if start_line is None:
        return None

    text = ''.join(lines[start_line:])
    start = text.index(_DICT_MARKER) + len(_DICT_MARKER) - 1  # the '{'

    # The literal normally ends at the first '}', but a '}' may also occur
    # inside a string or comment, so try each candidate until one parses.
    end = text.find('}', start)
    while end != -1:
        try:
            parsed = ast.literal_eval(text[start:end + 1])
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            end = text.find('}', end + 1)
            continue
        return parsed if isinstance(parsed, dict) else None
    return None


def _parse_dictionary_lines(lines):
    """Tolerant line-by-line fallback for files that are not valid literals.

    Accepts quoted or unquoted ``key: value,`` entries and returns the
    resulting dict with lower-cased keys.
    """
    entries = {}
    stream = iter(lines)
    in_dict = False

    for raw in stream:
        line = raw.strip()

        # Skip empty lines and comments
        if not line or line.startswith('#'):
            continue

        if _DICT_MARKER in line:
            in_dict = True
            line = line.split('{', 1)[1].strip()
            if not line:  # nothing after the opening brace
                continue

        if not in_dict:
            continue

        # Remember the closing brace before the line is cut at it below;
        # otherwise the end of the dictionary would never be detected.
        closed = '}' in line

        if ':' in line:
            # An entry without a trailing comma continues on the next line(s).
            while not closed and not line.endswith(','):
                extra = next(stream, '').strip()
                if not extra:
                    break
                if extra.startswith('#'):
                    continue  # comments are never part of an entry
                line += ' ' + extra
                closed = '}' in extra

            for entry in line.split('}')[0].split(','):
                if ':' not in entry:
                    continue
                key_part, value_part = entry.split(':', 1)
                key = key_part.strip().strip("'\"")
                value = value_part.strip().strip("'\",}")
                if key and value:
                    entries[key.lower()] = value

        if closed:
            break

    return entries


def load_dictionary(filename='dictionary.txt'):
    """
    Load the mythology dictionary from file.

    The file is expected to contain a block opened by
    ``MYTHOLOGY_DICTIONARY = {`` and closed by ``}``. The block is parsed as a
    Python dict literal first, so values may contain commas, colons and
    escapes. If that fails, a tolerant line-based parser is used instead.

    Args:
        filename: Path of the dictionary file (read as UTF-8).

    Returns:
        A dict mapping lower-cased English words to Hindi transliterations.
        An empty dict is returned (after printing a warning) when the file is
        missing or cannot be read; the function does not raise for those cases.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except FileNotFoundError:
        print(f"Warning: Dictionary file '{filename}' not found.")
        print("Proceeding with model-only transliteration.")
        return {}
    except (OSError, ValueError, TypeError) as e:
        # Unreadable path, undecodable bytes (UnicodeDecodeError is a
        # ValueError) or an invalid filename: degrade to model-only mode.
        print(f"Warning: Error loading dictionary: {str(e)}")
        print("Proceeding with model-only transliteration.")
        return {}

    parsed = _parse_dictionary_literal(lines)
    if parsed is None:
        dictionary = _parse_dictionary_lines(lines)
    else:
        dictionary = {}
        for raw_key, raw_value in parsed.items():
            key, value = str(raw_key), str(raw_value)
            if key and value:
                dictionary[key.lower()] = value

    print(f"✓ Dictionary loaded successfully: {len(dictionary)} words")
    return dictionary

def get_transliteration(word, dictionary, transliterator, show_source=False):
    """
    Get transliteration for a word.
    First checks dictionary, then falls back to model.

    Args:
        word: English word to transliterate; surrounding whitespace is ignored
        dictionary: Dictionary mapping lower-cased English words to Hindi
        transliterator: Object with a ``transliterate(word)`` method that
            returns either a single result or a list of candidates
        show_source: If True, returns (transliteration, source)

    Returns:
        Transliteration string, or tuple (transliteration, source) if
        show_source=True, where source is "dictionary" or "model"

    Raises:
        RuntimeError: If the model call fails (the original error is chained
            as ``__cause__``) or the model returns an empty candidate list.
            The message always starts with "Transliteration failed:".
    """
    cleaned = word.strip()
    lookup_key = cleaned.lower()

    # Check dictionary first
    if lookup_key in dictionary:
        result = dictionary[lookup_key]
        source = "dictionary"
    else:
        # Fall back to model. Pass the stripped word so the model sees the
        # same token that was used for the dictionary lookup.
        try:
            model_result = transliterator.transliterate(cleaned)
        except Exception as e:
            raise RuntimeError(f"Transliteration failed: {str(e)}") from e

        if isinstance(model_result, list):
            if not model_result:
                # Report the real problem instead of an IndexError message.
                raise RuntimeError(
                    f"Transliteration failed: model returned no candidates for '{cleaned}'"
                )
            result = model_result[0]  # Take first (best) result
        else:
            result = model_result
        source = "model"

    if show_source:
        return result, source
    return result

def main():
    """
    Run the interactive transliteration prompt.

    Loads the dictionary and the transliteration model, then reads words from
    standard input until the user exits. Supported commands:

      * ``exit`` / ``quit`` / ``q`` - leave the program
      * ``stats`` - print the dictionary size
      * ``check <word>`` - report whether a word is in the dictionary

    Any other non-empty input is transliterated, printed together with its
    source, and appended to ``output_trans.txt``. End of input (Ctrl-D,
    Ctrl-Z, exhausted pipe) and Ctrl-C at the prompt end the loop cleanly.
    """
    print("=" * 50)
    print("Hindi Transliterator with Mythology Dictionary")
    print("=" * 50)

    # Load dictionary
    print("\nLoading dictionary...")
    dictionary = load_dictionary('dictionary.txt')

    # Initialize transliterator
    print("Initializing transliterator model...")
    transliterator = HindiTransliterator()
    print("✓ Model loaded successfully")

    print("\nCommands:")
    print("  'exit', 'quit' or 'q' - Exit the program")
    print("  'stats' - Show statistics")
    print("  'check <word>' - Check if word is in dictionary")
    print("-" * 50)

    while True:
        # Get user input; treat EOF / Ctrl-C like an explicit exit instead of
        # crashing with a traceback.
        try:
            user_input = input("\nEnter English word to transliterate: ").strip()
        except (EOFError, KeyboardInterrupt):
            print("\nExiting transliterator...")
            break

        command = user_input.lower()

        # Check for exit condition
        if command in ('exit', 'quit', 'q'):
            print("Exiting transliterator...")
            break

        # Check for stats command
        if command == 'stats':
            print("\nDictionary Statistics:")
            print(f"  Total words in dictionary: {len(dictionary)}")
            print("  Coverage: Mythology, Vedic texts, Sanskrit terms")
            continue

        # Check for 'check' command
        if command.startswith('check '):
            word_to_check = user_input[6:].strip().lower()
            if word_to_check in dictionary:
                print(f"✓ '{word_to_check}' is in dictionary: {dictionary[word_to_check]}")
            else:
                print(f"✗ '{word_to_check}' is NOT in dictionary (will use model)")
            continue

        if not user_input:
            print("Please enter a word to transliterate.")
            continue

        try:
            # Get transliteration with source info
            transliteration, source = get_transliteration(
                user_input,
                dictionary,
                transliterator,
                show_source=True
            )

            # Display the result
            print("\nTransliteration:")
            print(f"English: {user_input}")
            print(f"Hindi:   {transliteration}")
            print(f"Source:  {source.upper()}")

            # Add indicator for dictionary matches
            if source == "dictionary":
                print("         ✓ Verified from mythology dictionary")
            else:
                print("         ⚠ Generated by model (not in dictionary)")

            # Save to file (append, so earlier results are kept)
            with open('output_trans.txt', 'a', encoding='utf-8') as f:
                f.write(f"{user_input} -> {transliteration} [{source}]\n")

        except Exception as e:
            # Keep the prompt alive: report the failure and ask again.
            print(f"Error: {str(e)}")

# Start the interactive prompt only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()