"""Ask a local LLM which n-gram of a Hindi verse best translates a Greek term.

The script passes a Hindi sentence and a Greek term through ``uroman``,
tokenizes the sentence, builds every 2- and 3-word n-gram from it, and then
uses ``guidance.select`` to constrain the model's answer to one of those
n-grams.
"""
import string

import nltk
from guidance import models, select
from nltk import FreqDist
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.util import ngrams
from romanize import uroman

# word_tokenize needs the 'punkt' tokenizer data to be available locally.
nltk.download('punkt')

llm = models.LlamaCpp('models/neural-chat-7b-v3-3.Q2_K.gguf', n_gpu_layers=1)

hin_str = 'हमारे परमेश्वर और प्रभु यीशु मसीह के पिता का धन्यवाद हो कि उसने हमें मसीह में स्वर्गीय स्थानों में सब प्रकार की आत्मिक आशीष* दी है।'
# NOTE(review): uroman presumably returns a Latin-script romanization, which
# would put the sentence and the Greek term in the same script -- confirm.
hin_str = uroman(hin_str)

greek_term = "ἐν Χριστῷ"
greek_term = uroman(greek_term)

# Tokenize and normalize case, then drop punctuation tokens.
tokens = word_tokenize(hin_str.lower())
tokens = [token for token in tokens if token not in string.punctuation]

# Build the candidate phrases: every bigram and trigram of the sentence.
# Each n-gram keeps ALL of its words, joined by a space (the previous code
# glued only the first two words together and then threw the list away).
candidate_phrases = []
for n in range(2, 4):
    candidate_phrases.extend(' '.join(gram) for gram in ngrams(tokens, n))

# Drop repeated phrases while keeping first-occurrence order, so the model is
# not offered the same option twice.
all_ngrams = list(dict.fromkeys(candidate_phrases))
print(all_ngrams)

if not all_ngrams:
    # select() needs at least one option to choose from.
    raise ValueError('no n-gram candidates could be built from the sentence')

# Constrain the model's continuation to exactly one of the candidates.
lm = llm
lm += f'The best translation of {greek_term} from Greek into Hindi is '
lm += select(all_ngrams, name='ngram')
print(lm['ngram'])