Spaces:
Sleeping
Sleeping
| import numpy as np | |
| import pandas as pd | |
| from tensorflow.keras.preprocessing.text import Tokenizer | |
| from tensorflow.keras.preprocessing.sequence import pad_sequences | |
| from tensorflow.keras.layers import Input, Embedding, LSTM, Dense | |
| from tensorflow.keras.models import Model, load_model | |
| import pickle | |
def _load_pickle(path):
    """Return the object unpickled from the file at *path*."""
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only point this at model artifacts that come from a trusted source.
    with open(path, 'rb') as f:
        return pickle.load(f)


def load_model_files(modelToLoad):
    """Load the tokenizers, sequence lengths and Keras model for one model.

    All artifacts are read from the directory ``modelToLoad`` and share the
    file-name prefix ``modelToLoad``:

    * ``<name>_it.pkl``   -> ``tokenizer_e`` (tokenizer applied to the input text)
    * ``<name>_ot.pkl``   -> ``tokenizer_f`` (tokenizer whose vocabulary is decoded)
    * ``<name>_omlt.pkl`` -> ``max_len_tgt``
    * ``<name>_imls.pkl`` -> ``max_len_src``
    * ``<name>.keras``    -> ``model``

    The results are published as module-level globals, together with the
    word/index mappings of ``tokenizer_f`` and the ids of the ``"start_"`` and
    ``"_end"`` marker tokens.

    Args:
        modelToLoad: Name of the model; used both as the directory name and
            as the file-name prefix.

    Raises:
        ValueError: If the output tokenizer has no ``"start_"`` token. Decoding
            is seeded with that id, so without it every translation would fail
            later with an unrelated-looking error.
    """
    global tokenizer_e, tokenizer_f, max_len_tgt, max_len_src, model, START_ID, END_ID, index2word_f, word2index_f

    prefix = f'{modelToLoad}/{modelToLoad}'
    tokenizer_e = _load_pickle(f'{prefix}_it.pkl')
    tokenizer_f = _load_pickle(f'{prefix}_ot.pkl')
    max_len_tgt = _load_pickle(f'{prefix}_omlt.pkl')
    max_len_src = _load_pickle(f'{prefix}_imls.pkl')
    model = load_model(f'{prefix}.keras')

    # Set word mappings
    index2word_f = tokenizer_f.index_word
    word2index_f = tokenizer_f.word_index

    START_ID = word2index_f.get("start_")
    if START_ID is None:
        # Fail here with a clear message instead of letting a None id reach
        # the decoder input later on.
        raise ValueError(
            f"output tokenizer for model {modelToLoad!r} has no 'start_' token"
        )

    # A missing "_end" token is tolerated: END_ID is then None and never
    # compares equal to a predicted id.
    END_ID = word2index_f.get("_end")
def prep_text(s):
    """Normalize *s*: trim it, lowercase it and collapse whitespace runs.

    Returns the whitespace-separated tokens of the lowercased text joined by
    single spaces (an empty string when *s* contains no tokens).
    """
    lowered = s.strip().lower()
    tokens = lowered.split()
    return " ".join(tokens)
def translate(en_text, max_len=50):
    """Greedily decode a translation of *en_text* with the loaded model.

    The text is normalized with ``prep_text``, converted to ids by
    ``tokenizer_e`` and post-padded to ``max_len_src``. Decoding starts from
    ``START_ID`` and repeatedly feeds the ids produced so far back into the
    model, taking the arg-max id at the current position each time.

    Args:
        en_text: Text to translate.
        max_len: Upper bound on the number of decoding steps; the effective
            bound is ``min(max_len, max_len_tgt - 1)``.

    Returns:
        The decoded words joined by single spaces. Ids that have no entry in
        ``index2word_f`` are skipped.
    """
    source_ids = tokenizer_e.texts_to_sequences([prep_text(en_text)])
    encoder_input = pad_sequences(source_ids, maxlen=max_len_src, padding='post')

    decoder_width = max_len_tgt - 1
    step_limit = min(max_len, decoder_width)

    decoded = [START_ID]
    for _step in range(step_limit):
        decoder_input = pad_sequences([decoded], maxlen=decoder_width, padding='post')
        scores = model.predict([encoder_input, decoder_input], verbose=0)

        # Read the scores at the position of the most recently fed id.
        position = len(decoded) - 1
        predicted = int(np.argmax(scores[0, position, :]))

        # Id 0 (the padding value) or the end marker terminates decoding.
        if predicted in (0, END_ID):
            break
        decoded.append(predicted)

    # Skip the leading start id, then drop ids without a known word.
    candidates = (index2word_f.get(token_id, "") for token_id in decoded[1:])
    return " ".join(word for word in candidates if word)
# Load the 'Hindi' model artifacts as soon as this module is executed, so the
# module-level globals read by translate() are populated.
load_model_files(modelToLoad='Hindi')