Spaces:
Sleeping
Sleeping
| from tensorflow.keras.preprocessing.sequence import pad_sequences | |
| from tensorflow.keras.models import load_model | |
| import pickle | |
| import numpy as np | |
# Load the trained model and both tokenizers once, at import time.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# the .pkl artifacts below must come from a trusted source.
def _read_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


try:
    model = load_model("models/best_model.h5", compile=True)
    tokenizer_inputs = _read_pickle("models/tokenizer_input.pkl")
    tokenizer_outputs = _read_pickle("models/tokenizer_target.pkl")
except Exception as load_error:
    # Report the failure on stdout, then let the original exception propagate
    # so the module import still fails.
    print(f"Error loading model: {load_error}")
    raise
# Temperature / top-k sampling used to make generation less deterministic.
def sample_with_temperature(probs, temperature=1.0, top_k=None):
    """Draw one index at random from a probability vector.

    Args:
        probs: 1-D array-like of non-negative weights, one per candidate
            index. It is renormalised internally, so it does not have to sum
            to exactly 1.
        temperature: Positive scaling factor applied in log space. Values
            below 1 sharpen the distribution, values above 1 flatten it, and
            1.0 leaves it unchanged.
        top_k: When set to a value between 1 and ``len(probs) - 1``, sampling
            is restricted to the ``top_k`` most likely indices. ``None``, a
            non-positive value, or a value >= ``len(probs)`` samples from the
            whole distribution.

    Returns:
        The sampled index as a plain ``int``.

    Raises:
        ValueError: If ``temperature`` is not positive, ``probs`` is empty or
            not 1-D, or ``probs`` has no positive finite mass.
    """
    if temperature <= 0:
        raise ValueError(f"temperature must be positive, got {temperature!r}")

    # Work in float64 so the renormalised vector sums to 1 as closely as
    # possible before it is handed to np.random.choice.
    weights = np.asarray(probs, dtype=np.float64)
    if weights.ndim != 1 or weights.size == 0:
        raise ValueError("probs must be a non-empty 1-D array of probabilities")

    if temperature != 1.0:
        # log(0) is -inf, which correctly maps back to probability 0, so the
        # divide warning is noise. Subtracting the maximum before exp() keeps
        # very small temperatures from underflowing every entry to zero.
        with np.errstate(divide="ignore", invalid="ignore"):
            scaled = np.log(weights) / temperature
            weights = np.exp(scaled - np.max(scaled))

    total = weights.sum()
    if not np.isfinite(total) or total <= 0:
        raise ValueError("probs must contain at least one positive, finite probability")
    weights = weights / total

    if top_k is not None and 0 < top_k < weights.size:
        # argpartition places the top_k largest entries in the last top_k
        # slots (in arbitrary order), which is all that sampling needs.
        candidates = np.argpartition(weights, -top_k)[-top_k:]
        candidate_weights = weights[candidates]
        candidate_weights = candidate_weights / candidate_weights.sum()
        return int(np.random.choice(candidates, p=candidate_weights))

    return int(np.random.choice(weights.size, p=weights))
# Text prediction function.
def predict_with_main_model(user_text, tokenizer_input, tokenizer_target, model,
                            max_len=15, temperature=1.0, top_k=None,
                            max_encoder_len=9, max_decoder_len=15):
    """Generate a reply to ``user_text`` by decoding one token at a time.

    The decoder input starts as ``<sos>`` followed by zeros. On each step the
    model is run on the whole buffer, the next token is chosen from the
    output at the previous position, and that token is written back into the
    buffer. Decoding stops at ``<eos>`` or when the buffer is full.

    Args:
        user_text: The message to respond to.
        tokenizer_input: Tokenizer providing ``texts_to_sequences`` for the
            encoder side.
        tokenizer_target: Tokenizer providing ``word_index`` / ``index_word``
            for the decoder side. ``<sos>`` and ``<eos>`` fall back to ids 1
            and 2 when missing from ``word_index``.
        model: Object whose ``predict([encoder_input, decoder_input])``
            result is indexed as ``[0, step]`` to obtain per-token scores.
            Presumably shaped (1, steps, vocab) -- confirm against the
            trained model.
        max_len: Decoder buffer length plus one; at most ``max_len - 2``
            tokens are generated. ``None`` falls back to ``max_decoder_len``.
        temperature: Sampling temperature. Values <= 0 select greedy decoding.
        top_k: If truthy, sample only among the ``top_k`` most likely tokens.
        max_encoder_len: Length the encoder input is post-padded to.
        max_decoder_len: Used only as the fallback for ``max_len``.

    Returns:
        The generated words joined by single spaces. This is ``''`` when
        nothing was generated or ``max_len`` leaves no room for a token.
    """
    if max_len is None:
        max_len = max_decoder_len
    # Slot 0 of the (max_len - 1)-wide buffer is reserved for <sos>; with
    # fewer than two slots there is no position left to generate into.
    if max_len < 3:
        return ''

    input_seq = tokenizer_input.texts_to_sequences([user_text])
    encoder_input = pad_sequences(input_seq, maxlen=max_encoder_len, padding='post')

    start_token = tokenizer_target.word_index.get('<sos>', 1)
    end_token = tokenizer_target.word_index.get('<eos>', 2)

    decoder_input = np.zeros((1, max_len - 1), dtype='int32')
    decoder_input[0, 0] = start_token

    # Sample whenever the caller asked for randomness through either knob.
    # Previously a non-default temperature was ignored unless top_k was set.
    # A non-positive temperature means "no randomness", i.e. greedy argmax.
    use_sampling = temperature > 0 and (bool(top_k) or temperature != 1.0)
    sampling_top_k = top_k if top_k else None

    decoded_tokens = []
    for i in range(1, max_len - 1):
        predictions = model.predict([encoder_input, decoder_input], verbose=0)
        # The output at position i - 1 scores the token for position i.
        token_probs = predictions[0, i - 1]

        if use_sampling:
            token_id = int(sample_with_temperature(token_probs, temperature, sampling_top_k))
        else:
            token_id = int(np.argmax(token_probs))

        if token_id == end_token:
            break

        # Ids without a word (e.g. padding id 0) and a stray <sos> are fed
        # back to the decoder but left out of the visible reply.
        word = tokenizer_target.index_word.get(token_id, '')
        if word and word != '<sos>':
            decoded_tokens.append(word)

        decoder_input[0, i] = token_id

    return ' '.join(decoded_tokens)
# Chat handler that also keeps the running message history (memory).
def chatbot(user_message, chat_history):
    """Answer ``user_message`` and record the exchange in the history.

    Args:
        user_message: Text entered by the user.
        chat_history: List of ``(user_message, response)`` tuples from the
            earlier turns. A falsy value (``None`` or an empty list) starts a
            new list; a non-empty list is extended in place.

    Returns:
        A 2-tuple ``("", history)``. The empty string presumably clears the
        input box of the calling UI -- confirm against the caller.
    """
    history = chat_history if chat_history else []

    # Reply is sampled among the 10 most likely tokens at temperature 1.0.
    reply = predict_with_main_model(
        user_message,
        tokenizer_inputs,
        tokenizer_outputs,
        model,
        temperature=1.0,
        top_k=10,
    )

    history.append((user_message, reply))
    return "", history