Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pickle | |
| import tensorflow as tf | |
| import keras.ops as ops | |
| import keras | |
| from keras import layers | |
| from keras.layers import TextVectorization | |
| # from gradio_webrtc import WebRTC | |
class TextVectorization(keras.layers.TextVectorization):
    """Subclass of `keras.layers.TextVectorization` that adds no behavior.

    Defining it here rebinds the module-level name `TextVectorization`, so
    later uses of that name in this file resolve to this subclass.
    """

    # NOTE(review): presumably kept so serialized artifacts that reference
    # this class name can be resolved -- confirm before removing.
class StringLookup(keras.layers.StringLookup):
    """Subclass of `keras.layers.StringLookup` that adds no behavior."""

    # NOTE(review): presumably kept so serialized artifacts that reference
    # this class name can be resolved -- confirm before removing.
class TransformerEncoder(layers.Layer):
    """Encoder block: self-attention followed by a two-layer dense projection.

    Both sub-layers are wrapped in a residual connection and a layer
    normalization.

    Args:
        embed_dim: Units of the final dense layer; also passed as `key_dim`
            to the attention layer.
        dense_dim: Units of the hidden ReLU dense layer.
        num_heads: Number of attention heads.
        **kwargs: Forwarded to `layers.Layer`.
    """

    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        # Hyper-parameters are stored so get_config() can report them.
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads

        self.attention = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim
        )
        feed_forward = [
            layers.Dense(dense_dim, activation="relu"),
            layers.Dense(embed_dim),
        ]
        self.dense_proj = keras.Sequential(feed_forward)
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()
        self.supports_masking = True

    def call(self, inputs, mask=None):
        """Apply self-attention and the dense projection to `inputs`.

        Args:
            inputs: Tensor used as query, key and value of the attention.
            mask: Optional mask; when given, an axis is inserted at
                position 1 and it is cast to int32 before being used as the
                attention mask (assumes a (batch, seq) mask -- TODO confirm).

        Returns:
            The output of the second layer normalization.
        """
        attn_mask = (
            None if mask is None else ops.cast(mask[:, None, :], dtype="int32")
        )
        attended = self.attention(
            query=inputs, value=inputs, key=inputs, attention_mask=attn_mask
        )
        normed = self.layernorm_1(inputs + attended)
        return self.layernorm_2(normed + self.dense_proj(normed))

    def get_config(self):
        """Return the base layer config extended with the constructor args."""
        return {
            **super().get_config(),
            "embed_dim": self.embed_dim,
            "dense_dim": self.dense_dim,
            "num_heads": self.num_heads,
        }
class PositionalEmbedding(layers.Layer):
    """Sum of a token embedding and a learned position embedding.

    Args:
        sequence_length: Number of rows of the position embedding table.
        vocab_size: Number of rows of the token embedding table.
        embed_dim: Output width of both embedding tables.
        **kwargs: Forwarded to `layers.Layer`.
    """

    def __init__(self, sequence_length, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        # Hyper-parameters are stored so get_config() can report them.
        self.sequence_length = sequence_length
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim

        self.token_embeddings = layers.Embedding(
            input_dim=vocab_size, output_dim=embed_dim
        )
        self.position_embeddings = layers.Embedding(
            input_dim=sequence_length, output_dim=embed_dim
        )

    def call(self, inputs):
        """Embed `inputs` and add the embedding of each position index.

        Positions run from 0 up to the size of the last axis of `inputs`.
        """
        seq_len = ops.shape(inputs)[-1]
        position_ids = ops.arange(0, seq_len, 1)
        return self.token_embeddings(inputs) + self.position_embeddings(
            position_ids
        )

    def compute_mask(self, inputs, mask=None):
        """Return a boolean mask that is True where `inputs` is not 0.

        The incoming `mask` argument is ignored.
        """
        # Presumably id 0 is the padding token -- verify against the vectorizer.
        return ops.not_equal(inputs, 0)

    def get_config(self):
        """Return the base layer config extended with the constructor args."""
        return {
            **super().get_config(),
            "sequence_length": self.sequence_length,
            "vocab_size": self.vocab_size,
            "embed_dim": self.embed_dim,
        }
class TransformerDecoder(layers.Layer):
    """Decoder block: causal self-attention, cross-attention, dense projection.

    Each of the three sub-layers is wrapped in a residual connection and a
    layer normalization.

    Args:
        embed_dim: Units of the final dense layer; also passed as `key_dim`
            to both attention layers.
        latent_dim: Units of the hidden ReLU dense layer.
        num_heads: Number of attention heads.
        **kwargs: Forwarded to `layers.Layer`.
    """

    def __init__(self, embed_dim, latent_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        # Hyper-parameters are stored so get_config() can report them.
        self.embed_dim = embed_dim
        self.latent_dim = latent_dim
        self.num_heads = num_heads

        self.attention_1 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim
        )
        self.attention_2 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim
        )
        feed_forward = [
            layers.Dense(latent_dim, activation="relu"),
            layers.Dense(embed_dim),
        ]
        self.dense_proj = keras.Sequential(feed_forward)
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()
        self.layernorm_3 = layers.LayerNormalization()
        self.supports_masking = True

    def call(self, inputs, mask=None):
        """Run the decoder block.

        Args:
            inputs: A pair `(decoder_inputs, encoder_outputs)`.
            mask: Either None or a pair of padding masks, one for the
                decoder inputs and one for the encoder outputs.

        Returns:
            The output of the third layer normalization.
        """
        decoder_inputs, encoder_outputs = inputs
        causal_mask = self.get_causal_attention_mask(decoder_inputs)

        if mask is not None:
            decoder_padding_mask, encoder_padding_mask = mask
        else:
            decoder_padding_mask = encoder_padding_mask = None

        # Self-attention over the decoder inputs, restricted by the causal mask.
        self_attended = self.attention_1(
            query=decoder_inputs,
            value=decoder_inputs,
            key=decoder_inputs,
            attention_mask=causal_mask,
            query_mask=decoder_padding_mask,
        )
        normed_1 = self.layernorm_1(decoder_inputs + self_attended)

        # Cross-attention: queries from the decoder, keys/values from the encoder.
        cross_attended = self.attention_2(
            query=normed_1,
            value=encoder_outputs,
            key=encoder_outputs,
            query_mask=decoder_padding_mask,
            key_mask=encoder_padding_mask,
        )
        normed_2 = self.layernorm_2(normed_1 + cross_attended)

        return self.layernorm_3(normed_2 + self.dense_proj(normed_2))

    def get_causal_attention_mask(self, inputs):
        """Build an int32 lower-triangular mask tiled along the batch axis.

        Entry (i, j) is 1 when i >= j and 0 otherwise. The result has shape
        (batch, seq, seq), where batch and seq are the first two dimensions
        of `inputs`.
        """
        shape = ops.shape(inputs)
        batch_size, seq_len = shape[0], shape[1]

        rows = ops.arange(seq_len)[:, None]
        cols = ops.arange(seq_len)
        lower_triangular = ops.cast(rows >= cols, dtype="int32")
        lower_triangular = ops.reshape(lower_triangular, (1, seq_len, seq_len))

        # Tile multiples [batch_size, 1, 1]: repeat only along the batch axis.
        repeats = ops.concatenate(
            [ops.expand_dims(batch_size, -1), ops.convert_to_tensor([1, 1])],
            axis=0,
        )
        return ops.tile(lower_triangular, repeats)

    def get_config(self):
        """Return the base layer config extended with the constructor args."""
        return {
            **super().get_config(),
            "embed_dim": self.embed_dim,
            "latent_dim": self.latent_dim,
            "num_heads": self.num_heads,
        }
def _load_vectorizer(pickle_path):
    """Rebuild a `TextVectorization` layer from a pickled state dict.

    The pickle must contain a dict with the keys "config", "weights" and
    "vocab"; they are applied to a fresh layer in that order.

    Args:
        pickle_path: Path of the pickle file to read.

    Returns:
        The restored `TextVectorization` layer.
    """
    # NOTE(security): pickle.load can execute arbitrary code. Only load
    # pickle files that ship with this app; never user-supplied ones.
    with open(pickle_path, "rb") as handle:
        state = pickle.load(handle)

    vectorizer = TextVectorization.from_config(state["config"])
    # Adapt on a one-item dummy dataset before restoring the saved state.
    # NOTE(review): presumably needed so the layer is initialised before
    # set_weights() is called -- confirm against the installed Keras version.
    vectorizer.adapt(tf.data.Dataset.from_tensor_slices(["xyz"]))
    vectorizer.set_weights(state["weights"])
    vectorizer.set_vocabulary(state["vocab"])
    return vectorizer


# Target-language (Indonesian) and source-language (English) vectorizers.
id_vectorization = _load_vectorizer("id_vectorization_transformer.pickle")
en_vectorization = _load_vectorizer("en_vectorization_transformer.pickle")
# The custom layer classes are passed to load_model through custom_objects.
transformer = keras.models.load_model(
    "transformer_keras.keras",
    custom_objects={
        "TransformerEncoder": TransformerEncoder,
        "TransformerDecoder": TransformerDecoder,
        "PositionalEmbedding": PositionalEmbedding,
    },
)

# Maps a token index to its vocabulary string in the Indonesian vectorizer.
id_vocab = id_vectorization.get_vocabulary()
id_index_lookup = dict(enumerate(id_vocab))

# Upper bound on the number of decoding steps in decode_sequence().
# (The misspelled name is kept because decode_sequence() reads it.)
max_decoded_sentence_lenth = 20
def decode_sequence(input_sentence):
    """Translate `input_sentence` by greedy token-by-token decoding.

    Starting from the "[start]" marker, the model is called once per step
    with the sentence decoded so far, and the highest-scoring token at the
    current position is appended. Decoding stops when the stop token "end"
    is produced or after `max_decoded_sentence_lenth` steps.

    Args:
        input_sentence: Source text to translate.

    Returns:
        The generated tokens joined by single spaces, without the start
        marker or the stop token. Empty, blank or None input returns "".
    """
    # Nothing to translate: skip the model instead of decoding from padding.
    if not input_sentence or not input_sentence.strip():
        return ""

    start_marker = "[start]"
    stop_token = "end"

    tokenized_input_sentence = en_vectorization([input_sentence])

    # Tokens are kept in a list and the stop token is never appended, so the
    # result needs no substring replacement. Stripping "end" by substring
    # would also damage real words that merely contain those letters.
    generated_tokens = []
    for step in range(max_decoded_sentence_lenth):
        decoded_so_far = " ".join([start_marker, *generated_tokens])
        # Drop the last position of the vectorized target before the call.
        tokenized_target_sentence = id_vectorization([decoded_so_far])[:, :-1]
        predictions = transformer(
            {
                "encoder_inputs": tokenized_input_sentence,
                "decoder_inputs": tokenized_target_sentence,
            }
        )
        sampled_token_index = ops.convert_to_numpy(
            ops.argmax(predictions[0, step, :])
        ).item(0)
        sampled_token = id_index_lookup[sampled_token_index]
        if sampled_token == stop_token:
            break
        generated_tokens.append(sampled_token)

    return " ".join(generated_tokens).strip()
| # image = WebRTC(label="Stream") | |
# HTML shown under the title of the Gradio page.
# NOTE(review): the two <li> entries are identical; the second one is
# presumably meant to list another team member -- confirm with the authors.
desc = "".join(
    [
        "<h2>This is a simple English to Indonesian translator app using transformer for our final Deep Learning Project.</h2>",
        "<br/> <h3 style='font-weight: bold'>Team Members:</h3>",
        "<br/> <ul> <li>2602082452 - Rendy Susanto</li>",
        "<li>2602082452 - Rendy Susanto</li></ul>",
    ]
)

# One text box in, one text box out, wired to decode_sequence().
source_box = gr.Textbox(label="Please input your text (English):")
target_box = gr.Textbox(label="Output (Indonesian):")
demo = gr.Interface(
    fn=decode_sequence,
    inputs=source_box,
    outputs=target_box,
    title="English To Indonesian Translator",
    description=desc,
)

demo.launch(share=True)