# Streamlit app: a small GPT-style Transformer decoder that generates
# Shakespeare-like text one word at a time.
import re
import string

import numpy as np
import streamlit as st
import tensorflow as tf

# Model hyperparameters (must match the checkpoint being loaded).
BATCH_SIZE = 128
NUM_HEADS = 12
NUM_BLOCKS = 2
EMBED_DIM = 384
DENSE_DIM = 1536
DROPOUT_RATE = 0.3
CHUNK_LENGTH = 256

vocab_size = 12050
sequence_length = CHUNK_LENGTH + 1

# Characters stripped during text standardization. Punctuation is assumed
# here, mirroring the standard Keras TextVectorization examples.
strip_chars = string.punctuation


def custom_standardization(input_string):
    lowercase = tf.strings.lower(input_string)
    return tf.strings.regex_replace(
        lowercase, f"[{re.escape(strip_chars)}]", "")


# The TextVectorization layer was saved wrapped in a model; recover it.
vectorized_model = tf.keras.models.load_model(
    "src/ShakespeareVect.keras",
    custom_objects={"custom_standardization": custom_standardization},
)
vectorizer = vectorized_model.layers[0]


class PositionalEmbedding(tf.keras.layers.Layer):
    """Sums learned token embeddings with learned position embeddings."""

    def __init__(self, sequence_length, vocab_size, output_dim):
        super().__init__()
        self.positional_embedding = tf.keras.layers.Embedding(
            input_dim=sequence_length, output_dim=output_dim, mask_zero=False)
        self.token_embedding = tf.keras.layers.Embedding(
            input_dim=vocab_size, output_dim=output_dim, mask_zero=True)
        self.sequence_length = sequence_length
        self.vocab_size = vocab_size

    def call(self, inputs):
        length = tf.shape(inputs)[-1]
        positions = tf.range(start=0, limit=length, delta=1)
        embedded_tokens = self.token_embedding(inputs)
        embedded_positions = self.positional_embedding(positions)
        return embedded_tokens + embedded_positions


class TransformerDecoder(tf.keras.layers.Layer):
    """Decoder block: causal self-attention followed by a feed-forward
    projection, each with a residual connection and layer normalization."""

    def __init__(self, num_heads, embed_dim, dense_dim, dropout_rate):
        super().__init__()
        self.attention = tf.keras.layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim // num_heads)
        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-5)
        self.dropout1 = tf.keras.layers.Dropout(dropout_rate)
        self.dense_proj = tf.keras.models.Sequential([
            tf.keras.layers.Dense(dense_dim, activation="gelu"),
            tf.keras.layers.Dense(embed_dim),
        ])
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-5)
        self.dropout2 = tf.keras.layers.Dropout(dropout_rate)

    def call(self, inputs):
        # The causal mask keeps each position from attending to later tokens.
        attn_out = self.attention(
            query=inputs, key=inputs, value=inputs, use_causal_mask=True)
        norm1_out = self.layernorm1(attn_out + inputs)
        drop1_out = self.dropout1(norm1_out)
        dense_proj_out = self.dense_proj(drop1_out)
        norm2_out = self.layernorm2(drop1_out + dense_proj_out)
        drop2_out = self.dropout2(norm2_out)
        return drop2_out


# Assemble the decoder-only language model and load the trained weights.
inputs = tf.keras.layers.Input(shape=(None,))
x = PositionalEmbedding(sequence_length, vocab_size, EMBED_DIM)(inputs)
for _ in range(NUM_BLOCKS):
    x = TransformerDecoder(NUM_HEADS, EMBED_DIM, DENSE_DIM, DROPOUT_RATE)(x)
    x = tf.keras.layers.Dropout(DROPOUT_RATE)(x)
output = tf.keras.layers.Dense(
    vocab_size, activation="linear", kernel_initializer="glorot_uniform")(x)
transformer = tf.keras.models.Model(inputs, output)
transformer.load_weights("src/Shakespeare_decoder.weights (1).h5")


def generate_text(prompt, max_length=50, temperature=1.0):
    """Autoregressively samples up to max_length words onto the prompt."""
    vocabulary = vectorizer.get_vocabulary()
    for _ in range(max_length):
        tokenized = vectorizer([prompt])
        tokenized_np = tokenized.numpy()[0]
        # Find the last non-padding token
        last_idx = np.max(np.nonzero(tokenized_np))
        preds = transformer(tokenized, training=False)
        # float64 keeps np.random.choice's sum-to-one check happy.
        logits = preds[0, last_idx, :].numpy().astype("float64")
        # Apply temperature sampling; subtracting the max logit before
        # exponentiating keeps the softmax numerically stable.
        scaled = logits / temperature
        scaled -= np.max(scaled)
        probs = np.exp(scaled) / np.sum(np.exp(scaled))
        next_id = np.random.choice(len(probs), p=probs)
        next_word = vocabulary[next_id]
        # Stop on padding or out-of-vocabulary tokens.
        if next_word in ("", "[UNK]"):
            break
        prompt += " " + next_word
    return prompt


user_input = st.text_input("Enter some text:", "")
if user_input != "":
    with st.spinner("Generating Text..."):
        text = generate_text(user_input)
    st.text(text)
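

# The helper below is an illustrative sketch, not part of the app above: it
# shows how the temperature used in generate_text reshapes a toy
# distribution. Lower temperature sharpens the softmax toward the argmax;
# higher temperature flattens it toward uniform. The name
# demo_temperature_effect and the toy logits are assumptions for exposition;
# the app never calls this function.
def demo_temperature_effect():
    toy_logits = np.array([2.0, 1.0, 0.1])
    for t in (0.5, 1.0, 2.0):
        scaled = toy_logits / t
        scaled -= np.max(scaled)  # same stability trick as generate_text
        probs = np.exp(scaled) / np.sum(np.exp(scaled))
        print(f"temperature={t}: {np.round(probs, 3)}")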
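

# Another illustrative sketch (also never called by the app): a quick check
# that use_causal_mask=True really blocks information from flowing backwards.
# Perturbing only the final timestep of a random input should leave every
# earlier output of a TransformerDecoder block unchanged. The function name
# and the toy sequence length of 8 are assumptions for exposition.
def demo_causal_masking():
    block = TransformerDecoder(NUM_HEADS, EMBED_DIM, DENSE_DIM, dropout_rate=0.0)
    x = tf.random.normal((1, 8, EMBED_DIM))
    x_perturbed = tf.concat(
        [x[:, :-1, :], tf.random.normal((1, 1, EMBED_DIM))], axis=1)
    y, y_perturbed = block(x), block(x_perturbed)
    # Prints True: outputs at positions 0..6 match under the causal mask.
    print(np.allclose(y[:, :-1, :], y_perturbed[:, :-1, :], atol=1e-5))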