Jabrain commited on
Commit
4b03ca9
·
1 Parent(s): 4a4e6b0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -13
app.py CHANGED
@@ -1,20 +1,84 @@
1
-
2
  import streamlit as st
3
- from transformers import T5Tokenizer, T5ForConditionalGeneration
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
- st.title("Text-to-Text AI Model")
 
6
 
7
- st.write("Enter your prompt below:")
8
 
9
- prompt = st.text_input("Prompt")
10
 
11
- if prompt:
12
- tokenizer = T5Tokenizer.from_pretrained('t5-small')
13
- model = T5ForConditionalGeneration.from_pretrained('t5-small')
14
 
15
- input_ids = tokenizer.encode(prompt, return_tensors='pt')
16
- output = model.generate(input_ids)
17
- output_text = tokenizer.decode(output[0], skip_special_tokens=True)
18
 
19
- st.write("Output:")
20
- st.write(output_text)
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ # Import libraries
3
+ import numpy as np
4
+ import tensorflow as tf
5
+ from tensorflow import keras
6
+ from tensorflow.keras import layers
7
+
8
# Load the text data and build character-level vocabulary mappings.
# Original used open(...).read() without closing the handle; a `with`
# block guarantees the file is closed, and an explicit encoding avoids
# platform-dependent decoding of the corpus.
with open('shakespeare.txt', 'r', encoding='utf-8') as f:
    text = f.read()
vocab = sorted(set(text))                       # unique characters, sorted for a stable ordering
char2idx = {c: i for i, c in enumerate(vocab)}  # character -> integer index
idx2char = np.array(vocab)                      # integer index -> character
text_as_int = np.array([char2idx[c] for c in text])  # entire corpus as an int array
14
+
15
# Build training chunks: each example is seq_length+1 characters so it can
# later be split into an input sequence and a one-character-shifted target.
seq_length = 100
examples_per_epoch = len(text) // (seq_length + 1)  # NOTE(review): computed but not used below
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
sequences = char_dataset.batch(seq_length + 1, drop_remainder=True)
20
+
21
def split_input_target(chunk):
    """Split a (seq_length+1)-long chunk into aligned input/target pairs.

    The input is every element but the last; the target is the same
    sequence shifted left by one (every element but the first), so the
    model learns to predict the next character at each position.
    """
    return chunk[:-1], chunk[1:]
25
+
26
# Turn each chunk into an (input, target) pair, then shuffle and batch.
dataset = sequences.map(split_input_target)

BATCH_SIZE = 64      # sequences per training batch
BUFFER_SIZE = 10000  # shuffle buffer size (tf.data shuffles within this window)
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)
32
+
33
# Define the model: embedding -> stateful GRU -> per-character logits.
vocab_size = len(vocab)   # one output logit per character in the vocabulary
embedding_dim = 256       # width of the learned character embeddings
rnn_units = 1024          # GRU hidden-state size

model = keras.Sequential([
    # Fixed batch dimension is required because the GRU below is stateful.
    layers.Embedding(vocab_size, embedding_dim, batch_input_shape=[BATCH_SIZE, None]),
    layers.GRU(rnn_units, return_sequences=True, stateful=True),
    layers.Dense(vocab_size)  # raw logits; no softmax (loss uses from_logits=True)
])
43
+
44
# Loss: sparse categorical cross-entropy computed directly on logits,
# matching the final Dense layer, which applies no softmax.
def loss(labels, logits):
    """Per-character cross-entropy between integer labels and raw logits."""
    return keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)
47
+
48
# Compile with the Adam optimizer and the from-logits cross-entropy loss.
model.compile(optimizer='adam', loss=loss)
50
+
51
# Define a function to generate text
def generate_text(model, start_string):
    """Autoregressively generate characters, seeded with ``start_string``.

    Bug fixed: the training model is built with
    ``batch_input_shape=[BATCH_SIZE, None]`` (batch 64), so a stateful GRU
    rejects the batch-of-1 input used during generation.  The standard fix
    is to rebuild the identical architecture with batch size 1 and copy
    the trained weights into it before sampling.

    Returns the start string concatenated with the generated characters.
    """
    num_generate = 50  # number of characters to generate
    temperature = 1.0  # >1.0 = more random sampling, <1.0 = more conservative

    # Rebuild the network for single-sequence inference (batch size 1)
    # and transfer the trained weights.
    infer_model = keras.Sequential([
        layers.Embedding(vocab_size, embedding_dim, batch_input_shape=[1, None]),
        layers.GRU(rnn_units, return_sequences=True, stateful=True),
        layers.Dense(vocab_size)
    ])
    infer_model.build(tf.TensorShape([1, None]))
    infer_model.set_weights(model.get_weights())
    infer_model.reset_states()  # clear GRU state before a fresh generation

    # Vectorize the seed string and add the batch dimension.
    input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)
    text_generated = []

    for _ in range(num_generate):
        predictions = infer_model(input_eval)
        predictions = tf.squeeze(predictions, 0)  # drop the batch dimension
        predictions = predictions / temperature   # scale logits to tune randomness
        # Sample the next character id from the logits of the last timestep.
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()
        # Feed the prediction back in as the next input (state carries context).
        input_eval = tf.expand_dims([predicted_id], 0)
        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)
 
 
74
 
75
# Train the model
EPOCHS = 1  # number of passes over the dataset

for epoch in range(EPOCHS):
    print(f'Epoch {epoch + 1}')
    model.fit(dataset, epochs=1)  # one full pass per outer-loop iteration

# Seed the generator and print a sample continuation.
start_string = 'ROMEO: '
print(generate_text(model, start_string))