File size: 1,694 Bytes
8c457b5 7550359 83b3714 8329c09 8c457b5 7550359 8c457b5 5f37b42 8c457b5 e0b2a67 8c457b5 1c861fc 5933d22 1c861fc e0b2a67 8c457b5 1c861fc e0b2a67 8c457b5 db03c6f 1c861fc 5f37b42 1c861fc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import streamlit as st
from transformers import GPT2Tokenizer, GPT2LMHeadModel
# Streamlit re-executes this script from the top on every widget interaction,
# so the tokenizer and model are obtained through a cached factory; without
# caching, the checkpoint would be loaded again on each rerun.
model_name = 'gpt2-large'

# st.cache_resource is not available in older Streamlit releases; fall back to
# an identity decorator there so the app still starts (uncached, as before).
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_tokenizer_and_model(name):
    """Return the ``(tokenizer, model)`` pair for the pretrained checkpoint *name*."""
    return (
        GPT2Tokenizer.from_pretrained(name),
        GPT2LMHeadModel.from_pretrained(name),
    )


tokenizer, model = _load_tokenizer_and_model(model_name)

# Set the title for the Streamlit app
st.title("GPT-2 Blog Post Generator")

# Text input for the user
text = st.text_area("Enter your Topic: ")
def generate_text(text):
    """Generate text from the prompt *text* using the module-level GPT-2 model.

    Args:
        text: Prompt string entered by the user.

    Returns:
        The decoded output sequence with special tokens stripped, or ``None``
        when the prompt is blank or generation raises. Problems are reported
        in the UI via ``st.warning`` / ``st.error`` rather than propagated.
    """
    # A blank prompt gives the model nothing to condition on; tell the user
    # instead of surfacing a low-level error from the generation call.
    if not text or not text.strip():
        st.warning("Please enter a topic before generating.")
        return None

    try:
        # Encode input text
        encoded_input = tokenizer(text, return_tensors='pt')

        # Generate text
        output = model.generate(
            input_ids=encoded_input['input_ids'],
            attention_mask=encoded_input['attention_mask'],
            max_length=200,  # Upper bound on total length (prompt tokens included)
            num_return_sequences=1,  # Number of sequences to generate
            no_repeat_ngram_size=2,  # Avoid repeating n-grams of length 2
            # top_k / top_p / temperature only take effect when sampling is
            # enabled; without do_sample=True decoding is greedy and they
            # are ignored.
            do_sample=True,
            top_k=50,  # Limits the sampling pool to top_k tokens
            top_p=0.95,  # Cumulative probability threshold for nucleus sampling
            temperature=0.7,  # Controls the randomness of predictions
            pad_token_id=tokenizer.eos_token_id,  # Use the end-of-sequence token as padding
        )

        # Decode generated text
        generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
        return generated_text
    except Exception as e:
        # UI boundary: show the failure in the page instead of crashing the app.
        st.error(f"An error occurred: {e}")
        return None
# Run generation only on the rerun triggered by clicking the button.
if st.button("Generate"):
    generated_text = generate_text(text)
    # generate_text returns None on failure; skip rendering in that case.
    if generated_text:
        # Display the generated text
        st.subheader("Generated Blog Post")
        st.write(generated_text)