# Poetry-Gen / app.py
# Last commit by abdullah637: "Update app.py" (5ed5596, verified)
import streamlit as st
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
import pandas as pd
import re
# Streamlit re-executes this whole script on every widget interaction, so the
# expensive disk loads below are wrapped in cached helpers and only performed
# once instead of on every rerun.
@st.cache_resource
def _load_poetry_model():
    """Load the trained Keras model from disk (shared across reruns)."""
    return load_model('final_poetry_model.h5')


@st.cache_data
def _load_poetry_dataset():
    """Read the poetry CSV.

    ``st.cache_data`` hands each caller its own copy of the cached frame, so
    the module-level code is free to modify ``df`` in place.
    """
    return pd.read_csv('Roman-Urdu-Poetry.csv')


# Load the trained model
model = _load_poetry_model()
# Load and preprocess dataset (for tokenizer)
df = _load_poetry_dataset()
# Patterns used by clean_text, compiled once at import time.
# Everything that is not an ASCII letter, one of the listed diacritic letters
# or whitespace is dropped (this includes digits, punctuation and apostrophes).
_DISALLOWED_CHARS = re.compile(r"[^a-zA-Zñḳḍāī\s]")
# A newline immediately followed by a non-space character.
_NEWLINE_THEN_TEXT = re.compile(r'(\n)(\S)')


def clean_text(text):
    """Normalise a piece of Roman Urdu text before tokenisation.

    Steps, in order:
      1. lowercase the text;
      2. remove every character that is not a letter in ``a-z``, one of
         ``ñ ḳ ḍ ā ī``, or whitespace;
      3. insert a single space after each newline that is directly followed
         by a non-space character, so the first word of a line is separated
         from the newline character when the text is split on spaces.

    Args:
        text: The raw text. Non-string values (for example the ``NaN`` that
            pandas produces for an empty CSV cell) are treated as empty text.

    Returns:
        The cleaned string; ``""`` for non-string input.

    NOTE(review): the kept character set presumably mirrors the cleaning used
    when the model was trained -- confirm before changing it, since it
    determines the tokenizer vocabulary.
    """
    if not isinstance(text, str):
        # Avoid AttributeError on missing values coming from the dataset.
        return ""
    text = text.lower()
    text = _DISALLOWED_CHARS.sub("", text)
    return _NEWLINE_THEN_TEXT.sub(r'\1 \2', text)
# Clean the poetry column in place; the tokenizer is fitted on the cleaned text.
df['Poetry'] = df['Poetry'].apply(clean_text)


# Initialize and fit tokenizer
@st.cache_resource
def _fit_tokenizer():
    """Fit the word-level tokenizer on the cleaned ``Poetry`` column.

    Cached because Streamlit re-runs the script on every interaction and
    refitting on the whole corpus each time is wasted work.
    ``filters=''`` disables the tokenizer's own punctuation stripping; the
    text has already been cleaned by ``clean_text``.
    """
    fitted = Tokenizer(num_words=5000, filters='')
    fitted.fit_on_texts(df['Poetry'])
    return fitted


tokenizer = _fit_tokenizer()
# +1 because Keras word indices start at 1 (index 0 is reserved for padding).
total_words = len(tokenizer.word_index) + 1
# Function to generate poetry
def generate_poem(seed_text, next_words, max_sequence_len):
    """Extend ``seed_text`` word by word using the trained model.

    On each step the text generated so far is converted to token ids with the
    module-level ``tokenizer``, left-padded (or truncated) to
    ``max_sequence_len - 1`` ids, and fed to the module-level ``model``. The
    highest-scoring index is mapped back to a word and appended (greedy
    decoding).

    Args:
        seed_text: Starting text; expected to be cleaned the same way as the
            tokenizer's training text.
        next_words: Maximum number of words to append. Values <= 0 return
            ``seed_text`` unchanged.
        max_sequence_len: Sequence length used to size the model input; the
            model receives ``max_sequence_len - 1`` token ids.

    Returns:
        ``seed_text`` followed by the generated words, separated by single
        spaces. Fewer than ``next_words`` words are appended if the model
        predicts an index that has no word (e.g. the padding index 0).
    """
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences(
            [token_list], maxlen=max_sequence_len - 1, padding='pre'
        )
        predicted = model.predict(token_list, verbose=0)
        # Cast to a plain int so the dict lookup does not depend on NumPy
        # scalar hashing.
        predicted_word_index = int(np.argmax(predicted, axis=1)[0])
        predicted_word = tokenizer.index_word.get(predicted_word_index, '')
        if not predicted_word:
            # Appending an empty word would leave the token sequence unchanged,
            # so every remaining step would repeat this exact prediction and
            # only add trailing spaces. Stop instead.
            break
        seed_text += " " + predicted_word
    return seed_text
# Streamlit UI
st.title("Poetry Generator")
st.write("Enter a seed phrase to generate poetry in Roman Urdu!")
seed_text = st.text_input("Enter seed text:")
next_words = st.slider("Number of words to generate:", min_value=5, max_value=100, value=50)
if st.button("Generate Poetry"):
    max_sequence_len = 225  # Ensure this matches your training setup
    # Clean the seed the same way as the tokenizer's training text.
    cleaned_seed = clean_text(seed_text)
    if not cleaned_seed.strip():
        # Nothing usable was entered (empty, or only characters that cleaning
        # removes); the model would only see padding, so ask for input instead.
        st.warning("Please enter a seed phrase containing at least one letter.")
    else:
        generated_poem = generate_poem(cleaned_seed, next_words, max_sequence_len)
        st.subheader("Generated Poetry:")
        st.text(generated_poem)