"""Streamlit app that generates Roman Urdu poetry with a trained Keras model.

The script loads a saved next-word prediction model, rebuilds the tokenizer
by fitting it on the cleaned poetry dataset, and exposes a small UI where the
user supplies a seed phrase and the number of words to generate.
"""

import re

import numpy as np
import pandas as pd
import streamlit as st
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

MODEL_PATH = 'final_poetry_model.h5'
DATASET_PATH = 'Roman-Urdu-Poetry.csv'
VOCAB_LIMIT = 5000
MAX_SEQUENCE_LEN = 225  # Ensure this matches your training setup


def clean_text(text):
    """Normalize a piece of poetry text before tokenization.

    Args:
        text: The raw string to clean.

    Returns:
        The lowercased string with every character removed except ASCII
        letters, the letters ``ñ ḳ ḍ ā ī`` and whitespace, and with a space
        inserted after any newline that is immediately followed by a
        non-whitespace character.
    """
    text = text.lower()  # Convert to lowercase
    # Keep only letters, the listed diacritic letters and whitespace.
    # NOTE: apostrophes, digits and punctuation are removed by this pattern.
    text = re.sub(r"[^a-zA-Zñḳḍāī\s]", "", text)
    # Put a space after a newline that touches the next word, so the newline
    # stays attached to the preceding token when splitting on spaces.
    text = re.sub(r'(\n)(\S)', r'\1 \2', text)
    return text


# Streamlit re-runs this script on every widget interaction; caching keeps the
# expensive model load and tokenizer fit from being repeated on each rerun.
@st.cache_resource
def _load_poetry_model(path=MODEL_PATH):
    """Load the trained Keras model from ``path`` (cached across reruns)."""
    return load_model(path)


@st.cache_resource
def _load_corpus_and_tokenizer(csv_path=DATASET_PATH):
    """Read the dataset, clean its 'Poetry' column and fit the tokenizer.

    Returns:
        A ``(dataframe, tokenizer)`` tuple; the dataframe's 'Poetry' column
        holds the cleaned text the tokenizer was fitted on.
    """
    corpus = pd.read_csv(csv_path)
    corpus['Poetry'] = corpus['Poetry'].apply(clean_text)
    # filters='' because clean_text has already stripped unwanted characters.
    fitted = Tokenizer(num_words=VOCAB_LIMIT, filters='')
    fitted.fit_on_texts(corpus['Poetry'])
    return corpus, fitted


# Load the trained model
model = _load_poetry_model()

# Load and preprocess dataset (for tokenizer)
df, tokenizer = _load_corpus_and_tokenizer()
total_words = len(tokenizer.word_index) + 1


def generate_poem(seed_text, next_words, max_sequence_len):
    """Extend ``seed_text`` by repeatedly appending the model's top prediction.

    Args:
        seed_text: Text to start from; it is tokenized with the module-level
            ``tokenizer`` on every step.
        next_words: Maximum number of words to append.
        max_sequence_len: Sequence length used to size the model input; the
            token list is pre-padded/truncated to ``max_sequence_len - 1``.

    Returns:
        The seed text followed by the generated words, separated by spaces.
        Generation stops early if the predicted index has no word in the
        tokenizer's ``index_word`` mapping.
    """
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        padded = pad_sequences(
            [token_list], maxlen=max_sequence_len - 1, padding='pre'
        )
        predicted = model.predict(padded, verbose=0)
        predicted_word_index = int(np.argmax(predicted, axis=1)[0])
        predicted_word = tokenizer.index_word.get(predicted_word_index, '')
        if not predicted_word:
            # No word for this index: the input would not change, so every
            # remaining step would repeat the same prediction and only add
            # trailing spaces. Stop here instead.
            break
        seed_text += " " + predicted_word
    return seed_text


# Streamlit UI
st.title("Poetry Generator")
st.write("Enter a seed phrase to generate poetry in Roman Urdu!")

seed_text = st.text_input("Enter seed text:")
next_words = st.slider(
    "Number of words to generate:", min_value=5, max_value=100, value=50
)

if st.button("Generate Poetry"):
    cleaned_seed = clean_text(seed_text)
    if not cleaned_seed.strip():
        # Nothing usable is left after cleaning, so the model would only see
        # padding; ask for real input instead.
        st.warning("Please enter a seed phrase containing at least one letter.")
    else:
        generated_poem = generate_poem(cleaned_seed, next_words, MAX_SEQUENCE_LEN)
        st.subheader("Generated Poetry:")
        st.text(generated_poem)