# Spaces: Sleeping | File size: 1,918 Bytes
import streamlit as st
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
import pandas as pd
import re
@st.cache_resource
def _load_poetry_model():
    """Load the trained Keras model from disk once per server process.

    Streamlit re-executes this script from the top on every widget
    interaction; ``st.cache_resource`` keeps the loaded model in memory so
    the ``.h5`` file is not read and deserialized again on each rerun.

    Returns:
        The model object returned by ``load_model``.
    """
    return load_model('final_poetry_model.h5')


# Load the trained model (cached across reruns)
model = _load_poetry_model()

# Load the dataset; its 'Poetry' column is used below to rebuild the tokenizer
df = pd.read_csv('Roman-Urdu-Poetry.csv')
def clean_text(text):
    """Normalize a poem or seed phrase before tokenization.

    The text is lowercased; every character that is not an ASCII letter, one
    of the diacritic letters listed in the pattern below, or whitespace is
    removed (digits, punctuation and apostrophes are all dropped); and a
    space is inserted after each newline that is immediately followed by a
    non-whitespace character.

    Args:
        text: The raw text. Non-string values (for example ``None`` or the
            ``NaN`` that pandas produces for empty CSV cells) are treated as
            empty text instead of raising ``AttributeError``.

    Returns:
        The cleaned string, or ``""`` for non-string input.
    """
    if not isinstance(text, str):
        return ""
    text = text.lower()  # Convert to lowercase
    # Keep only letters, the listed diacritics and whitespace.
    text = re.sub(r"[^a-zA-Zñḳḍāī\s]", "", text)
    # Detach the first word of each line from the preceding newline so a
    # space-splitting tokenizer does not glue them into a single token.
    text = re.sub(r'(\n)(\S)', r'\1 \2', text)
    return text
# Normalize every poem and store the result back on the dataframe.
cleaned_poems = df['Poetry'].apply(clean_text)
df['Poetry'] = cleaned_poems

# Build the tokenizer from the cleaned poems. filters='' turns off the
# tokenizer's own character filtering, since clean_text already stripped
# unwanted characters.
tokenizer = Tokenizer(num_words=5000, filters='')
tokenizer.fit_on_texts(cleaned_poems)

# Vocabulary size plus one; Keras reserves index 0 and never assigns it to a word.
total_words = 1 + len(tokenizer.word_index)
# Function to generate poetry
def generate_poem(seed_text, next_words, max_sequence_len):
    """Extend ``seed_text`` with up to ``next_words`` greedily predicted words.

    On each step the text so far is tokenized with the module-level
    ``tokenizer``, left-padded (or truncated) to ``max_sequence_len - 1``
    tokens, and fed to the module-level ``model``; the highest-scoring index
    is mapped back to a word and appended.

    Args:
        seed_text: Starting text; callers are expected to pass it already
            cleaned the same way as the tokenizer's fitting data.
        next_words: Maximum number of words to append.
        max_sequence_len: Sequence length of the training setup; the model
            input is ``max_sequence_len - 1`` tokens long.
            NOTE(review): must match the value used in training — confirm.

    Returns:
        ``seed_text`` followed by the generated words, separated by single
        spaces. Fewer than ``next_words`` words are appended if the model
        predicts an index that has no word.
    """
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences(
            [token_list], maxlen=max_sequence_len - 1, padding='pre'
        )
        predicted = model.predict(token_list, verbose=0)
        # Greedy decoding: take the single most probable index.
        predicted_word_index = int(np.argmax(predicted, axis=1)[0])
        predicted_word = tokenizer.index_word.get(predicted_word_index, '')
        if not predicted_word:
            # No word for this index (e.g. the reserved index 0). Appending
            # an empty word would only add trailing spaces and leave the
            # tokenized context unchanged, so every later step would repeat
            # the same prediction; stop instead.
            break
        seed_text += " " + predicted_word
    return seed_text
# Streamlit UI: page header, inputs, and the generate action.
st.title("Poetry Generator")
st.write("Enter a seed phrase to generate poetry in Roman Urdu!")

seed_text = st.text_input("Enter seed text:")
next_words = st.slider(
    "Number of words to generate:",
    min_value=5,
    max_value=100,
    value=50,
)

generate_clicked = st.button("Generate Poetry")
if generate_clicked:
    # Ensure this matches your training setup
    max_sequence_len = 225
    # Clean the seed with the same function applied to the dataset.
    cleaned_seed = clean_text(seed_text)
    generated_poem = generate_poem(cleaned_seed, next_words, max_sequence_len)
    st.subheader("Generated Poetry:")
    st.text(generated_poem)
|