Spaces:

GS123
/

hp_world

Sleeping

App Files Files Community

GS123 committed on Jan 24, 2025

Commit

29b7f2d

verified ·

1 Parent(s): 276bb84

Upload 7 files

Browse files

Files changed (8) hide show

.gitattributes +1 -0
app.py +161 -0
hp_background.jpg +0 -0
hp_model.keras +3 -0
logo.png +0 -0
requirements.txt +10 -0
tokenizer.joblib +3 -0
word2vec_model.model +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+hp_model.keras filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,161 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+import tensorflow as tf
+import regex as re
+import joblib
+from tensorflow.keras.utils import pad_sequences
+import base64
+from gensim.models import Word2Vec
+from sklearn.decomposition import PCA
+# Page title, emitted as raw HTML so it can be styled white, large and centred
+title_html = '<p style="color:white; font-size:40px; text-align: center;">Harry Potter text generation app</p>'
+st.markdown(title_html, unsafe_allow_html=True)
+# Helper that paints the app background from a local image file
+def set_background_image(image_path):
+    """
+    Use an image file as the background of the Streamlit app.
+    The file is read as bytes, base64-encoded and inlined into a CSS data
+    URL (declared as image/jpeg) that is applied to the `.stApp` element.
+    Parameters:
+    - image_path: str, path to the image file (e.g., 'background.jpg')
+    """
+    # Pull the raw bytes off disk first, then turn them into base64 text
+    with open(image_path, "rb") as fh:
+        raw_bytes = fh.read()
+    encoded = base64.b64encode(raw_bytes).decode()
+    # Stylesheet that stretches the picture over the app and keeps it fixed
+    css = f"""
+    <style>
+    .stApp {{
+        background-image: url("data:image/jpeg;base64,{encoded}");
+        background-size: cover;
+        background-position: center;
+        background-attachment: fixed;
+    }}
+    </style>
+    """
+    # unsafe_allow_html lets the <style> tag through as HTML instead of text
+    st.markdown(css, unsafe_allow_html=True)
+# Page chrome: background picture first, then the logo
+set_background_image("hp_background.jpg")
+st.logo("logo.png", size="large")
+# Blurb shown under the title; its line breaks and indentation are part of the text
+des = '''This app takes sample input from user and
+        generate number of words from harry potter books
+          as given by user'''
+description_html = f'<p style="color:white; font-size:15px; text-align: center;">{des}</p>'
+st.markdown(description_html, unsafe_allow_html=True)
+# Load the three saved artifacts through a cached loader
+@st.cache_resource
+def cache_model(tf_model_add, tk_add, w2v_add):
+    """
+    Load the Keras model, the tokenizer and the Word2Vec model from disk.
+    The function is wrapped in st.cache_resource, so Streamlit caches what
+    it returns for a given set of arguments.
+    Parameters:
+    - tf_model_add: str, path of the saved Keras model
+    - tk_add: str, path of the joblib-serialised tokenizer
+    - w2v_add: str, path of the saved gensim Word2Vec model
+    Returns:
+    - tuple of (keras model, tokenizer, Word2Vec model), in that order
+    """
+    keras_model = tf.keras.models.load_model(tf_model_add)
+    # NOTE(review): joblib.load unpickles the file - only load trusted artifacts
+    tokenizer = joblib.load(tk_add)
+    word2vec = Word2Vec.load(w2v_add)
+    return keras_model, tokenizer, word2vec
+# File names of the artifacts uploaded alongside app.py in this commit
+tf_model_add = "hp_model.keras"
+tk_add = "tokenizer.joblib"
+w2v_add = "word2vec_model.model"
+# Unpack in the same order the loader returns them
+loaded_artifacts = cache_model(tf_model_add, tk_add, w2v_add)
+model, tk, wv_model = loaded_artifacts
+# Sidebar: similar-word lookup and the selection used by the vector plot
+with st.sidebar:
+    chr_name = st.text_input("Enter a character name to get top 5 similar characters")
+    # Lower-case the name as before; also trim stray spaces around it
+    query = chr_name.strip().lower()
+    if query:
+        try:
+            # Only the lookup is guarded, so unrelated errors are not hidden
+            neighbours = wv_model.wv.most_similar(query, topn=5)
+        except KeyError:
+            # gensim raises KeyError for a word missing from the vocabulary
+            st.write("Please enter a valid character name")
+        else:
+            # Each neighbour is a (word, similarity) pair; only the word is shown
+            for word, _similarity in neighbours:
+                st.markdown("- " + word)
+    # Options are every vocabulary word in reverse alphabetical order
+    chrs = st.multiselect(
+        "Select names to draw there vectors",
+        sorted(wv_model.wv.key_to_index, reverse=True),
+        ["harry", "ron", "voldemort", "dobby", "elf"]
+    )
+    draw_vector_pressed = st.button("Draw vectors")
+# Main inputs: the seed text and how many words to append to it (1-50, default 5)
+text = st.text_input("Enter Sample text to generate data")
+num_words = st.number_input(
+    "Enter number of words to generate by model: ",
+    min_value=1,
+    max_value=50,
+    value=5,
+    step=1,
+)
+def clean_text(book):
+    """
+    Normalise raw text before it is tokenised.
+    Steps, in order: lower-case the text, replace page headers of the form
+    "page | <number> harry potter ... rowling" with a space, delete every
+    character that is not a letter, digit, space or full stop, then collapse
+    runs of two or more whitespace characters into one space.
+    Parameters:
+    - book: str, text to clean
+    Returns:
+    - str, the cleaned text
+    """
+    book = book.lower()
+    # All patterns are raw strings so backslash escapes reach the regex engine
+    # untouched ("\s" in a plain string is an invalid escape sequence)
+    exp = r"page\s*\|\s*\d+\s*harry potter.*?rowling"
+    book = re.sub(exp, " ", book)
+    # Newlines and tabs are not in the allowed set, so they are removed here
+    alphabet_regex = r"[^a-zA-Z0-9 .]+"
+    book = re.sub(alphabet_regex, "", book)
+    space_regex = r"\s\s+"
+    book = re.sub(space_regex, " ", book)
+    return book
+# Reverse lookup table: token id -> word (tk.word_index maps word -> id)
+index_word = {idx: word for word, idx in tk.word_index.items()}
+def next_word(test):
+    """
+    Predict the single most likely next word for a piece of text.
+    The text is cleaned with clean_text, tokenised with the loaded
+    tokenizer, padded to 192 tokens and passed to the model; the id with
+    the highest score is mapped back to a word.
+    Parameters:
+    - test: str, text to continue
+    Returns:
+    - str, the predicted word, or "" when the predicted id has no entry in
+      index_word (previously this raised KeyError)
+    """
+    test_clean = clean_text(test)
+    test_token = tk.texts_to_sequences([test_clean])
+    # 192 is presumably the sequence length the model was trained with - TODO confirm
+    pad_test = pad_sequences(test_token, maxlen=192, padding="pre")
+    # verbose=0 keeps Keras from printing a progress bar for every word
+    y_pred_prob = model.predict(pad_test, verbose=0)
+    # One input row, so take the first (only) argmax and make it a plain int
+    y_pred_ind = int(np.argmax(y_pred_prob, axis=-1)[0])
+    # An id such as 0 (the padding value) may be missing from word_index
+    return index_word.get(y_pred_ind, "")
+# Generate num_words words after the seed text when Submit is pressed
+if st.button("Submit"):
+    if not text.strip():
+        # Empty or whitespace-only input: show the prompt and nothing else
+        st.write("#### Please enter text to generate words")
+    else:
+        # Each predicted word is appended, then the longer text is fed back in
+        for _ in range(num_words):
+            word = next_word(text)
+            # Skip empty predictions so no stray spaces are appended
+            if word:
+                text = text + " " + word
+        # Only write the result when something was actually generated from
+        st.write(text)
+# Project the selected word vectors to 2-D with PCA and plot them
+if draw_vector_pressed:
+    if len(chrs) >= 2:
+        # One row per selected word, one column per embedding dimension
+        chr_df = pd.DataFrame(data=wv_model.wv[chrs], index=chrs)
+        pca_array = PCA(n_components=2).fit_transform(chr_df)
+        # reset_index turns the word labels into an "index" column for colouring
+        df_pca = pd.DataFrame(pca_array, index=chr_df.index, columns=["pc1", "pc2"]).reset_index()
+        st.write("### Vector diagram for characters")
+        st.scatter_chart(df_pca, x="pc1", y="pc2", color="index")
+    elif chrs:
+        # PCA with n_components=2 needs at least two samples; one word would raise ValueError
+        st.write("Please select at least two characters to draw vectors")
+    else:
+        st.write("Please select characters to draw vectors")

hp_background.jpg ADDED Viewed

hp_model.keras ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:49c35e22cc4c65f44e2eb1477eacd26c90d532c49700c3a43f983d00998eb514
+size 17527806

logo.png ADDED Viewed

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+tensorflow==2.17.1
+gensim==4.3.3
+joblib==1.4.2
+numpy==1.26.4
+pandas==2.2.2
+matplotlib==3.10.0
+regex==2024.11.6
+scikit-learn==1.6.0
+seaborn==0.13.2
+streamlit==1.41.1

tokenizer.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5098539c7180fbcd53ee121c55da7aa7f2fbd56d19b1a3bbd519aa00be296f4e
+size 234653

word2vec_model.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b33706aa2f2b2b322194457cf95341ad9ea61bbe473903d9f870d309ddfea01c
+size 14578634