Spaces:

rianders
/

live_view_embeddings

Build error

App Files Community

rianders committed on May 14, 2024

Commit

3e8771c

verified ·

1 Parent(s): 80f1785

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -46

app.py CHANGED Viewed

@@ -1,13 +1,12 @@
 import streamlit as st
-from database_utils import init_db, save_embeddings_to_db, get_all_embeddings, clear_all_entries
-from transformers import BertModel, BertTokenizer
 from sklearn.decomposition import PCA
 import plotly.graph_objs as go
 import numpy as np
-# Initialize and load the BERT model and tokenizer
 tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-model = BertModel.from_pretrained('bert-base-uncased')
 def get_bert_embeddings(words):
     embeddings = []
@@ -19,57 +18,54 @@ def get_bert_embeddings(words):
     return embeddings
 def plot_interactive_bert_embeddings(embeddings, words):
-    pca = PCA(n_components=3)
-    reduced_embeddings = pca.fit_transform(embeddings)
-    fig = go.Figure(data=[
-        go.Scatter3d(
-            x=[emb[0]],
-            y=[emb[1]],
-            z=[emb[2]],
-            mode='markers+text',
-            text=word,
-            name=word
-        ) for emb, word in zip(reduced_embeddings, words)
-    ], layout=go.Layout(
-        title='3D Scatter Plot of BERT Embeddings',
-        scene=dict(
-            xaxis=dict(title='PCA Component 1'),
-            yaxis=dict(title='PCA Component 2'),
-            zaxis=dict(title='PCA Component 3')
-        ),
-        autosize=False,
-        width=800,
-        height=600
-    ))
-    st.plotly_chart(fig, use_container_width=True)
 def main():
     st.title("BERT Embeddings Visualization")
-    init_db()
-    # Default starter words
-    default_words = ["apple", "rocket", "philosophy"]
-    # Load and plot default words if database is empty
-    if not get_all_embeddings():
-        embeddings = get_bert_embeddings(default_words)
-        for word, emb in zip(default_words, embeddings):
-            save_embeddings_to_db(word, emb)
-        plot_interactive_bert_embeddings(embeddings, default_words)
     new_word = st.text_input("Enter a new word or phrase:")
     if st.button("Add Word/Phrase"):
         if new_word:
-            embedding = get_bert_embeddings([new_word])[0]
-            save_embeddings_to_db(new_word, embedding)
-            embeddings, words = get_all_embeddings()
-            plot_interactive_bert_embeddings(embeddings, words)
-    if st.button("Reset to Default Words"):
-        clear_all_entries()
-        embeddings = get_bert_embeddings(default_words)
-        for word, emb in zip(default_words, embeddings):
-            save_embeddings_to_db(word, emb)
-        plot_interactive_bert_embeddings(embeddings, default_words)
 if __name__ == "__main__":
     main()

 import streamlit as st
+from transformers import AutoModel, BertTokenizer
 from sklearn.decomposition import PCA
 import plotly.graph_objs as go
 import numpy as np
+# Initialize the tokenizer and model globally to avoid reloading them on every function call
 tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+model = AutoModel.from_pretrained('bert-base-uncased')
 def get_bert_embeddings(words):
     embeddings = []
     return embeddings
 def plot_interactive_bert_embeddings(embeddings, words):
+    if len(words) >= 3:  # Ensure there are at least 3 words for 3D PCA
+        pca = PCA(n_components=3)
+        reduced_embeddings = pca.fit_transform(embeddings)
+        fig = go.Figure(data=[
+            go.Scatter3d(
+                x=[emb[0]],
+                y=[emb[1]],
+                z=[emb[2]],
+                mode='markers+text',
+                text=word,
+                name=word
+            ) for emb, word in zip(reduced_embeddings, words)
+        ], layout=go.Layout(
+            title='3D Scatter Plot of BERT Embeddings',
+            scene=dict(
+                xaxis=dict(title='PCA Component 1'),
+                yaxis=dict(title='PCA Component 2'),
+                zaxis=dict(title='PCA Component 3')
+            ),
+            autosize=False,
+            width=800,
+            height=600
+        ))
+        st.plotly_chart(fig, use_container_width=True)
+    else:
+        st.error("Please add more words to visualize. A minimum of three is required.")
 def main():
     st.title("BERT Embeddings Visualization")
+    default_words = ["apple", "rocket", "philosophy"]  # Default set of words
+    state_key = "words"
+    if state_key not in st.session_state:
+        st.session_state[state_key] = default_words
+    if st.button("Reset to Default Words"):
+        st.session_state[state_key] = default_words[:]
+        st.experimental_rerun()
     new_word = st.text_input("Enter a new word or phrase:")
     if st.button("Add Word/Phrase"):
         if new_word:
+            st.session_state[state_key].append(new_word)
+            st.experimental_rerun()
+    words = st.session_state[state_key]
+    embeddings = get_bert_embeddings(words)
+    plot_interactive_bert_embeddings(embeddings, words)
 if __name__ == "__main__":
     main()