Spaces:

rianders
/

live_view_embeddings

Build error

App Files Community

rianders committed on May 14, 2024

Commit

80f1785

verified ·

1 Parent(s): 2a28595

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -51

app.py CHANGED Viewed

@@ -1,70 +1,75 @@
 import streamlit as st
-from database_utils import init_db, save_embeddings_to_db, get_all_embeddings, fetch_data_as_csv, clear_all_entries
 from transformers import BertModel, BertTokenizer
 from sklearn.decomposition import PCA
 import plotly.graph_objs as go
 import numpy as np
-# BERT embeddings function
-def get_bert_embeddings(word):
-    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-    model = BertModel.from_pretrained('bert-base-uncased')
-    inputs = tokenizer(word, return_tensors='pt')
-    outputs = model(**inputs)
-    mean_embedding = outputs.last_hidden_state[0].mean(dim=0).detach().numpy()
-    return mean_embedding  # Return the mean embedding directly
-def plot_interactive_bert_embeddings():
-    embeddings, sentences = get_all_embeddings()
-    if len(sentences) > 0:
-        # Even if there's less than 3, PCA can still run with min(n_samples, n_features)
-        pca = PCA(n_components=min(3, len(sentences)))
-        reduced_embeddings = pca.fit_transform(np.array(embeddings))
-        fig = go.Figure(data=[
-            go.Scatter3d(
-                x=[emb[0]],
-                y=[emb[1] if len(emb) > 1 else 0],  # Ensure there are enough dimensions
-                z=[emb[2] if len(emb) > 2 else 0],
-                mode='markers+text',
-                text=sent,
-                name=sent
-            ) for emb, sent in zip(reduced_embeddings, sentences)
-        ], layout=go.Layout(
-            title='3D Scatter Plot of BERT Embeddings',
-            scene=dict(
-                xaxis=dict(title='PCA Component 1'),
-                yaxis=dict(title='PCA Component 2'),
-                zaxis=dict(title='PCA Component 3')
-            ),
-            autosize=False,
-            width=800,
-            height=600
-        ))
-        st.plotly_chart(fig, use_container_width=True)
-    else:
-        st.error("No data available for visualization.")
 def main():
-    st.title("BERT Embeddings Visualization - Community Edition")
     init_db()
     new_word = st.text_input("Enter a new word or phrase:")
     if st.button("Add Word/Phrase"):
         if new_word:
-            embedding = get_bert_embeddings(new_word)
             save_embeddings_to_db(new_word, embedding)
-            st.success(f"Added: {new_word}")
-    if st.button("Visualize Embeddings"):
-        plot_interactive_bert_embeddings()
-    if st.button("Clear All Entries"):
-        clear_all_entries()  # This function needs to be defined in database_utils.py to delete all records
-        st.success("All entries have been cleared.")
 if __name__ == "__main__":
     main()

 import streamlit as st
+from database_utils import init_db, save_embeddings_to_db, get_all_embeddings, clear_all_entries
 from transformers import BertModel, BertTokenizer
 from sklearn.decomposition import PCA
 import plotly.graph_objs as go
 import numpy as np
+# Initialize and load the BERT model and tokenizer
+tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+model = BertModel.from_pretrained('bert-base-uncased')
+def get_bert_embeddings(words):
+    embeddings = []
+    for word in words:
+        inputs = tokenizer(word, return_tensors='pt')
+        outputs = model(**inputs)
+        mean_embedding = outputs.last_hidden_state[0].mean(dim=0).detach().numpy()
+        embeddings.append(mean_embedding)
+    return embeddings
+def plot_interactive_bert_embeddings(embeddings, words):
+    pca = PCA(n_components=3)
+    reduced_embeddings = pca.fit_transform(embeddings)
+    fig = go.Figure(data=[
+        go.Scatter3d(
+            x=[emb[0]],
+            y=[emb[1]],
+            z=[emb[2]],
+            mode='markers+text',
+            text=word,
+            name=word
+        ) for emb, word in zip(reduced_embeddings, words)
+    ], layout=go.Layout(
+        title='3D Scatter Plot of BERT Embeddings',
+        scene=dict(
+            xaxis=dict(title='PCA Component 1'),
+            yaxis=dict(title='PCA Component 2'),
+            zaxis=dict(title='PCA Component 3')
+        ),
+        autosize=False,
+        width=800,
+        height=600
+    ))
+    st.plotly_chart(fig, use_container_width=True)
 def main():
+    st.title("BERT Embeddings Visualization")
     init_db()
+    # Default starter words
+    default_words = ["apple", "rocket", "philosophy"]
+    # Load and plot default words if database is empty
+    if not get_all_embeddings():
+        embeddings = get_bert_embeddings(default_words)
+        for word, emb in zip(default_words, embeddings):
+            save_embeddings_to_db(word, emb)
+        plot_interactive_bert_embeddings(embeddings, default_words)
     new_word = st.text_input("Enter a new word or phrase:")
     if st.button("Add Word/Phrase"):
         if new_word:
+            embedding = get_bert_embeddings([new_word])[0]
             save_embeddings_to_db(new_word, embedding)
+            embeddings, words = get_all_embeddings()
+            plot_interactive_bert_embeddings(embeddings, words)
+    if st.button("Reset to Default Words"):
+        clear_all_entries()
+        embeddings = get_bert_embeddings(default_words)
+        for word, emb in zip(default_words, embeddings):
+            save_embeddings_to_db(word, emb)
+        plot_interactive_bert_embeddings(embeddings, default_words)
 if __name__ == "__main__":
     main()