Spaces:

rianders
/

live_view_embeddings

Build error

App Files Community

rianders committed on May 13, 2024

Commit

77b117f

verified ·

1 Parent(s): 26a46c8

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -41

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import streamlit as st
 from transformers import BertModel, BertTokenizer
-import torch
 from sklearn.decomposition import PCA
 import plotly.graph_objs as go
 import numpy as np
@@ -10,40 +10,34 @@ def get_bert_embeddings(words):
     tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
     model = BertModel.from_pretrained('bert-base-uncased')
     embeddings = []
     for word in words:
         inputs = tokenizer(word, return_tensors='pt')
         outputs = model(**inputs)
-        # Calculate mean of embeddings across all tokens in the phrase
         mean_embedding = outputs.last_hidden_state[0].mean(dim=0).detach().numpy()
         embeddings.append(mean_embedding)
     if len(embeddings) > 0:
         pca = PCA(n_components=3)
         reduced_embeddings = pca.fit_transform(np.array(embeddings))
         return reduced_embeddings
     return []
 # Plotly plotting function
 def plot_interactive_bert_embeddings(embeddings, words):
     if len(words) < 4:
         st.error("Please provide at least 4 words/phrases for effective visualization.")
         return None
     data = []
     for i, word in enumerate(words):
         trace = go.Scatter3d(
-            x=[embeddings[i][0]],
-            y=[embeddings[i][1]],
             z=[embeddings[i][2]],
             mode='markers+text',
             text=[word],
             name=word
         )
         data.append(trace)
     layout = go.Layout(
         title='3D Scatter Plot of BERT Embeddings',
         scene=dict(
@@ -55,41 +49,32 @@ def plot_interactive_bert_embeddings(embeddings, words):
         width=800,
         height=600
     )
     fig = go.Figure(data=data, layout=layout)
     return fig
 def main():
-    st.title("BERT Embeddings Visualization")
-    # Initialize or get existing words list from the session state
-    if 'words' not in st.session_state:
-        st.session_state.words = []
-    # Text input for new words
-    new_words_input = st.text_input("Enter a new word/phrase:")
-    # Button to add new words
-    if st.button("Add Word/Phrase"):
-        if new_words_input:
-            st.session_state.words.append(new_words_input)
-            st.success(f"Added: {new_words_input}")
-    # Display current list of words
-    if st.session_state.words:
-        st.write("Current list of words/phrases:", ', '.join(st.session_state.words))
-    # Generate embeddings and plot
-    if st.button("Generate Embeddings"):
-        with st.spinner('Generating embeddings...'):
-            embeddings = get_bert_embeddings(st.session_state.words)
-            fig = plot_interactive_bert_embeddings(embeddings, st.session_state.words)
-            if fig is not None:
-                st.plotly_chart(fig, use_container_width=True)
-    # Reset button
-    if st.button("Reset"):
-        st.session_state.words = []
 if __name__ == "__main__":
     main()

 import streamlit as st
+from database_utils import init_db, save_embeddings_to_db, get_all_embeddings
 from transformers import BertModel, BertTokenizer
 from sklearn.decomposition import PCA
 import plotly.graph_objs as go
 import numpy as np
     tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
     model = BertModel.from_pretrained('bert-base-uncased')
     embeddings = []
     for word in words:
         inputs = tokenizer(word, return_tensors='pt')
         outputs = model(**inputs)
+        # Average the embeddings of all tokens for the word/phrase
         mean_embedding = outputs.last_hidden_state[0].mean(dim=0).detach().numpy()
         embeddings.append(mean_embedding)
     if len(embeddings) > 0:
         pca = PCA(n_components=3)
         reduced_embeddings = pca.fit_transform(np.array(embeddings))
         return reduced_embeddings
     return []
 # Plotly plotting function
 def plot_interactive_bert_embeddings(embeddings, words):
     if len(words) < 4:
         st.error("Please provide at least 4 words/phrases for effective visualization.")
         return None
     data = []
     for i, word in enumerate(words):
         trace = go.Scatter3d(
+            x=[embeddings[i][0]],
+            y=[embeddings[i][1]],
             z=[embeddings[i][2]],
             mode='markers+text',
             text=[word],
             name=word
         )
         data.append(trace)
     layout = go.Layout(
         title='3D Scatter Plot of BERT Embeddings',
         scene=dict(
         width=800,
         height=600
     )
     fig = go.Figure(data=data, layout=layout)
     return fig
 def main():
+    st.title("BERT Embeddings Visualization - Community Edition")
+    # Button to initialize the database
+    if st.button("Initialize Database"):
+        msg = init_db()
+        st.success(msg)
+    # Text input for new sentence
+    new_sentence = st.text_input("Enter a new sentence:")
+    if st.button("Add and Visualize Sentence"):
+        if new_sentence:
+            embeddings = get_bert_embeddings([new_sentence])
+            if embeddings.size > 0:
+                save_embeddings_to_db(new_sentence, embeddings[0])
+                st.success("Sentence added and embedding saved!")
+    # Button to display all embeddings
+    if st.button("Show All Embeddings"):
+        embeddings, sentences = get_all_embeddings()
+        fig = plot_interactive_bert_embeddings(np.vstack(embeddings), sentences)
+        if fig is not None:
+            st.plotly_chart(fig, use_container_width=True)
 if __name__ == "__main__":
     main()