Spaces:

rianders
/

live_view_embeddings

Build error

App Files Community

rianders committed on May 14, 2024

Commit

6ef9d55

verified ·

1 Parent(s): 3e8771c

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -11

app.py CHANGED Viewed

@@ -3,8 +3,9 @@ from transformers import AutoModel, BertTokenizer
 from sklearn.decomposition import PCA
 import plotly.graph_objs as go
 import numpy as np
-# Initialize tokenizer and model globally if possible to avoid reloading it on every function call
 tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
 model = AutoModel.from_pretrained('bert-base-uncased')
@@ -13,8 +14,8 @@ def get_bert_embeddings(words):
     for word in words:
         inputs = tokenizer(word, return_tensors='pt')
         outputs = model(**inputs)
-        mean_embedding = outputs.last_hidden_state[0].mean(dim=0).detach().numpy()
-        embeddings.append(mean_embedding)
     return embeddings
 def plot_interactive_bert_embeddings(embeddings, words):
@@ -48,23 +49,40 @@ def plot_interactive_bert_embeddings(embeddings, words):
 def main():
     st.title("BERT Embeddings Visualization")
-    default_words = ["apple", "rocket", "philosophy"]  # Default set of words
-    state_key = "words"
-    if state_key not in st.session_state:
-        st.session_state[state_key] = default_words
     if st.button("Reset to Default Words"):
-        st.session_state[state_key] = default_words[:]
         st.experimental_rerun()
     new_word = st.text_input("Enter a new word or phrase:")
     if st.button("Add Word/Phrase"):
         if new_word:
-            st.session_state[state_key].append(new_word)
             st.experimental_rerun()
-    words = st.session_state[state_key]
-    embeddings = get_bert_embeddings(words)
     plot_interactive_bert_embeddings(embeddings, words)
 if __name__ == "__main__":

 from sklearn.decomposition import PCA
 import plotly.graph_objs as go
 import numpy as np
+from database_utils import init_db, save_embeddings_to_db, get_all_embeddings, clear_all_entries, fetch_data_as_csv
+# Initialize BERT model and tokenizer
 tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
 model = AutoModel.from_pretrained('bert-base-uncased')
     for word in words:
         inputs = tokenizer(word, return_tensors='pt')
         outputs = model(**inputs)
+        mean_embedding = outputs.last_hidden_state.mean(dim=1).detach().numpy()
+        embeddings.append(mean_embedding[0])
     return embeddings
 def plot_interactive_bert_embeddings(embeddings, words):
 def main():
     st.title("BERT Embeddings Visualization")
+    default_words = ["apple", "rocket", "philosophy"]
+    if "words" not in st.session_state:
+        st.session_state.words = default_words
+        init_db()  # Initialize the database
+        for word in default_words:
+            embedding = get_bert_embeddings([word])[0]
+            save_embeddings_to_db(word, embedding)
     if st.button("Reset to Default Words"):
+        clear_all_entries()
+        st.session_state.words = default_words[:]
+        for word in default_words:
+            embedding = get_bert_embeddings([word])[0]
+            save_embeddings_to_db(word, embedding)
         st.experimental_rerun()
     new_word = st.text_input("Enter a new word or phrase:")
     if st.button("Add Word/Phrase"):
         if new_word:
+            embedding = get_bert_embeddings([new_word])[0]
+            save_embeddings_to_db(new_word, embedding)
+            st.session_state.words.append(new_word)
             st.experimental_rerun()
+    if st.button("Clear All Entries"):
+        clear_all_entries()
+        st.session_state.words = default_words[:]
+        st.experimental_rerun()
+    if st.button("Download Database as CSV"):
+        csv = fetch_data_as_csv()
+        st.download_button(label="Download CSV", data=csv, file_name='embeddings.csv', mime='text/csv')
+    embeddings, words = get_all_embeddings()
     plot_interactive_bert_embeddings(embeddings, words)
 if __name__ == "__main__":