Spaces:

AllenChai
/

EEP596_MiniProject1_StarGroup

Sleeping

App Files Files Community

yuanjunchai committed on Jan 27, 2025

Commit

de5ed54

1 Parent(s): 8266b4e

add application files

Browse files

Files changed (1) hide show

app.py +23 -33

app.py CHANGED Viewed

@@ -140,10 +140,7 @@ def update_category_embeddings(embeddings_metadata):
     get_category_embeddings(embeddings_metadata)
 ### Plotting utility functions
 def plot_piechart(sorted_cosine_scores_items):
     sorted_cosine_scores = np.array([
             sorted_cosine_scores_items[index][1]
@@ -243,25 +240,17 @@ def cosine_similarity(x, y):
     3. Return exponentiated cosine similarity
     (20 pts)
     """
-    # 点积计算
-    dot_product = np.dot(x, y)
-    norm_x = np.linalg.norm(x)
-    norm_y = np.linalg.norm(y)
-    if norm_x == 0 or norm_y == 0:
-        cosine_sim = 0
-    else:
-        cosine_sim = dot_product / (norm_x * norm_y)
-    exp_cosine_sim = math.exp(cosine_sim)
     return exp_cosine_sim
 # Task II: Average Glove Embedding Calculation
 def averaged_glove_embeddings_gdrive(sentence, word_index_dict, embeddings, model_type=50):
@@ -274,22 +263,23 @@ def averaged_glove_embeddings_gdrive(sentence, word_index_dict, embeddings, mode
     5. Return averaged embeddings
     (30 pts)
     """
-    # 分割句子，遍历单词，计算平均嵌入
-    embedding_dim = np.zeros(int(model_type.split("d")[0]))
-    embedding = np.zeros(embedding_dim)
     words = sentence.split()
-    valid_word_count = 0
     for word in words:
-        if word.lower() in word_index_dict:
-            embedding += embeddings[word_index_dict[word.lower()]]
-            valid_word_count += 1
-    if valid_word_count > 0:
-        embedding /= valid_word_count
     return embedding
@@ -395,7 +385,7 @@ if __name__ == "__main__":
         key="text_search",
         value="Roses are red, trucks are blue, and Seattle is grey right now",
     )
-    # st.session_state.text_search = text_search
     # Download glove embeddings if it doesn't exist
     embeddings_path = "embeddings_" + str(model_type) + "_temp.npy"
@@ -426,7 +416,7 @@ if __name__ == "__main__":
         }
         with st.spinner("Obtaining Cosine similarity for Glove..."):
             sorted_cosine_sim_glove = get_sorted_cosine_similarity(
-                st.session_state.text_search, embeddings_metadata
             )
         # Sentence transformer embeddings
@@ -434,7 +424,7 @@ if __name__ == "__main__":
         embeddings_metadata = {"embedding_model": "transformers", "model_name": ""}
         with st.spinner("Obtaining Cosine similarity for 384d sentence transformer..."):
             sorted_cosine_sim_transformer = get_sorted_cosine_similarity(
-                st.session_state.text_search, embeddings_metadata
             )
         # Results and Plot Pie Chart for Glove

     get_category_embeddings(embeddings_metadata)
 ### Plotting utility functions
 def plot_piechart(sorted_cosine_scores_items):
     sorted_cosine_scores = np.array([
             sorted_cosine_scores_items[index][1]
     3. Return exponentiated cosine similarity
     (20 pts)
     """
+    x_norm = np.linalg.norm(x)
+    y_norm = np.linalg.norm(y)
+    if x_norm == 0 or y_norm == 0:
+        raise ValueError("Cannot compute cosine similarity with zero vector")
+    cosine_sim = np.dot(x, y) / (x_norm * y_norm)
+    exp_cosine_sim = np.exp(cosine_sim)
     return exp_cosine_sim
 # Task II: Average Glove Embedding Calculation
 def averaged_glove_embeddings_gdrive(sentence, word_index_dict, embeddings, model_type=50):
     """Return the average GloVe embedding of the known words in ``sentence``.

     The sentence is split on whitespace. Every token found in
     ``word_index_dict`` contributes ``embeddings[word_index_dict[token]]``
     to the average; tokens without an exact match are retried in lower case,
     and tokens that still do not match are ignored.

     Args:
         sentence: Text to embed.
         word_index_dict: Mapping from word to its index in ``embeddings``.
         embeddings: Indexable collection of word vectors.
         model_type: Embedding dimensionality, given either as an int
             (``50``) or as a string such as ``"50d"``.

     Returns:
         A 1-D numpy array of length ``model_type`` holding the mean vector.

     Raises:
         ValueError: If no word of ``sentence`` is in ``word_index_dict``.
     """
     # str() lets both the int default (50) and the "50d" form go through
     # the same parsing; calling .split on the int default would fail.
     embedding_dim = int(str(model_type).split("d")[0])
     embedding = np.zeros(embedding_dim)

     valid_words = 0
     for word in sentence.split():
         if word not in word_index_dict:
             # Fall back to the lower-cased token so capitalised words
             # (e.g. at the start of a sentence) are not silently dropped.
             word = word.lower()
             if word not in word_index_dict:
                 continue
         embedding += embeddings[word_index_dict[word]]
         valid_words += 1

     # Raise only when nothing matched; this also guards the division below.
     if valid_words == 0:
         raise ValueError("No valid words in sentence")
     return embedding / valid_words
         key="text_search",
         value="Roses are red, trucks are blue, and Seattle is grey right now",
     )
+    st.session_state.text_search = text_search
     # Download glove embeddings if it doesn't exist
     embeddings_path = "embeddings_" + str(model_type) + "_temp.npy"
         }
         with st.spinner("Obtaining Cosine similarity for Glove..."):
             sorted_cosine_sim_glove = get_sorted_cosine_similarity(
+                embeddings_metadata
             )
         # Sentence transformer embeddings
         embeddings_metadata = {"embedding_model": "transformers", "model_name": ""}
         with st.spinner("Obtaining Cosine similarity for 384d sentence transformer..."):
             sorted_cosine_sim_transformer = get_sorted_cosine_similarity(
+                embeddings_metadata
             )
         # Results and Plot Pie Chart for Glove