yuanjunchai committed on
Commit
de5ed54
·
1 Parent(s): 8266b4e

add application files

Browse files
Files changed (1) hide show
  1. app.py +23 -33
app.py CHANGED
@@ -140,10 +140,7 @@ def update_category_embeddings(embeddings_metadata):
140
  get_category_embeddings(embeddings_metadata)
141
 
142
 
143
-
144
-
145
  ### Plotting utility functions
146
-
147
  def plot_piechart(sorted_cosine_scores_items):
148
  sorted_cosine_scores = np.array([
149
  sorted_cosine_scores_items[index][1]
@@ -243,25 +240,17 @@ def cosine_similarity(x, y):
243
  3. Return exponentiated cosine similarity
244
  (20 pts)
245
  """
 
 
 
 
 
 
246
 
247
- # 点积计算
248
-
249
- dot_product = np.dot(x, y)
250
-
251
- norm_x = np.linalg.norm(x)
252
- norm_y = np.linalg.norm(y)
253
-
254
- if norm_x == 0 or norm_y == 0:
255
- cosine_sim = 0
256
- else:
257
- cosine_sim = dot_product / (norm_x * norm_y)
258
-
259
-
260
- exp_cosine_sim = math.exp(cosine_sim)
261
 
262
  return exp_cosine_sim
263
 
264
-
265
 
266
  # Task II: Average Glove Embedding Calculation
267
  def averaged_glove_embeddings_gdrive(sentence, word_index_dict, embeddings, model_type=50):
@@ -274,22 +263,23 @@ def averaged_glove_embeddings_gdrive(sentence, word_index_dict, embeddings, mode
274
  5. Return averaged embeddings
275
  (30 pts)
276
  """
277
- # 分割句子,遍历单词,计算平均嵌入
278
-
279
- embedding_dim = np.zeros(int(model_type.split("d")[0]))
280
- embedding = np.zeros(embedding_dim)
281
 
 
282
  words = sentence.split()
283
-
284
- valid_word_count = 0
285
 
286
  for word in words:
287
- if word.lower() in word_index_dict:
288
- embedding += embeddings[word_index_dict[word.lower()]]
289
- valid_word_count += 1
290
-
291
- if valid_word_count > 0:
292
- embedding /= valid_word_count
 
 
 
 
293
 
294
  return embedding
295
 
@@ -395,7 +385,7 @@ if __name__ == "__main__":
395
  key="text_search",
396
  value="Roses are red, trucks are blue, and Seattle is grey right now",
397
  )
398
- # st.session_state.text_search = text_search
399
 
400
  # Download glove embeddings if it doesn't exist
401
  embeddings_path = "embeddings_" + str(model_type) + "_temp.npy"
@@ -426,7 +416,7 @@ if __name__ == "__main__":
426
  }
427
  with st.spinner("Obtaining Cosine similarity for Glove..."):
428
  sorted_cosine_sim_glove = get_sorted_cosine_similarity(
429
- st.session_state.text_search, embeddings_metadata
430
  )
431
 
432
  # Sentence transformer embeddings
@@ -434,7 +424,7 @@ if __name__ == "__main__":
434
  embeddings_metadata = {"embedding_model": "transformers", "model_name": ""}
435
  with st.spinner("Obtaining Cosine similarity for 384d sentence transformer..."):
436
  sorted_cosine_sim_transformer = get_sorted_cosine_similarity(
437
- st.session_state.text_search, embeddings_metadata
438
  )
439
 
440
  # Results and Plot Pie Chart for Glove
 
140
  get_category_embeddings(embeddings_metadata)
141
 
142
 
 
 
143
  ### Plotting utility functions
 
144
  def plot_piechart(sorted_cosine_scores_items):
145
  sorted_cosine_scores = np.array([
146
  sorted_cosine_scores_items[index][1]
 
240
  3. Return exponentiated cosine similarity
241
  (20 pts)
242
  """
243
+ x_norm = np.linalg.norm(x)
244
+ y_norm = np.linalg.norm(y)
245
+ if x_norm == 0 or y_norm == 0:
246
+ raise ValueError("Cannot compute cosine similarity with zero vector")
247
+
248
+ cosine_sim = np.dot(x, y) / (x_norm * y_norm)
249
 
250
+ exp_cosine_sim = np.exp(cosine_sim)
 
 
 
 
 
 
 
 
 
 
 
 
 
251
 
252
  return exp_cosine_sim
253
 
 
254
 
255
  # Task II: Average Glove Embedding Calculation
256
  def averaged_glove_embeddings_gdrive(sentence, word_index_dict, embeddings, model_type=50):
 
263
  5. Return averaged embeddings
264
  (30 pts)
265
  """
266
+ embedding = np.zeros(int(model_type.split("d")[0]))
 
 
 
267
 
268
+ # Split sentence into words
269
  words = sentence.split()
270
+ valid_words = 0
 
271
 
272
  for word in words:
273
+ # Check if the word is in the word_index_dict
274
+ if word in word_index_dict:
275
+ word_idx = word_index_dict[word]
276
+ embedding += embeddings[word_idx]
277
+ valid_words += 1
278
+
279
+ if valid_words > 0:
280
+ raise ValueError("No valid words in sentence")
281
+
282
+ embedding /= valid_words
283
 
284
  return embedding
285
 
 
385
  key="text_search",
386
  value="Roses are red, trucks are blue, and Seattle is grey right now",
387
  )
388
+ st.session_state.text_search = text_search
389
 
390
  # Download glove embeddings if it doesn't exist
391
  embeddings_path = "embeddings_" + str(model_type) + "_temp.npy"
 
416
  }
417
  with st.spinner("Obtaining Cosine similarity for Glove..."):
418
  sorted_cosine_sim_glove = get_sorted_cosine_similarity(
419
+ embeddings_metadata
420
  )
421
 
422
  # Sentence transformer embeddings
 
424
  embeddings_metadata = {"embedding_model": "transformers", "model_name": ""}
425
  with st.spinner("Obtaining Cosine similarity for 384d sentence transformer..."):
426
  sorted_cosine_sim_transformer = get_sorted_cosine_similarity(
427
+ embeddings_metadata
428
  )
429
 
430
  # Results and Plot Pie Chart for Glove