Update app.py
app.py CHANGED
@@ -135,13 +135,14 @@ chunk_embeddings = create_embeddings(brand_chunks)
 
 # Define a function to find the most relevant text chunks for a given query, chunk_embeddings, and text_chunks
 def get_top_chunks(query, chunk_embeddings, text_chunks):
+    if not text_chunks or chunk_embeddings is None or chunk_embeddings.size(0) == 0:
+        return []
     # Convert the query text into a vector embedding
     query_embedding = model.encode(query, convert_to_tensor=True) # Complete this line
 
     # Normalize the query embedding to unit length for accurate similarity comparison. Normalize = bring to a length of 1
     query_embedding_normalized = query_embedding / query_embedding.norm()
 
-    # Normalize all chunk embeddings to unit length for consistent comparison
     # chunk_embeddings_normalized = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
     if chunk_embeddings.ndim == 1:
         chunk_embeddings_normalized = chunk_embeddings / chunk_embeddings.norm()
@@ -154,22 +155,38 @@ def get_top_chunks(query, chunk_embeddings, text_chunks):
     print(similarities)
 
     # Find the indices of the 3 chunks with highest similarity scores
-    top_indices = torch.topk(similarities, k= min(
-
+    top_indices = torch.topk(similarities, k=min(3, len(text_chunks))).indices
+    candidate_chunks = [(i.item(), similarities[i].item()) for i in top_indices]
     # Print the top indices
     print(top_indices)
+    filtered_chunks = [(idx, score) for idx, score in candidate_chunks if score >= similarity_threshold]
+
+    def keyword_score(chunk_text, query_text):
+        q_words = set(query_text.lower().split())
+        c_words = set(chunk_text.lower().split())
+        return len(q_words & c_words)
+
+    reranked = sorted(
+        filtered_chunks,
+        key=lambda x: keyword_score(text_chunks[x[0]], query),
+        reverse=True
+    )
+
+    final_chunks = [text_chunks[idx] for idx, _ in reranked]
+
+    return final_chunks
 
     # Create an empty list to store the most relevant chunks
-
+    # top_chunks = []
 
     # Loop through the top indices and retrieve the corresponding text chunks
-
-
-
+    # for i in top_indices:
+    #     relevant_info = brand_chunks[i]
+    #     top_chunks.append(relevant_info)
 
 
     # Return the list of most relevant chunks
-
+    # return top_chunks
 
 # theme
 custom_theme = gr.themes.Soft(
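For readers following the change: the updated get_top_chunks embeds the query, L2-normalizes the query and chunk embeddings, scores chunks by cosine similarity (the dot product of unit vectors), keeps the top three scores above a similarity threshold, and reranks the survivors by keyword overlap with the query before returning them. The sketch below is a minimal, self-contained reproduction of that flow, not the app's exact code: the model name, the sample chunks, and the 0.3 cutoff are assumptions, and similarity_threshold is presumed to be defined elsewhere in app.py outside the shown hunks.

# Minimal sketch of the retrieval flow in this commit (assumed values are marked).
import torch
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed embedding model
SIMILARITY_THRESHOLD = 0.3                       # assumed cutoff; app.py uses similarity_threshold

def get_top_chunks(query, chunk_embeddings, text_chunks):
    # Guard against empty inputs, as the new code does
    if not text_chunks or chunk_embeddings is None or chunk_embeddings.size(0) == 0:
        return []

    # Embed and L2-normalize the query
    query_embedding = model.encode(query, convert_to_tensor=True)
    query_embedding = query_embedding / query_embedding.norm()

    # L2-normalize chunk embeddings (single 1-D vector or 2-D batch)
    if chunk_embeddings.ndim == 1:
        chunk_embeddings = (chunk_embeddings / chunk_embeddings.norm()).unsqueeze(0)
    else:
        chunk_embeddings = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)

    # Cosine similarity is the dot product of unit vectors
    similarities = chunk_embeddings @ query_embedding

    # Top-k indices, threshold filter, then keyword-overlap rerank
    top_indices = torch.topk(similarities, k=min(3, len(text_chunks))).indices
    candidates = [(i.item(), similarities[i].item()) for i in top_indices]
    filtered = [(idx, score) for idx, score in candidates if score >= SIMILARITY_THRESHOLD]

    def keyword_score(chunk_text, query_text):
        return len(set(query_text.lower().split()) & set(chunk_text.lower().split()))

    reranked = sorted(filtered, key=lambda x: keyword_score(text_chunks[x[0]], query), reverse=True)
    return [text_chunks[idx] for idx, _ in reranked]

# Example usage with placeholder chunks
chunks = ["Our brand voice is warm and direct.", "Shipping takes 3-5 business days."]
embeddings = model.encode(chunks, convert_to_tensor=True)
print(get_top_chunks("What is the brand voice?", embeddings, chunks))

Reranking by raw word overlap is a cheap hybrid-retrieval step; it favors chunks that share literal query terms even when their cosine scores are close.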