mingbaer committed on
Commit
917b532
·
verified ·
1 Parent(s): 2e2648c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -7
app.py CHANGED
@@ -44,26 +44,35 @@ essay_chunks.extend(staar_chunks)
44
  # load an embedding model
45
  model = SentenceTransformer('all-MiniLM-L6-v2')
46
 
47
- chunk_embeddings = model.encode(cleaned_chunks, convert_to_tensor=True)
 
 
48
 
49
- def pull_relevant_info(query, top_k=3):
50
- query_embedding = model.encode(query, convert_to_tensor=True)
 
 
51
  query_embedding = query_embedding / query_embedding.norm()
52
 
53
  norm_chunk_embeddings = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
54
 
55
  similarities = torch.matmul(norm_chunk_embeddings, query_embedding)
56
 
57
- top_indices = torch.topk(similarities, k=top_k).indices.cpu().numpy()
 
 
 
 
 
 
58
 
59
- relevant_info = "\n\n".join([cleaned_chunks[i] for i in top_indices])
60
- return relevant_info
61
 
62
  client = InferenceClient("microsoft/phi-4")
63
 
64
  def respond(message, history):
65
 
66
- info = pull_relevant_info(message, top_k=3)
67
  system_message = (f"You are a helpful and kind teacher named Ms. Honey. You respond clearly in no more than three complete sentences. If a user asks you to write something for them, you refuse and remind them they are capable of writing the piece themselves. Use the following information to help answer the user's question:\n\n{info}\n\n")
68
  messages = [{"role": "system", "content": system_message}]
69
 
 
44
  # load an embedding model
45
  model = SentenceTransformer('all-MiniLM-L6-v2')
46
 
47
def create_embeddings(text_chunks):
    """Encode a sequence of text chunks into a tensor of embeddings.

    Args:
        text_chunks: List of strings to embed.

    Returns:
        A tensor of embeddings, one row per input chunk
        (as produced by ``model.encode(..., convert_to_tensor=True)``).
    """
    return model.encode(text_chunks, convert_to_tensor=True)
50
 
51
# Precompute embeddings for the essay chunks once at module load so each
# query only has to embed the incoming message, not the whole corpus.
essay_embeddings = create_embeddings(essay_chunks)
52
+
53
def pull_relevant_info(message, chunk_embeddings, text_chunks, top_k=3):
    """Return the ``top_k`` text chunks most similar to *message*.

    Embeds the query, L2-normalizes both the query and the chunk
    embeddings so their dot product equals cosine similarity, then
    selects the highest-scoring chunks.

    Args:
        message: User query string to embed.
        chunk_embeddings: 2-D tensor of chunk embeddings, one row per
            entry in ``text_chunks``.
        text_chunks: Sequence of chunk strings aligned row-for-row with
            ``chunk_embeddings``.
        top_k: Maximum number of chunks to return. Defaults to 3,
            preserving the previous hard-coded behavior.

    Returns:
        List of up to ``top_k`` chunk strings, most similar first.
    """
    query_embedding = model.encode(message, convert_to_tensor=True)
    # Normalize both sides so matmul yields cosine similarity.
    query_embedding = query_embedding / query_embedding.norm()
    norm_chunk_embeddings = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
    similarities = torch.matmul(norm_chunk_embeddings, query_embedding)
    # Clamp k so torch.topk does not raise when the corpus has fewer
    # than top_k chunks.
    k = min(top_k, len(text_chunks))
    top_indices = torch.topk(similarities, k=k).indices
    return [text_chunks[i] for i in top_indices]
 
70
 
71
  client = InferenceClient("microsoft/phi-4")
72
 
73
  def respond(message, history):
74
 
75
+ info = pull_relevant_info(message, essay_embeddings, essay_chunks)
76
  system_message = (f"You are a helpful and kind teacher named Ms. Honey. You respond clearly in no more than three complete sentences. If a user asks you to write something for them, you refuse and remind them they are capable of writing the piece themselves. Use the following information to help answer the user's question:\n\n{info}\n\n")
77
  messages = [{"role": "system", "content": system_message}]
78