louisepxllock committed on
Commit
c8bcb68
·
verified ·
1 Parent(s): 273e830

hold up let it cook

Browse files
Files changed (1) hide show
  1. app.py +13 -10
app.py CHANGED
@@ -12,29 +12,32 @@ with open("uni_dataset_embedding_friendly.txt", "r", encoding="utf-8") as file:
12
  # Print the text below
13
  print("success")
14
 
15
- chunks = [chunk.strip() for chunk in uni_dataset_text.split("\n\n") if chunk.strip()]
16
  embedder = SentenceTransformer('all-MiniLM-L6-v2')
17
  chunk_embeddings = embedder.encode(chunks, convert_to_tensor= True)
 
18
 
19
  def get_relevant_context(query, top_k=3):
20
- query_embedding = embedder.encode(query, convert_to_tensor = True)
21
- query_embedding = query_embedding / query_embedding.norm()
22
- norm_chunk_embeddings = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
23
  similarities = torch.matmul(norm_chunk_embeddings, query_embedding)
24
- top_k_indices = torch.topk(similarities, k=top_k).indices.cpu().numpy()
25
- context = "\n\n".join([chunks[i] for i in top_k_indices])
26
- return context
 
27
 
28
 
29
  client = InferenceClient("microsoft/phi-4")
30
  def respond(message, history):
31
- messages = [{"role": "system", "content": "you are a realistic and friendly career advisor to help secondary school students with important decisions such as the university courses they should apply to, careers to pursue, etc. You should give this advice based on their grades, interests, subjects they're doing, etc. Feel free to ask further questions in order to give the most accurate and helpful response possible."}]
32
  if history:
33
  messages.extend(history)
34
  messages.append({"role": "user", "content":message})
35
  response = client.chat_completion(
36
- messages,
37
- max_tokens=500
 
 
38
  )
39
  return response['choices'][0]['message']['content'].strip()
40
 
 
# Sanity check: reaching this line means the dataset file opened and read
# successfully above.
# Print the text below
print("success")

# Split the dataset into passages on "---" delimiter lines; strip each and
# drop whitespace-only entries so every chunk is a non-empty passage.
chunks = [chunk.strip() for chunk in uni_dataset_text.split("\n---\n") if chunk.strip()]
# Sentence-embedding model used for retrieval.
embedder = SentenceTransformer('all-MiniLM-L6-v2')
# Embed every chunk once at startup; kept as a tensor for the matmul in
# get_relevant_context below.
chunk_embeddings = embedder.encode(chunks, convert_to_tensor= True)
# L2-normalise each row so a plain dot product with a normalised query
# vector equals cosine similarity.
norm_chunk_embeddings = torch.nn.functional.normalize(chunk_embeddings, dim=1)
19
 
20
def get_relevant_context(query, top_k=3):
    """Return the dataset passages most similar to *query*.

    Embeds the query with the same model used for the corpus, L2-normalises
    it, and scores every pre-normalised chunk embedding by dot product
    (cosine similarity).

    Args:
        query: free-text question from the user.
        top_k: maximum number of passages to return (clamped to the corpus
            size, so small corpora never raise).

    Returns:
        A ``(passages, indices)`` pair: the best-matching chunk strings and
        their integer positions in ``chunks``, both ranked best-first.
    """
    q_vec = torch.nn.functional.normalize(
        embedder.encode(query, convert_to_tensor=True), dim=0
    )
    # Cosine scores against every chunk in one matmul.
    scores = torch.matmul(norm_chunk_embeddings, q_vec)
    # Never ask topk for more entries than exist.
    n_best = min(top_k, scores.shape[0])
    best_indices = torch.topk(scores, k=n_best).indices.cpu().tolist()
    return [chunks[i] for i in best_indices], best_indices
28
 
29
 
30
# Hosted inference endpoint for the chat model.
client = InferenceClient("microsoft/phi-4")


def respond(message, history):
    """Generate a career-advice reply to *message* given chat *history*.

    Fix: the retrieval pipeline built at module level (chunks, embeddings,
    ``get_relevant_context``) was computed but never consulted, even though
    the system prompt instructs the model to "prioritise answering questions
    with the information supplied". The top-matching passages are now
    prepended to the user's turn so the model can ground its answer.

    Args:
        message: the user's latest message (plain string).
        history: optional list of prior ``{"role", "content"}`` dicts.

    Returns:
        The assistant's reply text, stripped of surrounding whitespace.
    """
    messages = [{"role": "system", "content": "you are a realistic and friendly career advisor to help secondary school students with important decisions such as the university courses they should apply to, careers to pursue, etc. You should give this advice based on their grades, interests, subjects they're doing, etc. Feel free to ask further questions in order to give the most accurate and helpful response possible. Prioritise answering questions with the information supplied. Be concise, helpful and, if useful, ask brief follow-up questions."}]
    if history:
        messages.extend(history)
    # Supply the retrieved dataset passages alongside the question; fall
    # back to the bare message if nothing relevant was found.
    relevant_chunks, _ = get_relevant_context(message)
    if relevant_chunks:
        user_content = (
            "Relevant information:\n\n"
            + "\n\n".join(relevant_chunks)
            + "\n\nStudent's message: "
            + message
        )
    else:
        user_content = message
    messages.append({"role": "user", "content": user_content})
    response = client.chat_completion(
        messages=messages,
        temperature=0.0,  # deterministic output for reproducible advice
        max_tokens=500,
        top_p=1.0,
    )
    return response['choices'][0]['message']['content'].strip()
43