louisepxllock committed on
Commit
c8bcb68
·
verified ·
1 Parent(s): 273e830

hold up let it cook

Browse files
Files changed (1) hide show
  1. app.py +13 -10
app.py CHANGED
@@ -12,29 +12,32 @@ with open("uni_dataset_embedding_friendly.txt", "r", encoding="utf-8") as file:
12
  # Print the text below
13
  print("success")
14
 
15
- chunks = [chunk.strip() for chunk in uni_dataset_text.split("\n\n") if chunk.strip()]
16
  embedder = SentenceTransformer('all-MiniLM-L6-v2')
17
  chunk_embeddings = embedder.encode(chunks, convert_to_tensor= True)
 
18
 
19
  def get_relevant_context(query, top_k=3):
20
- query_embedding = embedder.encode(query, convert_to_tensor = True)
21
- query_embedding = query_embedding / query_embedding.norm()
22
- norm_chunk_embeddings = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
23
  similarities = torch.matmul(norm_chunk_embeddings, query_embedding)
24
- top_k_indices = torch.topk(similarities, k=top_k).indices.cpu().numpy()
25
- context = "\n\n".join([chunks[i] for i in top_k_indices])
26
- return context
 
27
 
28
 
29
  client = InferenceClient("microsoft/phi-4")
30
  def respond(message, history):
31
- messages = [{"role": "system", "content": "you are a realistic and friendly career advisor to help secondary school students with important decisions such as the university courses they should apply to, careers to pursue, etc. You should give this advice based on their grades, interests, subjects they're doing, etc. Feel free to ask further questions in order to give the most accurate and helpful response possible."}]
32
  if history:
33
  messages.extend(history)
34
  messages.append({"role": "user", "content":message})
35
  response = client.chat_completion(
36
- messages,
37
- max_tokens=500
 
 
38
  )
39
  return response['choices'][0]['message']['content'].strip()
40
 
 
# Sanity check: reaching this line means the dataset file opened and read
# successfully above.
# Print the text below
print("success")

# Split the dataset into passages on "---" delimiter lines; strip each and
# drop whitespace-only entries so every chunk is a non-empty passage.
chunks = [chunk.strip() for chunk in uni_dataset_text.split("\n---\n") if chunk.strip()]
# Sentence-embedding model used for retrieval.
embedder = SentenceTransformer('all-MiniLM-L6-v2')
# Embed every chunk once at startup; kept as a tensor for the matmul in
# get_relevant_context below.
chunk_embeddings = embedder.encode(chunks, convert_to_tensor= True)
# L2-normalise each row so a plain dot product with a normalised query
# vector equals cosine similarity.
norm_chunk_embeddings = torch.nn.functional.normalize(chunk_embeddings, dim=1)
19
 
20
def get_relevant_context(query, top_k=3):
    """Return the dataset passages most similar to *query*.

    Embeds the query with the same model used for the corpus, L2-normalises
    it, and scores every pre-normalised chunk embedding by dot product
    (cosine similarity).

    Args:
        query: free-text question from the user.
        top_k: maximum number of passages to return (clamped to the corpus
            size, so small corpora never raise).

    Returns:
        A ``(passages, indices)`` pair: the best-matching chunk strings and
        their integer positions in ``chunks``, both ranked best-first.
    """
    q_vec = torch.nn.functional.normalize(
        embedder.encode(query, convert_to_tensor=True), dim=0
    )
    # Cosine scores against every chunk in one matmul.
    scores = torch.matmul(norm_chunk_embeddings, q_vec)
    # Never ask topk for more entries than exist.
    n_best = min(top_k, scores.shape[0])
    best_indices = torch.topk(scores, k=n_best).indices.cpu().tolist()
    return [chunks[i] for i in best_indices], best_indices
28
 
29
 
30
# Hosted inference endpoint for the chat model.
client = InferenceClient("microsoft/phi-4")


def respond(message, history):
    """Generate a career-advice reply to *message* given chat *history*.

    Fix: the retrieval pipeline built at module level (chunks, embeddings,
    ``get_relevant_context``) was computed but never consulted, even though
    the system prompt instructs the model to "prioritise answering questions
    with the information supplied". The top-matching passages are now
    prepended to the user's turn so the model can ground its answer.

    Args:
        message: the user's latest message (plain string).
        history: optional list of prior ``{"role", "content"}`` dicts.

    Returns:
        The assistant's reply text, stripped of surrounding whitespace.
    """
    messages = [{"role": "system", "content": "you are a realistic and friendly career advisor to help secondary school students with important decisions such as the university courses they should apply to, careers to pursue, etc. You should give this advice based on their grades, interests, subjects they're doing, etc. Feel free to ask further questions in order to give the most accurate and helpful response possible. Prioritise answering questions with the information supplied. Be concise, helpful and, if useful, ask brief follow-up questions."}]
    if history:
        messages.extend(history)
    # Supply the retrieved dataset passages alongside the question; fall
    # back to the bare message if nothing relevant was found.
    relevant_chunks, _ = get_relevant_context(message)
    if relevant_chunks:
        user_content = (
            "Relevant information:\n\n"
            + "\n\n".join(relevant_chunks)
            + "\n\nStudent's message: "
            + message
        )
    else:
        user_content = message
    messages.append({"role": "user", "content": user_content})
    response = client.chat_completion(
        messages=messages,
        temperature=0.0,  # deterministic output for reproducible advice
        max_tokens=500,
        top_p=1.0,
    )
    return response['choices'][0]['message']['content'].strip()
43