Spaces:
Sleeping
Sleeping
hold up let it cook
Browse files
app.py
CHANGED
|
@@ -12,29 +12,32 @@ with open("uni_dataset_embedding_friendly.txt", "r", encoding="utf-8") as file:
|
|
| 12 |
# Print the text below
|
| 13 |
print("success")
|
| 14 |
|
| 15 |
-
chunks = [chunk.strip() for chunk in uni_dataset_text.split("\n
|
| 16 |
embedder = SentenceTransformer('all-MiniLM-L6-v2')
|
| 17 |
chunk_embeddings = embedder.encode(chunks, convert_to_tensor= True)
|
|
|
|
| 18 |
|
| 19 |
def get_relevant_context(query, top_k=3):
|
| 20 |
-
query_embedding = embedder.encode(query, convert_to_tensor
|
| 21 |
-
query_embedding = query_embedding
|
| 22 |
-
norm_chunk_embeddings = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
|
| 23 |
similarities = torch.matmul(norm_chunk_embeddings, query_embedding)
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
client = InferenceClient("microsoft/phi-4")
|
| 30 |
def respond(message, history):
|
| 31 |
-
messages = [{"role": "system", "content": "you are a realistic and friendly career advisor to help secondary school students with important decisions such as the university courses they should apply to, careers to pursue, etc. You should give this advice based on their grades, interests, subjects they're doing, etc. Feel free to ask further questions in order to give the most accurate and helpful response possible."}]
|
| 32 |
if history:
|
| 33 |
messages.extend(history)
|
| 34 |
messages.append({"role": "user", "content":message})
|
| 35 |
response = client.chat_completion(
|
| 36 |
-
messages,
|
| 37 |
-
|
|
|
|
|
|
|
| 38 |
)
|
| 39 |
return response['choices'][0]['message']['content'].strip()
|
| 40 |
|
|
|
|
| 12 |
# Print the text below
print("success")

# Chunk the raw dataset: records are separated by "---" lines; blank
# entries are discarded after whitespace trimming.
chunks = [piece.strip() for piece in uni_dataset_text.split("\n---\n") if piece.strip()]

# Embed every chunk once at startup with a small sentence-encoder model.
embedder = SentenceTransformer('all-MiniLM-L6-v2')
chunk_embeddings = embedder.encode(chunks, convert_to_tensor=True)

# Pre-normalise the chunk matrix (L2, per row) so retrieval can score
# with a plain dot product instead of full cosine similarity each call.
norm_chunk_embeddings = torch.nn.functional.normalize(chunk_embeddings, dim=1)
| 20 |
def get_relevant_context(query, top_k=3):
    """Retrieve the dataset chunks most similar to *query*.

    Encodes the query, L2-normalises it, and scores it against the
    pre-normalised chunk matrix by dot product (i.e. cosine similarity).

    Returns a tuple ``(selected, indices)``: the top-scoring chunk texts
    and their integer positions in the module-level ``chunks`` list.
    ``top_k`` is clamped to the number of available chunks.
    """
    encoded = embedder.encode(query, convert_to_tensor=True)
    unit_query = torch.nn.functional.normalize(encoded, dim=0)

    # (num_chunks, dim) @ (dim,) -> (num_chunks,) similarity scores.
    scores = norm_chunk_embeddings @ unit_query

    limit = min(top_k, scores.shape[0])
    best = torch.topk(scores, k=limit).indices.cpu().tolist()
    return [chunks[pos] for pos in best], best
|
| 28 |
|
| 29 |
|
| 30 |
client = InferenceClient("microsoft/phi-4")

def respond(message, history):
    """Chat handler: answer a student's question, grounded in the dataset.

    Fix: the retrieval pipeline (``get_relevant_context``) was built but
    never called, so the model never saw the dataset despite the system
    prompt telling it to "Prioritise answering questions with the
    information supplied". The top matching chunks are now injected as an
    extra system message before the conversation history.

    Parameters:
        message: the user's latest message (str).
        history: prior conversation turns in chat-message dict form, or a
            falsy value on the first turn.

    Returns the assistant's reply text, stripped of surrounding whitespace.
    """
    messages = [{"role": "system", "content": "you are a realistic and friendly career advisor to help secondary school students with important decisions such as the university courses they should apply to, careers to pursue, etc. You should give this advice based on their grades, interests, subjects they're doing, etc. Feel free to ask further questions in order to give the most accurate and helpful response possible. Prioritise answering questions with the information supplied. Be concise, helpful and, if useful, ask brief follow-up questions."}]

    # Ground the model: retrieve the chunks most relevant to this message
    # and supply them alongside the system prompt.
    context_chunks, _ = get_relevant_context(message)
    if context_chunks:
        messages.append({
            "role": "system",
            "content": "Relevant information:\n" + "\n---\n".join(context_chunks),
        })

    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = client.chat_completion(
        messages=messages,
        temperature=0.0,   # deterministic advice
        max_tokens=500,
        top_p=1.0,
    )
    # NOTE(review): dict-style indexing assumes the client returns a
    # mapping; newer huggingface_hub versions also support attribute
    # access — confirm against the installed version.
    return response['choices'][0]['message']['content'].strip()
|
| 43 |
|