# app.py — CASSI: a RAG-style university/career advisor chatbot.
# Hugging Face Space by louisepxllock (revision 2d63990, verified).
import gradio as gr
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer
import torch
import numpy as np
# 1) Read the embedding-friendly corpus from disk.
with open("uni_dataset_embedding_friendly.txt", "r", encoding="utf-8") as f:
    raw_corpus = f.read()
print("success")

# 2) Split into records: entries in the file are separated by a '---' line.
chunks = [piece.strip() for piece in raw_corpus.split("\n---\n") if piece.strip()]

# 3) Embed every chunk once at startup and L2-normalise the matrix so that a
#    dot product against a normalised query vector is exactly cosine similarity.
embedder = SentenceTransformer('all-MiniLM-L6-v2')
chunk_embeddings = embedder.encode(chunks, convert_to_tensor=True)
norm_chunk_embeddings = torch.nn.functional.normalize(chunk_embeddings, dim=1)
def get_relevant_context(query, top_k=3):
    """Return the ``top_k`` corpus chunks most similar to ``query``.

    The corpus embeddings are pre-normalised at startup and the query
    embedding is normalised here, so a plain dot product gives cosine
    similarity.

    Returns:
        A tuple ``(selected_chunks, indices)`` where ``selected_chunks`` is a
        list of chunk strings and ``indices`` their positions in ``chunks``.
    """
    q_vec = torch.nn.functional.normalize(
        embedder.encode(query, convert_to_tensor=True), dim=0
    )
    scores = norm_chunk_embeddings @ q_vec
    # Guard against a corpus smaller than top_k.
    k = min(top_k, scores.shape[0])
    best = torch.topk(scores, k=k).indices.cpu().tolist()
    return [chunks[i] for i in best], best
# 4) Hosted inference client for the chat model.
MODEL_ID = "microsoft/Phi-4-mini-instruct"
client = InferenceClient(MODEL_ID)
# System prompt for the advisor persona.
# NOTE: adjacent string literals are concatenated at compile time, so every
# piece must end with a trailing space — the original was missing one after
# "follow-up question.", producing "question.When recommending" in the prompt.
SYSTEM_ROLE = (
    "You are a realistic and friendly career advisor for secondary school students. "
    "You must prioritise answering using the information in the supplied CONTEXT. "
    "Be concise, helpful, and, if useful, ask a brief follow-up question. "
    "When recommending universities, give the top 3 universities which are best "
    "suited for the person's prompt ONLY if the person asks for particular universities."
)
def respond(message, history):
    """Chat handler for the Gradio interface.

    Retrieves the top-k context chunks for ``message``, prepends them as a
    second system message, replays the last few turns of ``history``, and
    queries the hosted chat model.

    Args:
        message: The user's latest utterance.
        history: Prior turns as a list of ``{'role', 'content'}`` dicts
            (Gradio ``type="messages"`` format).

    Returns:
        The assistant's reply as a stripped string.
    """
    # Retrieve top-k context for the user message; the indices are not needed.
    selected_chunks, _ = get_relevant_context(message, top_k=3)
    context_block = "\n\n".join(selected_chunks)

    # Build the message list WITH the retrieved context.
    messages = [
        {"role": "system", "content": SYSTEM_ROLE},
        {"role": "system", "content": f"CONTEXT:\n{context_block}"},
    ]

    # Keep only the last few turns to limit token usage, and project each turn
    # down to 'role'/'content': Gradio messages-format history can carry extra
    # keys (e.g. 'metadata') that the chat-completion API may reject.
    if history:
        messages.extend(
            {"role": turn["role"], "content": turn["content"]}
            for turn in history[-6:]
        )
    messages.append({"role": "user", "content": message})

    # Conservative sampling (temperature 0) to reduce hallucinations.
    response = client.chat_completion(
        messages=messages,
        max_tokens=500,
        temperature=0.0,
        top_p=1.0,
    )
    # Attribute access is the documented shape of ChatCompletionOutput
    # (dict-style subscripting also works, but this is the canonical form).
    return response.choices[0].message.content.strip()
# Gradio UI.
# The original built a throwaway gr.ChatInterface(theme=Citrus) and then
# immediately rebound `chatbot` to the Blocks layout below, discarding both
# the interface and its theme. Build the UI once and apply the theme to the
# Blocks so it actually takes effect.
title = "# 📖Welcome to CASSI 📖 "
topics = """
## Welcome to Cassi!
I’m here to help you find your future university and discover what you want to do in the future.
To give me an idea of how to help you, tell me...
- 👩🏻‍🏫 Your predicted grades for A-levels
- 📚 The subjects that interest you
- 💪 Your strengths
Start by asking me a question!🙋‍♀️
"""

with gr.Blocks(theme=gr.themes.Citrus()) as chatbot:
    with gr.Row():
        # Left column: static welcome text.
        with gr.Column(scale=1):
            gr.Markdown(title)
            gr.Markdown(topics)
        # Right column: the chat widget wired to the RAG handler.
        with gr.Column(scale=2):
            gr.ChatInterface(
                fn=respond,
                type="messages",
            )

chatbot.launch()