| import gradio as gr |
| from huggingface_hub import InferenceClient |
| from sentence_transformers import SentenceTransformer |
| import torch |
| import numpy as np |
|
|
| |
# --- Startup: load the dataset and build the in-memory retrieval index ---

# Read the whole embedding-friendly dataset file into memory.
with open("uni_dataset_embedding_friendly.txt", "r", encoding="utf-8") as f:
    uni_dataset_text = f.read()

print("success")

# Chunks are delimited by "\n---\n"; drop empty/whitespace-only entries.
chunks = [c.strip() for c in uni_dataset_text.split("\n---\n") if c.strip()]

# Embed every chunk once at startup, then L2-normalise so that a dot
# product against a normalised query vector equals cosine similarity.
embedder = SentenceTransformer('all-MiniLM-L6-v2')
chunk_embeddings = embedder.encode(chunks, convert_to_tensor=True)
norm_chunk_embeddings = torch.nn.functional.normalize(chunk_embeddings, dim=1)
|
|
def get_relevant_context(query, top_k=3):
    """Return the dataset chunks most similar to *query*.

    Embeds the query, normalises it, and ranks the precomputed
    (already normalised) chunk embeddings by cosine similarity.

    Args:
        query: free-text user query.
        top_k: maximum number of chunks to return (capped at the
            number of available chunks).

    Returns:
        A pair ``(selected, indices)``: the chunk texts and their
        positions in the module-level ``chunks`` list.
    """
    q_emb = torch.nn.functional.normalize(
        embedder.encode(query, convert_to_tensor=True), dim=0
    )
    # Dot product of unit vectors == cosine similarity.
    scores = norm_chunk_embeddings @ q_emb
    k = min(top_k, scores.shape[0])
    best = torch.topk(scores, k=k).indices.cpu().tolist()
    return [chunks[i] for i in best], best
|
|
| |
# Hosted inference client for the chat model.
client = InferenceClient("microsoft/Phi-4-mini-instruct")

# System prompt framing the assistant's persona and grounding rules.
# FIX: the third fragment previously had no trailing space, so the prompt
# contained "follow-up question.When recommending..."; also corrects the
# "persons prompt" typo.
SYSTEM_ROLE = (
    "You are a realistic and friendly career advisor for secondary school students. "
    "You must prioritise answering using the information in the supplied CONTEXT. "
    "Be concise, helpful, and, if useful, ask a brief follow-up question. "
    "When recommending universities, give the top 3 universities which are best "
    "suited for the person's prompt ONLY if the person asks for particular "
    "universities."
)
|
|
def respond(message, history):
    """Chat callback for the Gradio interface.

    Retrieves the dataset chunks most relevant to *message*, builds a
    chat-completion request grounded in that context, and returns the
    model's answer.

    Args:
        message: the user's latest message.
        history: prior turns as a list of {"role", "content"} dicts
            (Gradio ``type="messages"`` format); may be None/empty on
            the first turn.

    Returns:
        The assistant's reply as a plain string.
    """
    # Ground the model in the top-3 most relevant dataset chunks.
    selected_chunks, _ = get_relevant_context(message, top_k=3)
    context_block = "\n\n".join(selected_chunks)

    messages = [
        {"role": "system", "content": SYSTEM_ROLE},
        {"role": "system", "content": f"CONTEXT:\n{context_block}"},
    ]

    # Keep only the last few turns to bound prompt size.
    if history:
        messages.extend(history[-6:])

    messages.append({"role": "user", "content": message})

    response = client.chat_completion(
        messages=messages,
        max_tokens=500,
        temperature=0.0,
        top_p=1.0,
    )
    # chat_completion returns a dataclass-like object; attribute access
    # is the documented API (dict-style indexing is a legacy convenience).
    return response.choices[0].message.content.strip()
# NOTE(review): a `chatbot = gr.ChatInterface(...)` previously created here was
# dead code — the name is immediately rebound by `with gr.Blocks() as chatbot:`
# below, so that interface (and its theme) was never launched. Removed.

# Markdown shown in the app's sidebar column.
title = "# 📖Welcome to CASSI 📖 "

topics = """
## Welcome to Cassi!
I’m here to help you find your future university and discover what you want to do in the future.
To give me an idea of how to help you, tell me...
- 👩🏻‍🏫 Your predicted grades for A-levels
- 📚 The subjects that interest you
- 💪 Your strengths
Start by asking me a question!🙋‍♀️
"""
|
|
# Two-column layout: intro/instructions on the left, chat on the right.
# The Citrus theme is applied here so it affects the app that is actually
# launched (it was previously set only on an interface that was never used).
with gr.Blocks(theme=gr.themes.Citrus()) as chatbot:
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown(title)
            gr.Markdown(topics)

        with gr.Column(scale=2):
            gr.ChatInterface(
                fn=respond,
                type="messages",
            )

# Guard the launch so importing this module doesn't start a server.
if __name__ == "__main__":
    chatbot.launch()
|
|