# KWKchatbot1 / app.py
# Author: amina-k
# Commit: edd1bb8 (verified) — "clean up the knowledge file path"
import gradio as gr
from huggingface_hub import InferenceClient #imports huggingface models
import os
import difflib # for fuzzy matching
from sentence_transformers import SentenceTransformer
import torch
import numpy as np
# ------------------------------------------------------------------
# Knowledge base: load the study notes once at startup and precompute
# their embeddings, so each user query only needs to embed the query.
# ------------------------------------------------------------------
with open("knowledge.txt", "r", encoding="utf-8") as f:
    knowledge_text = f.read()

# Split on blank lines: each paragraph becomes one retrievable chunk.
chunks = [chunk.strip() for chunk in knowledge_text.split("\n\n") if chunk.strip()]

# Load a small, fast general-purpose embedding model.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Precompute embeddings for all chunks (as a tensor for fast similarity search).
chunk_embeddings = embedder.encode(chunks, convert_to_tensor=True)
def get_relevant_context(query, top_k=3):
    """Return the `top_k` knowledge chunks most similar to `query`.

    Computes cosine similarity between the query embedding and the
    precomputed `chunk_embeddings`, then joins the best-matching
    chunks (blank-line separated) into a single context string.

    Parameters
    ----------
    query : str
        The user's question.
    top_k : int
        Maximum number of chunks to retrieve (capped at the number
        of available chunks).
    """
    # Embed and L2-normalize the query so the dot product below
    # equals cosine similarity.
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    query_embedding = query_embedding / query_embedding.norm()

    # Normalize chunk embeddings along the embedding dimension.
    norm_chunk_embeddings = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)

    # Cosine similarity between the query and each chunk.
    similarities = torch.matmul(norm_chunk_embeddings, query_embedding)

    # Cap k: torch.topk raises if asked for more items than exist.
    k = min(top_k, len(chunks))
    top_k_indices = torch.topk(similarities, k=k).indices.cpu().numpy()

    # Concatenate the top chunks into a single context string.
    return "\n\n".join(chunks[i] for i in top_k_indices)
# Hosted inference client for the instruction-tuned Gemma-2 2B chat model.
client = InferenceClient("google/gemma-2-2b-it")
def respond(message, history):
    """Stream the assistant's reply to the user's latest `message`.

    Parameters
    ----------
    message : str
        The current user message.
    history : list[tuple[str, str]] | None
        Prior (user_msg, assistant_msg) turns, as supplied by
        Gradio's ChatInterface tuple format.

    Yields
    ------
    str
        The accumulated response text, growing one token at a time.
    """
    # NOTE(review): get_relevant_context() is defined above but never
    # called here, so the retrieved knowledge is not injected into the
    # prompt — confirm whether that is intentional.
    system_prompt = (
        "You are a calm, encouraging, and knowledgeable chatbot that helps teens "
        "and young adults (ages 16–21), primarily from suburban and urban areas, "
        "prepare for the New York State permit test. Your job is to break down "
        "the NYS Driver’s Manual into relatable explanations, practice quizzes, "
        "and actionable advice that feel more like a conversation than a lesson. "
        "Speak with clarity, positivity, and patience—like a trusted older "
        "sibling or mentor who’s been through it before. You reduce anxiety, "
        "build confidence, and keep sessions engaging by mixing questions, "
        "memory tips, and real-world driving context.\n"
        "Politely refuse to answer any questions that are unrelated to driving, "
        "road rules, or the NYS permit test. If a user asks for information that "
        "isn’t required for the test or isn’t relevant to safe driving, gently "
        "let them know and guide them back to what matters for passing and being "
        "a good driver.\n"
        "Prioritize making users feel supported and understood, even when they "
        "get things wrong. Always aim to create a stress-free, judgment-free "
        "experience."
    )
    messages = [{"role": "system", "content": system_prompt}]

    # Replay all previous turns so the model sees the conversation context.
    if history:
        for user_msg, assistant_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add the current user's message.
    messages.append({"role": "user", "content": message})

    # Stream the chat completion: tokens arrive one chunk at a time and
    # we yield the growing response so the UI updates incrementally.
    # (`chunk`, not `message`, to avoid shadowing the parameter.)
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=500,
        temperature=0.1,
        stream=True,
    ):
        token = chunk.choices[0].delta.content  # most recent token
        if token:  # delta.content can be None on some stream chunks
            response += token
        yield response
# Custom Soft theme: defined before the Blocks context so it can
# actually be applied to the page (previously it was built but unused).
custom_theme = gr.themes.Soft(
    primary_hue="gray",
    secondary_hue="green",
    neutral_hue="indigo",
    spacing_size="md",
    radius_size="md",
    text_size="md",
    font=[gr.themes.GoogleFont("IBM Plex Sans"), "sans-serif"],
    font_mono=[gr.themes.GoogleFont("IBM Plex Mono"), "monospace"],
)

# Use a distinct name for the page so the ChatInterface assignment
# does not shadow it (previously `chatbot.launch()` launched the bare
# ChatInterface and the Blocks page with the banner image was dropped).
with gr.Blocks(theme=custom_theme) as demo:
    # Decorative banner image at the top of the page.
    gr.Image(
        value="/nature-landscape-background-cute-flat-600nw-1157810068.webp"
    )

    # Chat UI wired to the streaming respond() generator.
    gr.ChatInterface(respond)

# Set the Hugging Face token (e.g. HF_TOKEN env var) so this works in
# Google Colab before launching.
demo.launch()