# ourproject / app.py — "Bev", a coffee-free beverage chatbot (Hugging Face Space)
# author: sofia-koe · banner asset: NewBanner · revision: 6a9b2d1 (verified)
import gradio as gr
from huggingface_hub import InferenceClient #imports huggingface models
import os
#newlibraries
from sentence_transformers import SentenceTransformer
import torch
import numpy as np
# ---- Knowledge-base setup (runs once at import time) ----

# Load the raw knowledge-base text that grounds the bot's recommendations.
with open("knowledge.txt", "r", encoding="utf-8") as f:
    knowledge_text = f.read()

# Split the text into chunks by blank-line-separated paragraphs,
# dropping empty fragments so every chunk has real content.
chunks = [chunk.strip() for chunk in knowledge_text.split("\n\n") if chunk.strip()]

# Load a small, fast sentence-embedding model for semantic search.
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Precompute embeddings for every chunk once (as a tensor), so each query
# needs only a single matrix multiply at request time.
chunk_embeddings = embedder.encode(chunks, convert_to_tensor=True)
def get_relevant_context(query, top_k=3):
    """
    Return the top_k knowledge-base chunks most similar to *query*.

    Computes a normalized embedding for the query, scores it against the
    precomputed chunk embeddings via cosine similarity, and joins the
    best-matching chunks into one context string.

    Args:
        query: The user's message to search against the knowledge base.
        top_k: How many chunks to retrieve (clamped to the number of
            available chunks so torch.topk cannot raise).

    Returns:
        The selected chunks concatenated with blank lines between them.
    """
    # Compute and L2-normalize the query embedding so the dot product
    # below equals cosine similarity.
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    query_embedding = query_embedding / query_embedding.norm()

    # Normalize chunk embeddings along the embedding dimension.
    norm_chunk_embeddings = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)

    # Cosine similarity between the query and each chunk.
    similarities = torch.matmul(norm_chunk_embeddings, query_embedding)

    # torch.topk raises if k exceeds the number of entries, so clamp it
    # in case the knowledge file has fewer than top_k paragraphs.
    k = min(top_k, len(chunks))
    top_k_indices = torch.topk(similarities, k=k).indices.cpu().numpy()

    # Concatenate the winning chunks into a single context string.
    return "\n\n".join(chunks[i] for i in top_k_indices)
# Visual options for the chat UI: soft pink/blue palette with the
# "Gayathri" Google font, medium spacing and corner radius.
_theme_options = dict(
    primary_hue="pink",
    secondary_hue="blue",
    neutral_hue="green",
    spacing_size="md",
    radius_size="md",
)
chat_theme = gr.themes.Soft(
    font=[gr.themes.GoogleFont("Gayathri")],
    **_theme_options,
)
# Hugging Face Inference API client for the chat model (Gemma 2 2B instruct).
client = InferenceClient("google/gemma-2-2b-it")
def respond(message, history):
    """
    Stream a chat response for *message*, grounded in the knowledge base.

    Retrieves knowledge-base context relevant to the message, builds the
    persona system prompt augmented with that context, replays the prior
    conversation, and yields the growing response as tokens stream in.

    Args:
        message: The latest user message.
        history: Prior turns as {"role": ..., "content": ...} dicts
            (Gradio "messages" format); may be None/empty on the first turn.

    Yields:
        The accumulated response text after each streamed token.
    """
    system_prompt = "you’re a drink‑loving friend named Bev who guides folks toward tasty non‑caffeinated, non‑coffee beverages—whether they’re trying to quit coffee, explore something new and flavorful, or find a welcoming café with coffee‑free options using ONLY lowercase letters! you respond warmly and conversationally, offering empathy (e.g. “quitting coffee can be tough, but there are so many delicious alternatives!”) and personalized suggestions like herbal teas, mocktails, fruit‑based drinks, warm brews or chilled infusions. you share simple recipes or steps to make them at home, and when asked for recommendations, you recommend ONLY from the context of the provided knowledge file. you may mention ingredient swaps, tools like blenders or infusers, or seasonal tips. your tone is lighthearted, helpful, and supportive, in lowercase, sounding like a teenage girl. you tailor advice based on the user’s preferences and follow up with questions like “do you prefer warm or cold?” or “want something sweet, herbal, or fizzy?” if users ask about unrelated topics (animals, sports, historical figures), explain politely that you only help with drinks, briefly mention the figure’s favorite beverage (like matcha or boba), and refocus on drinks—never praising or providing ANY coffee recipes (even eg. Cold brew or Iced Coffee) or suggesting alcohol."

    # Retrieve context relevant to the current user message.
    # BUG FIX: the original computed this context but never used it, so the
    # model could not "recommend ONLY from the knowledge file" as instructed.
    # Inject it into the system prompt so answers are actually grounded.
    context = get_relevant_context(message, top_k=3)
    messages = [{
        "role": "system",
        "content": system_prompt + "\n\nknowledge file context:\n" + context,
    }]

    # Replay all previous turns so the model sees the full conversation.
    for turn in history or []:
        messages.append({"role": turn["role"], "content": turn["content"]})

    # Add the current user's message last.
    messages.append({"role": "user", "content": message})

    # Stream the chat completion, yielding the growing response so the UI
    # shows one token at a time. (Loop variable renamed from `message` —
    # the original shadowed the user-message parameter.)
    response = ""
    for event in client.chat_completion(
        messages,
        max_tokens=500,
        temperature=.1,
        stream=True,
    ):
        # The final stream event may carry a None delta; guard against it
        # instead of raising TypeError on string concatenation.
        token = event.choices[0].delta.content
        if token:
            response += token
            yield response
# ---- UI layout and launch ----
with gr.Blocks(theme=chat_theme) as chatbot:
    # Decorative banner image at the top of the page (no label/buttons).
    gr.Image(
        value="NewBanner.png",
        show_label=False,
        show_share_button=False,
        show_download_button=False,
    )
    # Chat interface wired to the streaming respond() handler.
    gr.ChatInterface(
        respond,
        type="messages",
        title="Ditch the Coffee.",
        examples=[
            "what's a good smoothie recipe?",
            "help me find a local cafe with matcha",
            "how do i stop drinking coffee?",
        ],
    )

chatbot.launch()