# Hugging Face Spaces app ("Spaces: Sleeping" status lines were page-scrape residue, not source).
import os

import gradio as gr
import numpy as np
import torch
from huggingface_hub import InferenceClient  # Hugging Face hosted-model client
from sentence_transformers import SentenceTransformer
| # Load and process the knowledge base text file | |
| with open("knowledge.txt", "r", encoding="utf-8") as f: | |
| knowledge_text = f.read() | |
| # Split the text into chunks (for example, by paragraphs) | |
| chunks = [chunk.strip() for chunk in knowledge_text.split("\n\n") if chunk.strip()] | |
| # Load an embedding model (this one is light and fast) | |
| embedder = SentenceTransformer('all-MiniLM-L6-v2') | |
| # Precompute embeddings for all chunks (as a tensor for fast similarity search) | |
| chunk_embeddings = embedder.encode(chunks, convert_to_tensor=True) | |
| def get_relevant_context(query, top_k=3): | |
| """ | |
| Compute the embedding for the query, compare it against all chunk embeddings, | |
| and return the top_k most similar chunks concatenated into a context string. | |
| """ | |
| # Compute and normalize the query embedding | |
| query_embedding = embedder.encode(query, convert_to_tensor=True) | |
| query_embedding = query_embedding / query_embedding.norm() | |
| # Normalize chunk embeddings along the embedding dimension | |
| norm_chunk_embeddings = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True) | |
| # Compute cosine similarity between the query and each chunk | |
| similarities = torch.matmul(norm_chunk_embeddings, query_embedding) | |
| # Get the indices of the top_k most similar chunks | |
| top_k_indices = torch.topk(similarities, k=top_k).indices.cpu().numpy() | |
| # Concatenate the top chunks into a single context string | |
| context = "\n\n".join([chunks[i] for i in top_k_indices]) | |
| return context | |
| # Define a simple soft theme | |
| chat_theme = gr.themes.Soft( | |
| primary_hue="pink", | |
| secondary_hue="blue", | |
| neutral_hue="green", | |
| spacing_size="md", | |
| radius_size="md", | |
| font=[gr.themes.GoogleFont("Gayathri")] | |
| ) | |
| client = InferenceClient("google/gemma-2-2b-it") | |
| def respond(message, history): | |
| messages = [{"role": "system", "content": "you’re a drink‑loving friend named Bev who guides folks toward tasty non‑caffeinated, non‑coffee beverages—whether they’re trying to quit coffee, explore something new and flavorful, or find a welcoming café with coffee‑free options using ONLY lowercase letters! you respond warmly and conversationally, offering empathy (e.g. “quitting coffee can be tough, but there are so many delicious alternatives!”) and personalized suggestions like herbal teas, mocktails, fruit‑based drinks, warm brews or chilled infusions. you share simple recipes or steps to make them at home, and when asked for recommendations, you recommend ONLY from the context of the provided knowledge file. you may mention ingredient swaps, tools like blenders or infusers, or seasonal tips. your tone is lighthearted, helpful, and supportive, in lowercase, sounding like a teenage girl. you tailor advice based on the user’s preferences and follow up with questions like “do you prefer warm or cold?” or “want something sweet, herbal, or fizzy?” if users ask about unrelated topics (animals, sports, historical figures), explain politely that you only help with drinks, briefly mention the figure’s favorite beverage (like matcha or boba), and refocus on drinks—never praising or providing ANY coffee recipes (even eg. Cold brew or Iced Coffee) or suggesting alcohol."}] | |
| # Retrieve context relevant to the current user message | |
| context = get_relevant_context(message, top_k=3) | |
| # add all previous messages to the messages list | |
| if history: | |
| for turn in history: | |
| messages.append({"role": turn["role"], "content": turn["content"]}) | |
| # add the current user's message to the messages list | |
| messages.append({"role": "user", "content": message}) | |
| # makes the chat completion API call, | |
| # sending the messages and other parameters to the model | |
| # implements streaming, where one word/token appears at a time | |
| response = "" | |
| # iterate through each message in the method | |
| for message in client.chat_completion( | |
| messages, | |
| max_tokens=500, | |
| temperature=.1, | |
| stream=True): | |
| # add the tokens to the output content | |
| token = message.choices[0].delta.content # capture the most recent toke | |
| response += token # Add it to the response | |
| yield response # yield the response: | |
| with gr.Blocks(theme=chat_theme) as chatbot: | |
| gr.Image( | |
| value="NewBanner.png", | |
| show_label=False, | |
| show_share_button=False, | |
| show_download_button=False | |
| ) | |
| gr.ChatInterface(respond, type ="messages", title = "Ditch the Coffee.", examples = ["what's a good smoothie recipe?", "help me find a local cafe with matcha", "how do i stop drinking coffee?"]) | |
| chatbot.launch() |