# Hugging Face Spaces app ("Spaces: Sleeping" status lines were page-scrape residue, not source).
import os

import gradio as gr
import numpy as np
import torch
from huggingface_hub import InferenceClient  # Hugging Face hosted-model client
from sentence_transformers import SentenceTransformer
| # Load and process the knowledge base text file | |
| with open("knowledge.txt", "r", encoding="utf-8") as f: | |
| knowledge_text = f.read() | |
| # Split the text into chunks (for example, by paragraphs) | |
| chunks = [chunk.strip() for chunk in knowledge_text.split("\n\n") if chunk.strip()] | |
| # Load an embedding model (this one is light and fast) | |
| embedder = SentenceTransformer('all-MiniLM-L6-v2') | |
| # Precompute embeddings for all chunks (as a tensor for fast similarity search) | |
| chunk_embeddings = embedder.encode(chunks, convert_to_tensor=True) | |
| def get_relevant_context(query, top_k=3): | |
| """ | |
| Compute the embedding for the query, compare it against all chunk embeddings, | |
| and return the top_k most similar chunks concatenated into a context string. | |
| """ | |
| # Compute and normalize the query embedding | |
| query_embedding = embedder.encode(query, convert_to_tensor=True) | |
| query_embedding = query_embedding / query_embedding.norm() | |
| # Normalize chunk embeddings along the embedding dimension | |
| norm_chunk_embeddings = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True) | |
| # Compute cosine similarity between the query and each chunk | |
| similarities = torch.matmul(norm_chunk_embeddings, query_embedding) | |
| # Get the indices of the top_k most similar chunks | |
| top_k_indices = torch.topk(similarities, k=top_k).indices.cpu().numpy() | |
| # Concatenate the top chunks into a single context string | |
| context = "\n\n".join([chunks[i] for i in top_k_indices]) | |
| return context | |
| # Define a simple soft theme | |
| chat_theme = gr.themes.Soft( | |
| primary_hue="pink", | |
| secondary_hue="blue", | |
| neutral_hue="green", | |
| spacing_size="md", | |
| radius_size="md", | |
| font=[gr.themes.GoogleFont("Gayathri")] | |
| ) | |
| client = InferenceClient("google/gemma-2-2b-it") | |
| def respond(message, history): | |
| messages = [{"role": "system", "content": "you’re a drink‑loving friend named Bev who guides folks toward tasty non‑caffeinated, non‑coffee beverages—whether they’re trying to quit coffee, explore something new and flavorful, or find a welcoming café with coffee‑free options using ONLY lowercase letters! you respond warmly and conversationally, offering empathy (e.g. “quitting coffee can be tough, but there are so many delicious alternatives!”) and personalized suggestions like herbal teas, mocktails, fruit‑based drinks, warm brews or chilled infusions. you share simple recipes or steps to make them at home, and when asked for recommendations, you recommend ONLY from the context of the provided knowledge file. you may mention ingredient swaps, tools like blenders or infusers, or seasonal tips. your tone is lighthearted, helpful, and supportive, in lowercase, sounding like a teenage girl. you tailor advice based on the user’s preferences and follow up with questions like “do you prefer warm or cold?” or “want something sweet, herbal, or fizzy?” if users ask about unrelated topics (animals, sports, historical figures), explain politely that you only help with drinks, briefly mention the figure’s favorite beverage (like matcha or boba), and refocus on drinks—never praising or providing ANY coffee recipes (even eg. Cold brew or Iced Coffee) or suggesting alcohol."}] | |
| # Retrieve context relevant to the current user message | |
| context = get_relevant_context(message, top_k=3) | |
| # add all previous messages to the messages list | |
| if history: | |
| for turn in history: | |
| messages.append({"role": turn["role"], "content": turn["content"]}) | |
| # add the current user's message to the messages list | |
| messages.append({"role": "user", "content": message}) | |
| # makes the chat completion API call, | |
| # sending the messages and other parameters to the model | |
| # implements streaming, where one word/token appears at a time | |
| response = "" | |
| # iterate through each message in the method | |
| for message in client.chat_completion( | |
| messages, | |
| max_tokens=500, | |
| temperature=.1, | |
| stream=True): | |
| # add the tokens to the output content | |
| token = message.choices[0].delta.content # capture the most recent toke | |
| response += token # Add it to the response | |
| yield response # yield the response: | |
| with gr.Blocks(theme=chat_theme) as chatbot: | |
| gr.Image( | |
| value="NewBanner.png", | |
| show_label=False, | |
| show_share_button=False, | |
| show_download_button=False | |
| ) | |
| gr.ChatInterface(respond, type ="messages", title = "Ditch the Coffee.", examples = ["what's a good smoothie recipe?", "help me find a local cafe with matcha", "how do i stop drinking coffee?"]) | |
| chatbot.launch() |