import gradio as gr
import os
import numpy as np
import torch
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer

# ---------------------------------------------------------------------------
# Knowledge base: load the text file, split it into paragraph chunks, and
# precompute one embedding per chunk for fast semantic retrieval (RAG).
# ---------------------------------------------------------------------------
with open("knowledge.txt", "r", encoding="utf-8") as f:
    knowledge_text = f.read()

# Paragraph-level chunks (split on blank lines); empty fragments are dropped.
chunks = [chunk.strip() for chunk in knowledge_text.split("\n\n") if chunk.strip()]

# Load an embedding model (this one is light and fast).
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Precompute embeddings for all chunks (as a tensor for fast similarity search).
chunk_embeddings = embedder.encode(chunks, convert_to_tensor=True)


def get_relevant_context(query, top_k=3):
    """Return the ``top_k`` knowledge-base chunks most similar to *query*.

    Embeds the query, computes cosine similarity against every precomputed
    chunk embedding, and joins the best-matching chunks into a single
    context string separated by blank lines.
    """
    # Clamp so torch.topk never asks for more results than chunks exist.
    top_k = min(top_k, len(chunks))

    # Compute and normalize the query embedding.
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    query_embedding = query_embedding / query_embedding.norm()

    # Normalize chunk embeddings along the embedding dimension.
    norm_chunk_embeddings = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)

    # Cosine similarity between the query and each chunk.
    similarities = torch.matmul(norm_chunk_embeddings, query_embedding)

    # Indices of the top_k most similar chunks.
    top_k_indices = torch.topk(similarities, k=top_k).indices.cpu().numpy()

    # Concatenate the top chunks into a single context string.
    return "\n\n".join(chunks[i] for i in top_k_indices)


custom_theme = gr.themes.Soft(
    primary_hue="green",
    secondary_hue="stone",
    neutral_hue="gray",
    spacing_size="md",
    radius_size="md",
    text_size="md",
    font=["Roboto", "sans-serif"],
    font_mono=["Roboto Mono", "monospace"],
)

client = InferenceClient("google/gemma-2-2b-it")

# Persona / guardrail prompt sent as the system message on every turn.
SYSTEM_PROMPT = "You are ChaChingas, an AI financial advisor for students and low-income families. Only answer questions about budgeting, saving, debt, credit card fraud, investing, and finance. If a user asks about unrelated topics like recipes, sports, or entertainment, politely say: 'I'm here to help with money and budgeting—ask me anything about that!' Speak clearly, keep answers short, and use simple language. When asked about budgeting, explain the 50/30/20 rule: 50% for needs, 30% for wants, 20% for savings or debt. Be supportive, practical, and easy to understand. Avoid giving tax or legal advice, and never ask for or handle sensitive personal financial information."


def respond(message, history):
    """Stream a grounded reply to *message*.

    Parameters:
        message: the user's current chat message.
        history: list of (user, assistant) string pairs from ChatInterface.

    Yields the growing response string so tokens appear one at a time.
    """
    context = get_relevant_context(message, top_k=3)

    # BUG FIX: the retrieved context was previously computed but never sent
    # to the model; append it to the system prompt so the answer is grounded.
    messages = [{
        "role": "system",
        "content": SYSTEM_PROMPT + "\n\nRelevant background information:\n" + context,
    }]

    # Replay the prior conversation so the model keeps full context.
    if history:
        for user_msg, assistant_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add the current user's message last.
    messages.append({"role": "user", "content": message})

    # Stream the chat completion, yielding the accumulated text per token.
    # (Loop variable renamed from `message`, which shadowed the parameter.)
    response = ""
    for event in client.chat_completion(
        messages,
        max_tokens=500,
        temperature=0.1,
        stream=True,
    ):
        token = event.choices[0].delta.content
        # delta.content can be None on the final stream event; skip it
        # instead of raising TypeError on `response += None`.
        if token:
            response += token
        yield response


with gr.Blocks(theme=custom_theme) as demo:
    chatbot_interface = gr.ChatInterface(
        respond,
        examples=[
            "Build a Budgeting Plan",
            "Teach Me About Stocks",
            "How Do I Set Up a Bank Account?",
            "How Do I Prevent Credit Card Fraud?",
        ],
        title="ChaChingas",
        description="This is a financial literacy chatbot",
    )

demo.launch()