File size: 4,497 Bytes
f0eb710
c452c9d
a418c45
 
 
 
f0eb710
a418c45
 
b079bd8
a418c45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f23e7e
 
7a3a684
 
 
7fd04ee
 
 
1206b4a
 
2f23e7e
 
 
7fd04ee
f0eb710
 
a418c45
f0eb710
3cd5f04
a418c45
 
 
f0eb710
 
 
 
 
a418c45
f0eb710
 
 
a418c45
f0eb710
 
 
a418c45
f0eb710
 
a418c45
 
f0eb710
 
 
 
 
a418c45
 
 
 
f0eb710
509e9d8
2fa20be
 
 
 
 
 
1206b4a
 
 
fed1666
1206b4a
 
 
8531004
 
78b75b3
af82db3
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import gradio as gr
import os
from huggingface_hub import InferenceClient #imports huggingface models
from sentence_transformers import SentenceTransformer
import torch
import numpy as np


# Read the whole knowledge base into memory as UTF-8 text.
with open("knowledge.txt", "r", encoding="utf-8") as f:
    knowledge_text = f.read()

# Paragraphs are separated by blank lines; trim each one and drop the empties.
chunks = list(filter(None, map(str.strip, knowledge_text.split("\n\n"))))

# Sentence-embedding model shared by the chunk index and the query lookup.
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Embed every chunk once at start-up so a query only has to embed itself.
chunk_embeddings = embedder.encode(chunks, convert_to_tensor=True)
def get_relevant_context(query, top_k=3):
    """
    Return the knowledge-base chunks most similar to ``query``.

    The query is embedded with the module-level ``embedder`` and compared by
    cosine similarity against the precomputed ``chunk_embeddings``; the
    best-matching entries of ``chunks`` are joined with blank lines.

    Args:
        query: The user's question as text.
        top_k: Maximum number of chunks to return. It is clamped to the
            number of available chunks, because ``torch.topk`` raises when
            asked for more entries than exist.

    Returns:
        The selected chunks joined by blank lines, most similar first, or an
        empty string when there are no chunks or ``top_k`` is not positive.
    """
    # Clamp so a small (or empty) knowledge base cannot crash the lookup.
    k = min(top_k, len(chunks))
    if k <= 0:
        return ""

    # normalize() divides by max(norm, eps), so an all-zero embedding yields
    # zeros instead of the NaNs a plain division by the norm would produce.
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    query_embedding = torch.nn.functional.normalize(query_embedding, dim=0)
    norm_chunk_embeddings = torch.nn.functional.normalize(chunk_embeddings, dim=1)

    # With unit-length vectors the dot product is the cosine similarity.
    similarities = torch.matmul(norm_chunk_embeddings, query_embedding)

    # topk returns its results sorted from most to least similar.
    top_k_indices = torch.topk(similarities, k=k).indices.tolist()

    # Concatenate the top chunks into a single context string
    return "\n\n".join(chunks[i] for i in top_k_indices)


# Soft Gradio theme: green accents on stone/gray neutrals, medium sizing.
custom_theme = gr.themes.Soft(
    font=["Roboto", "sans-serif"],
    font_mono=["Roboto Mono", "monospace"],
    primary_hue="green",
    secondary_hue="stone",
    neutral_hue="gray",
    text_size="md",
    spacing_size="md",
    radius_size="md",
)


# Hugging Face inference client for the model used by the chat handler.
client = InferenceClient("google/gemma-2-2b-it")


def respond(message, history):
    """
    Stream the assistant's reply to ``message`` for the chat interface.

    Builds the conversation as a system prompt (extended with the
    knowledge-base passages retrieved for ``message``), the earlier turns
    from ``history``, and the new user message, then streams the model's
    completion.

    Args:
        message: The user's latest message.
        history: Earlier turns of the conversation. Each entry is either a
            ``(user_msg, assistant_msg)`` pair or a dict with ``"role"`` and
            ``"content"`` keys; may be empty or ``None``.

    Yields:
        The reply accumulated so far, once for every streamed piece of text.
    """
    system_prompt = "You are ChaChingas, an AI financial advisor for students and low-income families. Only answer questions about budgeting, saving, debt, credit card fraud, investing, and finance. If a user asks about unrelated topics like recipes, sports, or entertainment, politely say: 'I'm here to help with money and budgeting—ask me anything about that!' Speak clearly, keep answers short, and use simple language. When asked about budgeting, explain the 50/30/20 rule: 50% for needs, 30% for wants, 20% for savings or debt. Be supportive, practical, and easy to understand. Avoid giving tax or legal advice, and never ask for or handle sensitive personal financial information."

    # Retrieved passages were previously computed but never sent to the
    # model; attach them to the system prompt so they can inform the answer.
    context = get_relevant_context(message, top_k=3)
    if context:
        system_prompt += (
            "\n\nUse the following reference material from the knowledge base "
            "when it is relevant to the user's question:\n\n" + context
        )

    messages = [{"role": "system", "content": system_prompt}]

    # Add all previous turns to the messages list.
    for turn in history or []:
        if isinstance(turn, dict):
            # Role/content dict entries are forwarded as they are.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            user_msg, assistant_msg = turn
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add the current user's message to the messages list.
    messages.append({"role": "user", "content": message})

    # Stream the completion so the reply appears piece by piece.
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=500,
        temperature=.1,
        stream=True,
    ):
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # A chunk may carry no text (content is None or empty); appending
        # None to the string would raise TypeError, so skip those chunks.
        if token:
            response += token
            yield response

# Assemble the UI: a single chat interface inside a themed Blocks container.
with gr.Blocks(theme=custom_theme) as demo:
    # Banner image, currently disabled:
    # gr.Image(
    #     value="Banner.png",
    #     show_label=False,
    #     show_share_button=False,
    #     show_download_button=False
    # )

    chatbot_interface = gr.ChatInterface(
        respond,
        title="ChaChingas",
        description="This is a financial literacy chatbot",
        examples=[
            "Build a Budgeting Plan",
            "Teach Me About Stocks",
            "How Do I Set Up a Bank Account?",
            "How Do I Prevent Credit Card Fraud?",
        ],
    )


# Start the Gradio server for the assembled app.
demo.launch()

#build on your original chatbot from the previous lesson
#a basic chatbot from the previous lesson is below -- edit it to incorporate the changes described above