# kaitwithkwk — Update app.py (commit 32f5a5e, verified)
import gradio as gr
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer
import torch
import numpy as np
# ---------------------------------------------------------------------------
# Knowledge base: load a plain-text file and index it for semantic retrieval.
# ---------------------------------------------------------------------------

# Load the knowledge base text file. The read happens inside the `with`
# block so the file handle is closed deterministically.
with open("knowledge.txt", "r", encoding="utf-8") as f:
    knowledge_text = f.read()

# Split the text into chunks (paragraphs separated by blank lines),
# discarding empty or whitespace-only fragments.
chunks = [chunk.strip() for chunk in knowledge_text.split("\n\n") if chunk.strip()]

# Load a lightweight, fast sentence-embedding model.
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Precompute embeddings for all chunks once at startup (as a tensor, so
# query-time similarity search is a single matrix multiplication).
chunk_embeddings = embedder.encode(chunks, convert_to_tensor=True)
def get_relevant_context(query, top_k=3):
    """
    Return a context string built from the knowledge-base chunks most
    similar to *query*.

    The query is embedded with the same model used for the chunks, both
    sides are L2-normalized, and cosine similarity is computed as a dot
    product. The best-matching chunks are joined into one string.

    Parameters
    ----------
    query : str
        The user's question.
    top_k : int, optional
        Maximum number of chunks to return (default 3). Clamped to the
        number of available chunks so a small knowledge base cannot
        make ``torch.topk`` raise.

    Returns
    -------
    str
        The selected chunks concatenated with blank lines.
    """
    # Compute and L2-normalize the query embedding.
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    query_embedding = query_embedding / query_embedding.norm()

    # Normalize chunk embeddings along the embedding dimension.
    norm_chunk_embeddings = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)

    # Cosine similarity between the query and each chunk (dot product of
    # unit vectors).
    similarities = torch.matmul(norm_chunk_embeddings, query_embedding)

    # torch.topk raises if k exceeds the number of elements; clamp it.
    k = min(top_k, len(chunks))
    top_k_indices = torch.topk(similarities, k=k).indices.cpu().numpy()

    # Concatenate the top chunks into a single context string.
    return "\n\n".join(chunks[i] for i in top_k_indices)
# Hugging Face Inference API client used to generate chat responses.
# NOTE(review): no token is passed explicitly — this relies on the ambient
# credentials/quota of the hosting environment; confirm for deployment.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
def respond(message, history):
    """
    Stream a chat response for *message*, grounded in retrieved context.

    Parameters
    ----------
    message : str
        The user's latest message.
    history : list[dict] | None
        Prior turns as OpenAI-style ``{"role": ..., "content": ...}``
        dicts (Gradio supplies this format with ``type="messages"``).

    Yields
    ------
    str
        The accumulated response so far, growing as tokens stream in.
    """
    # Retrieve context relevant to the current user message.
    context = get_relevant_context(message, top_k=3)

    # System prompt embedding the retrieved context.
    system_message = f"You are FinWiz, a friendly financial chatbot. Use the following context to help answer the user's question:\n\n{context}\n\n"
    messages = [{"role": "system", "content": system_message}]

    # Include prior turns so the model sees the conversation so far.
    if history:
        messages.extend(history)

    # Append the current user message.
    messages.append({"role": "user", "content": message})

    response = ""
    # Stream tokens from the model; yield the accumulated text each time
    # so the chat bubble updates incrementally.
    for message_chunk in client.chat_completion(
        messages,
        max_tokens=100,
        stream=True
    ):
        # A delta may omit 'content' or carry None (e.g. role-only or
        # final chunks). `.get('content', '')` still returns None when the
        # key is present with a None value, so coerce with `or ''` to keep
        # the concatenation from raising TypeError.
        token = message_chunk['choices'][0]['delta'].get('content', '') or ''
        response += token
        yield response
# --- Static UI copy (rendered as Markdown in the Gradio layout below) ---

# App title shown at the top of the sidebar column.
title = "# 💸 FinWiz"

# Welcome text and suggested topic list for the sidebar.
topics = """
### Welcome to FinWiz (financial wizard), your AI-driven assistant for all personal finance questions.
Feel free to ask me anything from the topics below!
- Budgeting Tips – Expense tracking, 50/30/20 rule
- Saving Strategies – Emergency fund, automation
- Debt Management – Snowball vs. Avalanche
- Credit Scores – Boosting, credit cards
- Investing Basics – Stocks, ETFs, diversification
- Retirement Planning – 401(k), Roth IRA
- Tax Tips – Deductions, tax-efficient investing
- Big Purchases – Home, car, college
- Income Growth – Salary negotiation, side gigs
- Insurance Essentials – Health, life, risk management
"""

# Educational/legal disclaimer rendered beneath the chat area.
disclaimer = "FinWiz provides general educational information only and is not a substitute for professional financial advice. You are solely responsible for any financial decisions you make based on interactions with this chatbot. Market conditions change constantly, and this tool has limited knowledge about your personal financial situation. It's best to consult with qualified financial professionals before making investment decisions."
# Assemble the Gradio UI: sidebar (title + topics) next to the chat,
# with the disclaimer in a full-width row underneath.
with gr.Blocks() as chatbot:
    with gr.Row():
        # Left column: title and suggested topics.
        with gr.Column(scale=1):
            gr.Markdown(title)
            gr.Markdown(topics)
        # Right column: the chat interface itself; `type="messages"` makes
        # Gradio pass history as OpenAI-style role/content dicts.
        with gr.Column(scale=2):
            chat = gr.ChatInterface(
                fn=respond,
                type="messages"
            )
    with gr.Row():
        gr.Markdown(disclaimer)

# Launch only when run as a script, so the module can be imported
# (e.g. for testing) without starting a server.
if __name__ == "__main__":
    chatbot.launch()