# kaitwithkwk's picture
# Update app.py
# 3c2146a verified
import gradio as gr
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer
import torch
import numpy as np
import pandas as pd
# ---- Data preparation ----------------------------------------------------
# Read the Yelp coffee-shop review dataset.
df = pd.read_csv("raw_yelp_review_data_short.csv")

# Build one retrievable text chunk per review: shop name, review body and
# star rating joined with " | " separators.
chunks = [
    f"{shop} | {review} | {stars}"
    for shop, review, stars in zip(
        df["coffee_shop_name"].astype(str),
        df["full_review_text"].astype(str),
        df["star_rating"].astype(str),
    )
]

# Small, fast sentence-embedding model used for retrieval.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Embed every chunk once up front so each query only needs a single
# encode + similarity pass.
chunk_embeddings = embedder.encode(chunks, convert_to_tensor=True)
def get_relevant_context(query, top_k=3):
    """Return the ``top_k`` review chunks most similar to *query*.

    Ranks the precomputed chunk embeddings by cosine similarity against
    the query embedding and joins the best matches with blank lines into
    a single context string for the LLM prompt.

    Parameters
    ----------
    query : str
        The user's message to retrieve supporting reviews for.
    top_k : int, optional
        Maximum number of chunks to return (clamped to the corpus size).

    Returns
    -------
    str
        The selected chunks separated by blank lines.
    """
    # Embed and L2-normalize the query.
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    query_embedding = query_embedding / query_embedding.norm()
    # L2-normalize chunk embeddings along the embedding dimension so the
    # dot product below is exactly cosine similarity.
    norm_chunk_embeddings = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
    similarities = torch.matmul(norm_chunk_embeddings, query_embedding)
    # Clamp k so torch.topk cannot raise when the corpus has fewer than
    # top_k chunks.
    k = min(top_k, similarities.shape[0])
    top_k_indices = torch.topk(similarities, k=k).indices.cpu().numpy()
    return "\n\n".join(chunks[i] for i in top_k_indices)
# Hosted inference client for the chat model that generates the answers.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
def respond(message, history):
    """Stream a chatbot reply grounded in retrieved review context.

    Parameters
    ----------
    message : str
        The latest user message.
    history : list[dict] | None
        Prior turns as OpenAI-style ``{"role", "content"}`` dicts
        (supplied by ``gr.ChatInterface(type="messages")``).

    Yields
    ------
    str
        The accumulated response text after each streamed token.
    """
    context = get_relevant_context(message, top_k=3)
    messages = [{"role": "system", "content": f"You are chatbot specializing in Austin coffee shops. Use the following reviews to recommend coffee shops: {context}. The name of the coffee shop is listed first before the '|' in each review."}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})
    response = ""
    # Stream tokens from the model's response.
    for message_chunk in client.chat_completion(
        messages,
        max_tokens=1000,
        stream=True
    ):
        # Stream deltas may omit 'content' entirely OR carry an explicit
        # None (role-only / finish chunks); `or ''` covers both so the
        # concatenation below never raises TypeError.
        token = message_chunk['choices'][0]['delta'].get('content') or ''
        response += token
        yield response
# Wire the streaming respond() generator into a chat UI; type="messages"
# makes Gradio pass history as OpenAI-style role/content dicts.
chatbot = gr.ChatInterface(respond, type="messages")
chatbot.launch()