"""Gradio chatbot that recommends Austin coffee shops via retrieval-augmented
generation: Yelp review rows are embedded once at startup, the best-matching
reviews are retrieved per user message, and a hosted Zephyr model answers."""

import gradio as gr
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer
import torch
import numpy as np
import pandas as pd

# Load and process the CSV file.
df = pd.read_csv("raw_yelp_review_data_short.csv")

# Combine the columns of each review row into one retrievable text chunk:
# "<shop name> | <review text> | <star rating>".
chunks = (
    df['coffee_shop_name'].astype(str) + " | " +
    df['full_review_text'].astype(str) + " | " +
    df['star_rating'].astype(str)
).tolist()

# Load an embedding model (this one is light and fast).
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Precompute embeddings for all chunks (as a tensor for fast similarity search),
# and L2-normalize them ONCE here: the corpus never changes, so normalizing per
# query (as the original did) was redundant work on every request.
chunk_embeddings = embedder.encode(chunks, convert_to_tensor=True)
norm_chunk_embeddings = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)


def get_relevant_context(query, top_k=3):
    """Return the `top_k` review chunks most similar to `query`.

    Args:
        query: Free-text user message to embed and match against the corpus.
        top_k: Number of chunks to retrieve (clamped to the corpus size).

    Returns:
        The selected chunks joined by blank lines, as one context string.
    """
    # Compute and normalize the query embedding so the dot product below is
    # cosine similarity.
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    query_embedding = query_embedding / query_embedding.norm()

    # Cosine similarity between the query and each (pre-normalized) chunk.
    similarities = torch.matmul(norm_chunk_embeddings, query_embedding)

    # Guard against top_k exceeding the corpus size — torch.topk raises
    # otherwise (e.g. with a very small CSV).
    k = min(top_k, len(chunks))
    top_k_indices = torch.topk(similarities, k=k).indices.cpu().numpy()

    # Concatenate the top chunks into a single context string.
    context = "\n\n".join(chunks[i] for i in top_k_indices)
    print(context)  # debug: show what the model is grounded on
    return context


client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def respond(message, history):
    """Gradio ChatInterface callback: stream a grounded answer for `message`.

    Yields the partial response after every received token so the UI updates
    incrementally.
    """
    context = get_relevant_context(message, top_k=3)
    messages = [{
        "role": "system",
        "content": (
            f"You are chatbot specializing in Austin coffee shops. "
            f"Use the following reviews to recommend coffee shops: {context}. \n"
            "The name of the coffee shop is listed first before the '|' in each review."
        ),
    }]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = ""
    # Stream tokens from the model's response.
    for message_chunk in client.chat_completion(
        messages,
        max_tokens=1000,
        stream=True,
    ):
        # Some deltas omit 'content' entirely, and others carry content=None
        # (e.g. the final chunk) — `or ""` covers both so `+=` never sees None.
        token = message_chunk['choices'][0]['delta'].get('content') or ""
        response += token
        yield response


if __name__ == "__main__":
    chatbot = gr.ChatInterface(respond, type="messages")
    chatbot.launch()