# kaitwithkwk's picture
# Update app.py
# 3c2146a verified
import gradio as gr
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer
import torch
import numpy as np
import pandas as pd
# ---- Data preparation ----------------------------------------------------
# Read the Yelp coffee-shop review dataset.
df = pd.read_csv("raw_yelp_review_data_short.csv")

# Build one retrievable text chunk per review: shop name, review body and
# star rating joined with " | " separators.
chunks = [
    f"{shop} | {review} | {stars}"
    for shop, review, stars in zip(
        df["coffee_shop_name"].astype(str),
        df["full_review_text"].astype(str),
        df["star_rating"].astype(str),
    )
]

# Small, fast sentence-embedding model used for retrieval.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Embed every chunk once up front so each query only needs a single
# encode + similarity pass.
chunk_embeddings = embedder.encode(chunks, convert_to_tensor=True)
def get_relevant_context(query, top_k=3):
    """Return the ``top_k`` review chunks most similar to *query*.

    Ranks the precomputed chunk embeddings by cosine similarity against
    the query embedding and joins the best matches with blank lines into
    a single context string for the LLM prompt.

    Parameters
    ----------
    query : str
        The user's message to retrieve supporting reviews for.
    top_k : int, optional
        Maximum number of chunks to return (clamped to the corpus size).

    Returns
    -------
    str
        The selected chunks separated by blank lines.
    """
    # Embed and L2-normalize the query.
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    query_embedding = query_embedding / query_embedding.norm()
    # L2-normalize chunk embeddings along the embedding dimension so the
    # dot product below is exactly cosine similarity.
    norm_chunk_embeddings = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
    similarities = torch.matmul(norm_chunk_embeddings, query_embedding)
    # Clamp k so torch.topk cannot raise when the corpus has fewer than
    # top_k chunks.
    k = min(top_k, similarities.shape[0])
    top_k_indices = torch.topk(similarities, k=k).indices.cpu().numpy()
    return "\n\n".join(chunks[i] for i in top_k_indices)
# Hosted inference client for the chat model that generates the answers.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
def respond(message, history):
    """Stream a chatbot reply grounded in retrieved review context.

    Parameters
    ----------
    message : str
        The latest user message.
    history : list[dict] | None
        Prior turns as OpenAI-style ``{"role", "content"}`` dicts
        (supplied by ``gr.ChatInterface(type="messages")``).

    Yields
    ------
    str
        The accumulated response text after each streamed token.
    """
    context = get_relevant_context(message, top_k=3)
    messages = [{"role": "system", "content": f"You are chatbot specializing in Austin coffee shops. Use the following reviews to recommend coffee shops: {context}. The name of the coffee shop is listed first before the '|' in each review."}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})
    response = ""
    # Stream tokens from the model's response.
    for message_chunk in client.chat_completion(
        messages,
        max_tokens=1000,
        stream=True
    ):
        # Stream deltas may omit 'content' entirely OR carry an explicit
        # None (role-only / finish chunks); `or ''` covers both so the
        # concatenation below never raises TypeError.
        token = message_chunk['choices'][0]['delta'].get('content') or ''
        response += token
        yield response
# Wire the streaming respond() generator into a chat UI; type="messages"
# makes Gradio pass history as OpenAI-style role/content dicts.
chatbot = gr.ChatInterface(respond, type="messages")
chatbot.launch()