"""Gradio chatbot that recommends Austin coffee shops via retrieval-augmented
generation: Yelp review rows are embedded once at startup, the best-matching
reviews are retrieved per user message, and a hosted Zephyr model answers."""

import gradio as gr
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer
import torch
import numpy as np
import pandas as pd

# Load and process the CSV file.
df = pd.read_csv("raw_yelp_review_data_short.csv")

# Combine the columns of each review row into one retrievable text chunk:
# "<shop name> | <review text> | <star rating>".
chunks = (
    df['coffee_shop_name'].astype(str) + " | " +
    df['full_review_text'].astype(str) + " | " +
    df['star_rating'].astype(str)
).tolist()

# Load an embedding model (this one is light and fast).
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Precompute embeddings for all chunks (as a tensor for fast similarity search),
# and L2-normalize them ONCE here: the corpus never changes, so normalizing per
# query (as the original did) was redundant work on every request.
chunk_embeddings = embedder.encode(chunks, convert_to_tensor=True)
norm_chunk_embeddings = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)


def get_relevant_context(query, top_k=3):
    """Return the `top_k` review chunks most similar to `query`.

    Args:
        query: Free-text user message to embed and match against the corpus.
        top_k: Number of chunks to retrieve (clamped to the corpus size).

    Returns:
        The selected chunks joined by blank lines, as one context string.
    """
    # Compute and normalize the query embedding so the dot product below is
    # cosine similarity.
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    query_embedding = query_embedding / query_embedding.norm()

    # Cosine similarity between the query and each (pre-normalized) chunk.
    similarities = torch.matmul(norm_chunk_embeddings, query_embedding)

    # Guard against top_k exceeding the corpus size — torch.topk raises
    # otherwise (e.g. with a very small CSV).
    k = min(top_k, len(chunks))
    top_k_indices = torch.topk(similarities, k=k).indices.cpu().numpy()

    # Concatenate the top chunks into a single context string.
    context = "\n\n".join(chunks[i] for i in top_k_indices)
    print(context)  # debug: show what the model is grounded on
    return context


client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def respond(message, history):
    """Gradio ChatInterface callback: stream a grounded answer for `message`.

    Yields the partial response after every received token so the UI updates
    incrementally.
    """
    context = get_relevant_context(message, top_k=3)
    messages = [{
        "role": "system",
        "content": (
            f"You are chatbot specializing in Austin coffee shops. "
            f"Use the following reviews to recommend coffee shops: {context}. \n"
            "The name of the coffee shop is listed first before the '|' in each review."
        ),
    }]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = ""
    # Stream tokens from the model's response.
    for message_chunk in client.chat_completion(
        messages,
        max_tokens=1000,
        stream=True,
    ):
        # Some deltas omit 'content' entirely, and others carry content=None
        # (e.g. the final chunk) — `or ""` covers both so `+=` never sees None.
        token = message_chunk['choices'][0]['delta'].get('content') or ""
        response += token
        yield response


if __name__ == "__main__":
    chatbot = gr.ChatInterface(respond, type="messages")
    chatbot.launch()