import gradio as gr
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer
import torch

# Open the uploaded .txt file in read mode with UTF-8 encoding
with open("uni_dataset_embedding_friendly.txt", "r", encoding="utf-8") as file:
    # Read the entire contents of the file and store it in a variable
    uni_dataset_text = file.read()

# Confirm the file loaded successfully
print("success")

# Split the dataset into chunks on the "---" separators, dropping empty entries
chunks = [chunk.strip() for chunk in uni_dataset_text.split("\n---\n") if chunk.strip()]

# Embed every chunk once at startup and L2-normalise the embeddings so the
# dot product below is equivalent to cosine similarity
embedder = SentenceTransformer("all-MiniLM-L6-v2")
chunk_embeddings = embedder.encode(chunks, convert_to_tensor=True)
norm_chunk_embeddings = torch.nn.functional.normalize(chunk_embeddings, dim=1)

def get_relevant_context(query, top_k=3):
    # Embed and normalise the query, then rank chunks by cosine similarity
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    query_embedding = torch.nn.functional.normalize(query_embedding, dim=0)
    similarities = torch.matmul(norm_chunk_embeddings, query_embedding)
    k = min(top_k, similarities.shape[0])
    top_k_indices = torch.topk(similarities, k=k).indices.cpu().tolist()
    selected = [chunks[i] for i in top_k_indices]
    return selected, top_k_indices

client = InferenceClient("microsoft/phi-4")

def respond(message, history):
    # Retrieve the most relevant dataset chunks for this message and pass
    # them to the model alongside the system prompt
    context_chunks, _ = get_relevant_context(message)
    context = "\n---\n".join(context_chunks)
    messages = [{
        "role": "system",
        "content": (
            "You are a realistic and friendly environmental activist whose aim "
            "is to promote the use of renewable energy. Use the following "
            "context when it is relevant:\n" + context
        ),
    }]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})
    response = client.chat_completion(
        messages=messages,
        temperature=0.0,
        max_tokens=500,
        top_p=1.0,
    )
    return response.choices[0].message.content.strip()

# Chatbot UI - conversation history and user input
chatbot = gr.ChatInterface(respond, type="messages", title="Watt the bot?")
chatbot.launch()
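
# A minimal retrieval sanity check (a sketch, kept commented out since
# launch() above blocks): uncomment and run it before launching the UI to
# eyeball which chunks the retriever returns. The query string below is an
# illustrative placeholder, not something taken from the dataset.
# selected, indices = get_relevant_context("What renewable energy courses are offered?")
# for i, text in zip(indices, selected):
#     # Print the chunk index and the first 120 characters of each match
#     print(f"[chunk {i}] {text[:120]}")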