"""RAG chatbot: retrieves relevant knowledge-base chunks and streams answers."""

# stdlib / third-party imports
import gradio as gr
import torch
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer

# Embedding model for retrieval + client for the hosted chat model.
model = SentenceTransformer('all-MiniLM-L6-v2')
client = InferenceClient("Qwen/Qwen2.5-72B-Instruct")

# Load the knowledge base once at startup.
with open("tenet_knowledge_base.txt", "r", encoding="utf-8") as file:
    tenet_knowledge_base = file.read()

# Split the knowledge base into non-empty, whitespace-trimmed line chunks,
# then embed every chunk once so each query needs only a single encode call.
cleaned_chunks = [
    chunk.strip()
    for chunk in tenet_knowledge_base.strip().split("\n")
    if chunk.strip()
]
chunk_embeddings = model.encode(cleaned_chunks, convert_to_tensor=True)


def get_top_chunks(query, k=3):
    """Return the k knowledge-base chunks most similar to *query*.

    Ranks chunks by cosine similarity between the query embedding and the
    precomputed chunk embeddings. k is clamped to the number of available
    chunks so a small knowledge base cannot make torch.topk raise.
    """
    query_embedding = model.encode(query, convert_to_tensor=True)
    similarities = torch.nn.functional.cosine_similarity(
        query_embedding.unsqueeze(0), chunk_embeddings
    )
    top_indices = torch.topk(similarities, k=min(k, len(cleaned_chunks))).indices
    return [cleaned_chunks[i] for i in top_indices]


def format_context(chunks):
    """Format retrieved chunks into a numbered context block for the prompt."""
    return "\n".join([f"Context {i+1}: {chunk}" for i, chunk in enumerate(chunks)])


def _history_to_messages(history):
    """Convert gradio chat history into OpenAI-style message dicts.

    Handles both formats gradio may pass depending on version:
    (user, assistant) pairs, or already-structured {"role", "content"} dicts.
    Empty/None turns are skipped.
    """
    messages = []
    for entry in history or []:
        if isinstance(entry, dict):
            messages.append({"role": entry["role"], "content": entry["content"]})
        else:
            user_turn, bot_turn = entry
            if user_turn:
                messages.append({"role": "user", "content": user_turn})
            if bot_turn:
                messages.append({"role": "assistant", "content": bot_turn})
    return messages


def respond(message, history):
    """Stream an answer grounded in retrieved knowledge-base context.

    Retrieves the top chunks for *message*, builds a system prompt around
    them, replays the prior conversation (bug fix: the original ignored
    *history*, so the bot had no memory across turns), and yields the
    growing response as tokens stream in.
    """
    top_chunks = get_top_chunks(message)
    context = format_context(top_chunks)
    system_prompt = f"""You are a knowledgeable and kind assistant. Use this context to answer: {context} Make sure that you clarify that information may be inaccurate. If you do not know, please state that you are unsure. Give a response in two sentences or less. Also, embed as many links as possible and make sure to hyperlink whenever you can, but you do not have to provide links unless you have them. 
DO NOT MAKE UP FAKE LINKS."""

    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    # Stream the completion so the UI shows partial output immediately.
    response = ""
    for chunk in client.chat_completion(
        messages, max_tokens=200, temperature=0.15, top_p=0.7, stream=True
    ):
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response


# Gradio chat UI; guard so importing this module does not launch a server.
if __name__ == "__main__":
    gr.ChatInterface(
        respond,
        chatbot=gr.Chatbot(height=400),
        textbox=gr.Textbox(placeholder="Ask your questions here..."),
        examples=[
            "How can I make a difference if I’m not old enough to vote?",
            "What’s the best way to get involved in a cause I care about?",
            "How do I know if the information I’m sharing or supporting is true?",
        ],
    ).launch()