import gradio as gr
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer
import torch

# Open the uploaded .txt file in read mode with UTF-8 encoding
with open("uni_dataset_embedding_friendly.txt", "r", encoding="utf-8") as file:
    # Read the entire contents of the file and store it in a variable
    uni_dataset_text = file.read()

# Confirm the file loaded successfully
print("success")

# Split the dataset into chunks on the "---" separators, dropping empty entries
chunks = [chunk.strip() for chunk in uni_dataset_text.split("\n---\n") if chunk.strip()]

# Embed every chunk once at startup and L2-normalise the embeddings so the
# dot product below is equivalent to cosine similarity
embedder = SentenceTransformer("all-MiniLM-L6-v2")
chunk_embeddings = embedder.encode(chunks, convert_to_tensor=True)
norm_chunk_embeddings = torch.nn.functional.normalize(chunk_embeddings, dim=1)

def get_relevant_context(query, top_k=3):
    # Embed and normalise the query, then rank chunks by cosine similarity
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    query_embedding = torch.nn.functional.normalize(query_embedding, dim=0)
    similarities = torch.matmul(norm_chunk_embeddings, query_embedding)
    k = min(top_k, similarities.shape[0])
    top_k_indices = torch.topk(similarities, k=k).indices.cpu().tolist()
    selected = [chunks[i] for i in top_k_indices]
    return selected, top_k_indices

client = InferenceClient("microsoft/phi-4")

def respond(message, history):
    # Retrieve the most relevant dataset chunks for this message and pass
    # them to the model alongside the system prompt
    context_chunks, _ = get_relevant_context(message)
    context = "\n---\n".join(context_chunks)
    messages = [{
        "role": "system",
        "content": (
            "You are a realistic and friendly environmental activist whose aim "
            "is to promote the use of renewable energy. Use the following "
            "context when it is relevant:\n" + context
        ),
    }]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})
    response = client.chat_completion(
        messages=messages,
        temperature=0.0,
        max_tokens=500,
        top_p=1.0,
    )
    return response.choices[0].message.content.strip()

# Chatbot UI - conversation history and user input
chatbot = gr.ChatInterface(respond, type="messages", title="Watt the bot?")
chatbot.launch()
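
# A minimal retrieval sanity check (a sketch, kept commented out since
# launch() above blocks): uncomment and run it before launching the UI to
# eyeball which chunks the retriever returns. The query string below is an
# illustrative placeholder, not something taken from the dataset.
# selected, indices = get_relevant_context("What renewable energy courses are offered?")
# for i, text in zip(indices, selected):
#     # Print the chunk index and the first 120 characters of each match
#     print(f"[chunk {i}] {text[:120]}")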