# First-chatbot / app.py
# Source: Hugging Face Space by louisepxllock — commit adf37bb (verified), "Update app.py"
import gradio as gr
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer
import torch
import numpy as np
# Load the knowledge base the chatbot retrieves from (UTF-8 text file
# that was uploaded alongside this app).
with open("uni_dataset_embedding_friendly.txt", "r", encoding="utf-8") as file:
    uni_dataset_text = file.read()
print("success")  # confirm the dataset file was read

# Split the dataset into retrieval chunks on the "---" separator lines,
# discarding empty pieces.
chunks = [piece.strip() for piece in uni_dataset_text.split("\n---\n") if piece.strip()]

# Sentence-embedding model used for semantic retrieval.
embedder = SentenceTransformer('all-MiniLM-L6-v2')
chunk_embeddings = embedder.encode(chunks, convert_to_tensor=True)
# L2-normalise each row so a plain dot product equals cosine similarity.
norm_chunk_embeddings = torch.nn.functional.normalize(chunk_embeddings, dim=1)
def get_relevant_context(query, top_k=3):
    """Return the ``top_k`` dataset chunks most similar to *query*.

    Returns a tuple ``(selected, indices)`` where ``indices`` are positions
    into the module-level ``chunks`` list, most similar first.
    """
    # Embed the query and L2-normalise it so dot products against the
    # pre-normalised chunk matrix are cosine similarities.
    q_vec = embedder.encode(query, convert_to_tensor=True)
    q_vec = torch.nn.functional.normalize(q_vec, dim=0)

    # One similarity score per chunk.
    scores = torch.matmul(norm_chunk_embeddings, q_vec)

    # Never request more results than there are chunks.
    k = min(top_k, scores.shape[0])
    best = torch.topk(scores, k=k).indices.cpu().tolist()
    return [chunks[i] for i in best], best
# Serverless Inference API client for the hosted chat model.
client = InferenceClient("microsoft/phi-4")
def respond(message, history):
    """Chat handler for gr.ChatInterface (type="messages").

    Retrieves the dataset chunks most relevant to *message* (fixing the
    original bug where get_relevant_context was defined but never called,
    so the dataset was ignored), injects them into the system prompt, and
    returns the model's reply as a stripped string.

    message: the user's latest input string.
    history: prior turns as a list of {"role", "content"} dicts, or empty.
    """
    # Ground the reply in the uploaded dataset via semantic retrieval.
    context_chunks, _ = get_relevant_context(message)
    system_prompt = (
        "you are a realistic and friendly environmental activist whos aim is to promote the use of renewable energy."
        + "\n\nUse the following reference material when it is relevant:\n"
        + "\n---\n".join(context_chunks)
    )

    messages = [{"role": "system", "content": system_prompt}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = client.chat_completion(
        messages=messages,
        temperature=0.0,  # deterministic output
        max_tokens=500,
        top_p=1.0,
    )
    # ChatCompletionOutput is documented for attribute access, not dict indexing.
    return response.choices[0].message.content.strip()
# Build the chat UI; type="messages" makes history a list of role/content dicts.
chatbot = gr.ChatInterface(respond, type = "messages", title = "Watt the bot?") #chatbot ui - conversation history and user input
# Start the Gradio server (blocks until the app is stopped).
chatbot.launch()