# First-chatbot / app.py
# Source: Hugging Face Space by louisepxllock — commit adf37bb (verified), "Update app.py"
import gradio as gr
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer
import torch
import numpy as np
# Load the knowledge base the chatbot retrieves from (UTF-8 text file
# that was uploaded alongside this app).
with open("uni_dataset_embedding_friendly.txt", "r", encoding="utf-8") as file:
    uni_dataset_text = file.read()
print("success")  # confirm the dataset file was read

# Split the dataset into retrieval chunks on the "---" separator lines,
# discarding empty pieces.
chunks = [piece.strip() for piece in uni_dataset_text.split("\n---\n") if piece.strip()]

# Sentence-embedding model used for semantic retrieval.
embedder = SentenceTransformer('all-MiniLM-L6-v2')
chunk_embeddings = embedder.encode(chunks, convert_to_tensor=True)
# L2-normalise each row so a plain dot product equals cosine similarity.
norm_chunk_embeddings = torch.nn.functional.normalize(chunk_embeddings, dim=1)
def get_relevant_context(query, top_k=3):
    """Return the ``top_k`` dataset chunks most similar to *query*.

    Returns a tuple ``(selected, indices)`` where ``indices`` are positions
    into the module-level ``chunks`` list, most similar first.
    """
    # Embed the query and L2-normalise it so dot products against the
    # pre-normalised chunk matrix are cosine similarities.
    q_vec = embedder.encode(query, convert_to_tensor=True)
    q_vec = torch.nn.functional.normalize(q_vec, dim=0)

    # One similarity score per chunk.
    scores = torch.matmul(norm_chunk_embeddings, q_vec)

    # Never request more results than there are chunks.
    k = min(top_k, scores.shape[0])
    best = torch.topk(scores, k=k).indices.cpu().tolist()
    return [chunks[i] for i in best], best
# Serverless Inference API client for the hosted chat model.
client = InferenceClient("microsoft/phi-4")
def respond(message, history):
    """Chat handler for gr.ChatInterface (type="messages").

    Retrieves the dataset chunks most relevant to *message* (fixing the
    original bug where get_relevant_context was defined but never called,
    so the dataset was ignored), injects them into the system prompt, and
    returns the model's reply as a stripped string.

    message: the user's latest input string.
    history: prior turns as a list of {"role", "content"} dicts, or empty.
    """
    # Ground the reply in the uploaded dataset via semantic retrieval.
    context_chunks, _ = get_relevant_context(message)
    system_prompt = (
        "you are a realistic and friendly environmental activist whos aim is to promote the use of renewable energy."
        + "\n\nUse the following reference material when it is relevant:\n"
        + "\n---\n".join(context_chunks)
    )

    messages = [{"role": "system", "content": system_prompt}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = client.chat_completion(
        messages=messages,
        temperature=0.0,  # deterministic output
        max_tokens=500,
        top_p=1.0,
    )
    # ChatCompletionOutput is documented for attribute access, not dict indexing.
    return response.choices[0].message.content.strip()
# Build the chat UI; type="messages" makes history a list of role/content dicts.
chatbot = gr.ChatInterface(respond, type = "messages", title = "Watt the bot?") #chatbot ui - conversation history and user input
# Start the Gradio server (blocks until the app is stopped).
chatbot.launch()