Spaces:

mingbaer
/

PracticingGenerativeAI

Sleeping

App Files Files Community

PracticingGenerativeAI / app.py

mingbaer

Update app.py

7e1cbaa verified 5 months ago

raw

history blame contribute delete

2.52 kB

	import gradio as gr
	from huggingface_hub import InferenceClient
	# SEMANTIC SEARCH STEP 1
	from sentence_transformers import SentenceTransformer
	import torch

	# SEMANTIC SEARCH STEP 2 --> EDIT WITH YOUR OWN KNOWLEDGEBASE WHEN READY
	# Load the entire knowledge base into memory as a single string.
	with open("water_cycle.txt", "r", encoding="utf-8") as kb_file:
	    water_cycle_text = kb_file.read()
	# Echo the raw text to the logs so the loaded content can be checked.
	print(water_cycle_text)

	# SEMANTIC SEARCH STEP 3
	def preprocess_text(text):
	    """Split raw knowledge-base text into cleaned, non-empty line chunks.

	    The text is split on newline characters and every piece is stripped of
	    surrounding whitespace. Pieces that are empty after stripping (blank
	    lines) are dropped, so they are never embedded or returned as hits.

	    Args:
	        text: Full knowledge-base contents as one string.

	    Returns:
	        The stripped, non-empty lines in their original order; an empty
	        list when ``text`` contains no non-blank line.
	    """
	    cleaned_chunks = [
	        stripped
	        for stripped in (line.strip() for line in text.split("\n"))
	        if stripped
	    ]
	    # Debug output: the chunks themselves, then how many there are.
	    print(cleaned_chunks)
	    print(len(cleaned_chunks))
	    return cleaned_chunks

	# Split the loaded knowledge base into line-based chunks; these chunks are
	# what gets embedded and searched below.
	cleaned_chunks = preprocess_text(water_cycle_text) # edit this with my knowledgebase when ready

	# SEMANTIC SEARCH STEP 4
	# Shared sentence-embedding model: create_embeddings() uses it for the
	# chunks and get_top_chunks() uses it for each query.
	model = SentenceTransformer('all-MiniLM-L6-v2')

	def create_embeddings(text_chunks):
	    """Encode the given text chunks with the module-level ``model``.

	    Args:
	        text_chunks: The chunk strings to embed.

	    Returns:
	        Whatever ``model.encode(..., convert_to_tensor=True)`` returns for
	        ``text_chunks``; it and its ``shape`` are printed for inspection.
	    """
	    embeddings = model.encode(text_chunks, convert_to_tensor=True)
	    # Debug output: the embeddings first, then their shape.
	    for debug_value in (embeddings, embeddings.shape):
	        print(debug_value)
	    return embeddings

	# Embed every knowledge-base chunk once at start-up; respond() passes this
	# to get_top_chunks(), which then only has to encode the incoming query.
	chunk_embeddings = create_embeddings(cleaned_chunks)

	# SEMANTIC SEARCH STEP 5
	def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
	    """Return the text chunks most similar to ``query``.

	    The query is encoded with the module-level ``model``; the query vector
	    and every chunk vector are scaled to unit length, so their dot product
	    is the cosine similarity. The highest-scoring chunks are returned.

	    Args:
	        query: The user's question.
	        chunk_embeddings: Tensor with one embedding row per entry of
	            ``text_chunks`` (it is normalized along ``dim=1``).
	        text_chunks: The chunk strings, in the same order as the rows of
	            ``chunk_embeddings``.
	        top_k: Maximum number of chunks to return. Defaults to 3.

	    Returns:
	        Up to ``top_k`` chunk strings, best match first. Fewer are returned
	        when fewer chunks exist, and an empty list when there are none.
	    """
	    # torch.topk raises when k exceeds the number of candidates, so never
	    # ask for more chunks than are actually available.
	    k = min(top_k, len(text_chunks), chunk_embeddings.shape[0])
	    if k <= 0:
	        return []

	    query_embedding = model.encode(query, convert_to_tensor=True)
	    query_embedding_normalized = query_embedding / query_embedding.norm()
	    chunk_embeddings_normalized = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
	    similarities = torch.matmul(chunk_embeddings_normalized, query_embedding_normalized)
	    print(similarities)
	    top_indices = torch.topk(similarities, k=k).indices
	    print(top_indices)
	    # Convert to plain ints before indexing the Python list of chunks.
	    return [text_chunks[index] for index in top_indices.tolist()]

	# Inference client created for "microsoft/phi-4"; respond() calls its
	# chat_completion() method to generate each reply.
	client = InferenceClient("microsoft/phi-4")

	def respond(message, history):
	    """Generate the chatbot's reply to ``message``.

	    Looks up the knowledge-base chunks most similar to ``message``, places
	    them in the system prompt, replays the earlier conversation, and asks
	    the module-level ``client`` for a chat completion.

	    Args:
	        message: The user's latest message.
	        history: Earlier conversation turns as dicts carrying at least
	            ``"role"`` and ``"content"``; may be empty or ``None``.

	    Returns:
	        The text of the first completion choice with surrounding whitespace
	        removed, or an empty string if the completion has no content.
	    """
	    info = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
	    messages = [
	        {
	            "role": "system",
	            "content": (
	                f"You are an angry teacher chatbot using {info} to answer questions "
	                f"but always responding by complaining about your students."
	            ),
	        }
	    ]

	    # Forward only role and content: history entries may carry extra
	    # UI-only keys that a chat-completion backend can reject.
	    for turn in history or []:
	        messages.append({"role": turn["role"], "content": turn["content"]})

	    messages.append({"role": "user", "content": message})

	    response = client.chat_completion(
	        messages,
	        max_tokens=100,
	        temperature=0.5,
	    )

	    content = response["choices"][0]["message"]["content"]
	    # Guard against a missing content field so .strip() cannot fail.
	    return (content or "").strip()

	# Wrap respond() in a Gradio chat interface.
	# NOTE(review): type="messages" presumably makes Gradio pass history as
	# role/content dicts, which is what respond() relies on; confirm against
	# the installed Gradio version.
	chatbot = gr.ChatInterface(respond, type="messages")

	# Start the app.
	# NOTE(review): share=True asks for a public share link and debug=True
	# enables debug mode; confirm both are wanted where this app is hosted.
	chatbot.launch(debug=True, share=True)