# Anjibot / app.py
# Author: dataprincess — "Update app.py" (commit 931de63, verified)
import json
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import random
from sentence_transformers import SentenceTransformer
import gradio as gr
import time
# Load datasets.
# lecturers.csv: lecturer name, course, course_code, office, phone_number.
# phone_number is read as str to keep leading zeros; NOTE(review): .astype(str)
# turns missing cells into the literal string "nan" — downstream truthiness
# checks should account for that.
lecturer_data = pd.read_csv('lecturers.csv', dtype={"phone_number": str}).astype(str)
# docs_link.csv: per-course 'School files Link' and 'Study Smarter Link'.
doc_link_data = pd.read_csv('docs_link.csv')
# anjibot_data.json: list of {"question": ..., "answer": ...} FAQ pairs.
with open('anjibot_data.json', 'r', encoding='utf-8') as file:
    anjibot_data = json.load(file)
def load_default_responses(filename):
    """Read fallback responses from *filename*: one per line, whitespace-stripped."""
    with open(filename, 'r', encoding='utf-8') as fh:
        return [line.strip() for line in fh]
# Load default responses from file: generic replies used for medium-confidence
# matches in answer_general_query.
default_responses = load_default_responses('default_responses.txt')
# Load Sentence Transformer model used to embed user questions and FAQ entries.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
def encode_text(text):
    """Embed a single string with the sentence-transformer and return its vector."""
    return model.encode([text])[0]
# function to answer general queries
def answer_general_query(user_question):
    """Answer a general question by semantic similarity against the FAQ data.

    Returns the stored answer above 0.5 cosine similarity, a random canned
    response between 0.3 and 0.5, and an apology below that.

    PERF FIX: the original re-embedded every FAQ question on every call; the
    dataset embeddings are now computed once (batched) and cached on the
    function object.
    """
    user_question_embedding = encode_text(user_question)
    question_embeddings = getattr(answer_general_query, "_question_embeddings", None)
    if question_embeddings is None:
        questions = [item['question'] for item in anjibot_data]
        # One batched encode is much faster than per-question calls.
        question_embeddings = np.asarray(model.encode(questions))
        answer_general_query._question_embeddings = question_embeddings
    similarities = cosine_similarity([user_question_embedding], question_embeddings)
    most_similar_index = int(np.argmax(similarities))
    max_similarity = similarities[0][most_similar_index]
    # Thresholds: confident match -> stored answer; weak match -> canned reply.
    if max_similarity > 0.5:
        return anjibot_data[most_similar_index]['answer']
    elif max_similarity > 0.3:
        # Select a random default response
        return random.choice(default_responses)
    else:
        return "I'm sorry, I couldn't find the answer to your question. Please meet Anji or any of the class excos."
def normalize_text(text):
    """Lowercase *text*, drop possessive "'s" and punctuation; return the word set.

    BUG FIX: the original used word.rstrip("'s"), which strips a trailing
    *character set* (any run of "'" and "s"), mangling ordinary words:
    "class" -> "cla", "boss's" -> "bo". Possessives are now removed with an
    explicit suffix check, done while the apostrophe is still present.
    """
    normalized_words = set()
    for raw_word in text.lower().split():
        # Remove a possessive "'s" suffix before stripping punctuation.
        if raw_word.endswith("'s"):
            raw_word = raw_word[:-2]
        # Keep only alphanumeric characters.
        word = ''.join(ch for ch in raw_word if ch.isalnum())
        if word:
            normalized_words.add(word)
    return normalized_words
# Words ignored by word_lookup when counting matches: titles and common
# stopwords that would otherwise inflate similarity scores.
# FIXES vs original: duplicates removed ('in', 'and', 'of', 'the' appeared
# twice) and 'mrs.' stored without the period — normalize_text strips
# punctuation, so the dotted form could never match a normalized word.
exceptions = frozenset([
    "mr", "mrs", "dr", "the", "i", "to", "ayo", "in",
    "of", "and", "a", "for", "with", "by", "at",
])
# custom similarity matching function
def word_lookup(text, query, exceptions=exceptions):
    """Count distinct normalized words shared by *text* and *query*,
    ignoring any word listed in *exceptions*."""
    shared = normalize_text(text) & normalize_text(query)
    return len(shared - set(exceptions))
def get_phone_number_response(best_match):
    """Format the matched lecturer's phone number, or apologise if unavailable.

    BUG FIX: lecturer_data is loaded with .astype(str), so a missing number
    arrives as the literal string "nan" — truthy, which made the unavailable
    branch unreachable. It is now checked explicitly.
    """
    phone = best_match['phone_number']
    if phone and phone != "nan":
        return f"Sure! {best_match['name']} the {best_match['course']} ({best_match['course_code']}) lecturer's phone number is {phone}."
    return "Sorry, the phone number is not available."
def get_office_response(best_match):
    """Format the matched lecturer's office location.

    Handles the special sentinel "No longer in Babcock" first.
    BUG FIX: a missing office arrives as the literal string "nan" (because of
    .astype(str)), which is truthy — check it explicitly so the unavailable
    message can actually be reached.
    """
    office = best_match['office']
    if office == "No longer in Babcock":
        return f"Oops! {best_match['name']} the {best_match['course']} ({best_match['course_code']}) lecturer is {office}."
    if office and office != "nan":
        return f"Sure thing! {best_match['name']} the {best_match['course']} ({best_match['course_code']}) lecturer's office is at {office}."
    return "Sorry, the office location is not available."
def get_basic_info_response(query, best_match):
    """Answer "what's the code for X" / "who teaches X" style questions."""
    course = best_match['course']
    code = best_match['course_code']
    if "code" in query:
        return f"The course code for {course} is {code}"
    return f"{best_match['name']} is the {course} ({code}) lecturer."
def get_default_response(best_match):
    """Fallback lecturer answer: restate the matched course's code."""
    course, code = best_match['course'], best_match['course_code']
    return f"{course} has the course code: {code}"
def process_query(query, best_match):
    """Route a lecturer query to the matching response builder."""
    if "phone number" in query or "number" in query:
        return get_phone_number_response(best_match)
    if "office" in query:
        return get_office_response(best_match)
    if "lecturer" in query or "who" in query or "code" in query:
        return get_basic_info_response(query, best_match)
    return get_default_response(best_match)
def answer_lecturer_query(query):
    """Answer lecturer-related questions by fuzzy-matching rows of lecturer_data.

    Falls back to answer_general_query when no row matches at all. When the
    user spells out a course code (e.g. "cosc 301"), it must agree with the
    matched row before an answer is given.

    BUG FIXES vs original:
    - returned None when a course prefix ("cosc"/"geds"/"ged") was present but
      no number followed it; now falls through to the best fuzzy match.
    - a leading bare number paired with tokens[-1] (negative-index wraparound);
      now skipped.
    """
    query = query.lower()
    max_score = 0
    best_match = None
    for _, row in lecturer_data.iterrows():
        text = f"{row['course']} {row['course_code']} {row['name']}".lower()
        score = word_lookup(query, text)
        # Keep the row with the highest word-overlap score.
        if score > max_score:
            max_score = score
            best_match = row
    if best_match is None:
        # Nothing matched at all — treat it as a general question.
        return answer_general_query(query)
    tokens = query.split()
    if any(prefix in query for prefix in ("cosc", "geds", "ged")):
        for i, token in enumerate(tokens):
            if token.isdigit():
                if i == 0:
                    break  # a bare leading number has no prefix word to pair with
                # Pair the number with the word before it, e.g. "cosc" + "301".
                query_course_code = f"{tokens[i - 1]} {token}"
                if query_course_code.upper() == best_match['course_code']:
                    return process_query(query, best_match)
                return "Sorry, I couldn't find info about the course you've mentioned."
    return process_query(query, best_match)
def get_links_response(query, best_match):
    """Return the slides/past-questions link or the Study Smarter link for a course.

    BUG FIX: the original returned None when the query matched neither keyword
    group (reachable, e.g. a "pdf" query routed here by get_intent); it now
    ends with an explicit fallback message.
    """
    school_files = ["past questions", "pst questions", "pq", "pstq", "slides for"]
    study_smarter = ["flashcards", "study set", "study", "study app", "study link", "slides", "today", "class", "lecturer"]
    course_tag = f"{best_match['course']} ({best_match['course_code']})"
    if any(keyword in query for keyword in school_files):
        if best_match['School files Link'] != "Unavailable":
            return f"Looking for slides and/or past questions for {course_tag}? This link should help you: {best_match['School files Link']}"
        return "Oops! Sorry, I can't find slides or past questions for that course."
    if any(keyword in query for keyword in study_smarter):
        if best_match['Study Smarter Link'] != "Unavailable":
            return f"The Study Smarter study set for {course_tag} contains the recent slides sent by the lecturer (and possibly flashcards, notes, and more learning resources). The link to the study set: {best_match['Study Smarter Link']}"
        return "I'm sorry, I can't find any study smarter study set for that course."
    # Neither keyword group matched — never return None to the chat UI.
    return "Sorry, I couldn't find any resources matching your request for that course."
def answer_doc_link_query(query):
    """Answer document/link questions by fuzzy-matching rows of doc_link_data.

    Asks for a course name/code when nothing matches. When the user spells out
    a course code (e.g. "cosc 301"), it must agree with the matched row.

    BUG FIXES vs original (mirrors answer_lecturer_query):
    - returned None when a course prefix was present but no number followed it;
      now falls through to the best fuzzy match.
    - a leading bare number paired with tokens[-1] (negative-index wraparound);
      now skipped.
    """
    query = query.lower()
    max_score = 0
    best_match = None
    for _, row in doc_link_data.iterrows():
        text = f"{row['course']} {row['course_code']}".lower()
        score = word_lookup(query, text)
        # Keep the row with the highest word-overlap score.
        if score > max_score:
            max_score = score
            best_match = row
    if best_match is None:
        return "Sure! To assist you better, please provide the name or code of the course you are referring to, along with the entire query."
    tokens = query.split()
    if any(prefix in query for prefix in ("cosc", "geds", "ged")):
        for i, token in enumerate(tokens):
            if token.isdigit():
                if i == 0:
                    break  # a bare leading number has no prefix word to pair with
                # Pair the number with the word before it, e.g. "cosc" + "301".
                if f"{tokens[i - 1]} {token}".upper() == best_match['course_code']:
                    return get_links_response(query, best_match)
                return "Sorry, I couldn't find info about the course you've mentioned."
    return get_links_response(query, best_match)
# Define function to determine intent
def get_intent(query):
    """Classify *query* as "unknown", "lecturer", "doc_link", or "general".

    BUG FIX: the original lecturer keyword list contained
    "lecturer's" "phone number" (missing comma), which Python concatenates
    into the single nonsense keyword "lecturer'sphone number"; both are now
    separate entries.
    """
    # Keywords/phrases associated with each intent.
    lecturer_keywords = ["lecturer", "lecturer's", "phone number", "number", "office", "who", "code", "course", "name"]
    doc_link_keywords = ["past questions", "pstq", "pq", "pst", "study materials", "flashcards", "studysmarter",
                         "study smarter", "slides", "slide", "pdf"]
    unknown_keywords = ["email", "missed", "write"]
    query_lower = query.lower()
    # Order matters: explicitly-unsupported topics win, then lecturer, then docs.
    if any(keyword in query_lower for keyword in unknown_keywords):
        return "unknown"
    if any(keyword in query_lower for keyword in lecturer_keywords):
        return "lecturer"
    if any(keyword in query_lower for keyword in doc_link_keywords):
        return "doc_link"
    return "general"
def get_response(query):
    """Dispatch *query* to the handler for its detected intent."""
    if query == "":
        # Stickers / empty messages arrive as an empty string.
        return "Yo! Don't send me stickers, I don't understand them anyway 😕"
    intent = get_intent(query)
    if intent == "unknown":
        return "Ugh, your query is quite beyond me. Please meet Anji directly :)"
    if intent == "lecturer":
        return answer_lecturer_query(query)
    if intent == "doc_link":
        return answer_doc_link_query(query)
    return answer_general_query(query)
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks() as iface:
    # Header shown above the chat window.
    gr.Markdown(
        """
        # Anjibot
        Hi friend! I'm Anjibot, CS Group A AI Course Rep. How can I assist you today?
        """)
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Type your question here", label="User")
    submit = gr.Button("Submit")
    # Clears both the input box and the chat transcript.
    clear = gr.ClearButton([msg, chatbot])
    with gr.Accordion("Read this, pleaseeee"):
        gr.Markdown(
            """
            #### As you interact with me, please note:
            - Our chats are not private.
            - I'm still undergoing training (I'm not perfect).
            - I'm not ChatGPT (My knowledge base is limited to class-related issues).
            - I'm British ;)
            """)
    def respond(message, chat_history):
        # Run the message through the intent pipeline and append the
        # (user, bot) pair to the transcript; clear the input box on return.
        bot_message = get_response(message)
        chat_history.append(
            (f"**You:** {message}", f"**Anjibot:** {bot_message}"))
        # NOTE(review): artificial 2 s pause before the reply renders —
        # presumably to make the bot feel less instant; confirm it's intended.
        time.sleep(2)
        return "", chat_history
    # Both the Submit button and pressing Enter in the textbox send the message.
    submit.click(respond, [msg, chatbot], [msg, chatbot])
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
# Launch when run directly (Hugging Face Spaces executes this file as a script).
if __name__ == "__main__":
    iface.launch()