# Anjibot / app.py
# Author: dataprincess — "Update app.py" (commit 931de63, verified)
import json
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import random
from sentence_transformers import SentenceTransformer
import gradio as gr
import time
# Load datasets.
# lecturers.csv: lecturer name, course, course_code, office, phone_number.
# phone_number is read as str to keep leading zeros; NOTE(review): .astype(str)
# turns missing cells into the literal string "nan" — downstream truthiness
# checks should account for that.
lecturer_data = pd.read_csv('lecturers.csv', dtype={"phone_number": str}).astype(str)
# docs_link.csv: per-course 'School files Link' and 'Study Smarter Link'.
doc_link_data = pd.read_csv('docs_link.csv')
# anjibot_data.json: list of {"question": ..., "answer": ...} FAQ pairs.
with open('anjibot_data.json', 'r', encoding='utf-8') as file:
    anjibot_data = json.load(file)
def load_default_responses(filename):
    """Read fallback responses from *filename*: one per line, whitespace-stripped."""
    with open(filename, 'r', encoding='utf-8') as fh:
        return [line.strip() for line in fh]
# Load default responses from file: generic replies used for medium-confidence
# matches in answer_general_query.
default_responses = load_default_responses('default_responses.txt')
# Load Sentence Transformer model used to embed user questions and FAQ entries.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
def encode_text(text):
    """Embed a single string with the sentence-transformer and return its vector."""
    return model.encode([text])[0]
# function to answer general queries
def answer_general_query(user_question):
    """Answer a general question by semantic similarity against the FAQ data.

    Returns the stored answer above 0.5 cosine similarity, a random canned
    response between 0.3 and 0.5, and an apology below that.

    PERF FIX: the original re-embedded every FAQ question on every call; the
    dataset embeddings are now computed once (batched) and cached on the
    function object.
    """
    user_question_embedding = encode_text(user_question)
    question_embeddings = getattr(answer_general_query, "_question_embeddings", None)
    if question_embeddings is None:
        questions = [item['question'] for item in anjibot_data]
        # One batched encode is much faster than per-question calls.
        question_embeddings = np.asarray(model.encode(questions))
        answer_general_query._question_embeddings = question_embeddings
    similarities = cosine_similarity([user_question_embedding], question_embeddings)
    most_similar_index = int(np.argmax(similarities))
    max_similarity = similarities[0][most_similar_index]
    # Thresholds: confident match -> stored answer; weak match -> canned reply.
    if max_similarity > 0.5:
        return anjibot_data[most_similar_index]['answer']
    elif max_similarity > 0.3:
        # Select a random default response
        return random.choice(default_responses)
    else:
        return "I'm sorry, I couldn't find the answer to your question. Please meet Anji or any of the class excos."
def normalize_text(text):
    """Lowercase *text*, drop possessive "'s" and punctuation; return the word set.

    BUG FIX: the original used word.rstrip("'s"), which strips a trailing
    *character set* (any run of "'" and "s"), mangling ordinary words:
    "class" -> "cla", "boss's" -> "bo". Possessives are now removed with an
    explicit suffix check, done while the apostrophe is still present.
    """
    normalized_words = set()
    for raw_word in text.lower().split():
        # Remove a possessive "'s" suffix before stripping punctuation.
        if raw_word.endswith("'s"):
            raw_word = raw_word[:-2]
        # Keep only alphanumeric characters.
        word = ''.join(ch for ch in raw_word if ch.isalnum())
        if word:
            normalized_words.add(word)
    return normalized_words
# Words ignored by word_lookup when counting matches: titles and common
# stopwords that would otherwise inflate similarity scores.
# FIXES vs original: duplicates removed ('in', 'and', 'of', 'the' appeared
# twice) and 'mrs.' stored without the period — normalize_text strips
# punctuation, so the dotted form could never match a normalized word.
exceptions = frozenset([
    "mr", "mrs", "dr", "the", "i", "to", "ayo", "in",
    "of", "and", "a", "for", "with", "by", "at",
])
# custom similarity matching function
def word_lookup(text, query, exceptions=exceptions):
    """Count distinct normalized words shared by *text* and *query*,
    ignoring any word listed in *exceptions*."""
    shared = normalize_text(text) & normalize_text(query)
    return len(shared - set(exceptions))
def get_phone_number_response(best_match):
    """Format the matched lecturer's phone number, or apologise if unavailable.

    BUG FIX: lecturer_data is loaded with .astype(str), so a missing number
    arrives as the literal string "nan" — truthy, which made the unavailable
    branch unreachable. It is now checked explicitly.
    """
    phone = best_match['phone_number']
    if phone and phone != "nan":
        return f"Sure! {best_match['name']} the {best_match['course']} ({best_match['course_code']}) lecturer's phone number is {phone}."
    return "Sorry, the phone number is not available."
def get_office_response(best_match):
    """Format the matched lecturer's office location.

    Handles the special sentinel "No longer in Babcock" first.
    BUG FIX: a missing office arrives as the literal string "nan" (because of
    .astype(str)), which is truthy — check it explicitly so the unavailable
    message can actually be reached.
    """
    office = best_match['office']
    if office == "No longer in Babcock":
        return f"Oops! {best_match['name']} the {best_match['course']} ({best_match['course_code']}) lecturer is {office}."
    if office and office != "nan":
        return f"Sure thing! {best_match['name']} the {best_match['course']} ({best_match['course_code']}) lecturer's office is at {office}."
    return "Sorry, the office location is not available."
def get_basic_info_response(query, best_match):
    """Answer "what's the code for X" / "who teaches X" style questions."""
    course = best_match['course']
    code = best_match['course_code']
    if "code" in query:
        return f"The course code for {course} is {code}"
    return f"{best_match['name']} is the {course} ({code}) lecturer."
def get_default_response(best_match):
    """Fallback lecturer answer: restate the matched course's code."""
    course, code = best_match['course'], best_match['course_code']
    return f"{course} has the course code: {code}"
def process_query(query, best_match):
    """Route a lecturer query to the matching response builder."""
    if "phone number" in query or "number" in query:
        return get_phone_number_response(best_match)
    if "office" in query:
        return get_office_response(best_match)
    if "lecturer" in query or "who" in query or "code" in query:
        return get_basic_info_response(query, best_match)
    return get_default_response(best_match)
def answer_lecturer_query(query):
    """Answer lecturer-related questions by fuzzy-matching rows of lecturer_data.

    Falls back to answer_general_query when no row matches at all. When the
    user spells out a course code (e.g. "cosc 301"), it must agree with the
    matched row before an answer is given.

    BUG FIXES vs original:
    - returned None when a course prefix ("cosc"/"geds"/"ged") was present but
      no number followed it; now falls through to the best fuzzy match.
    - a leading bare number paired with tokens[-1] (negative-index wraparound);
      now skipped.
    """
    query = query.lower()
    max_score = 0
    best_match = None
    for _, row in lecturer_data.iterrows():
        text = f"{row['course']} {row['course_code']} {row['name']}".lower()
        score = word_lookup(query, text)
        # Keep the row with the highest word-overlap score.
        if score > max_score:
            max_score = score
            best_match = row
    if best_match is None:
        # Nothing matched at all — treat it as a general question.
        return answer_general_query(query)
    tokens = query.split()
    if any(prefix in query for prefix in ("cosc", "geds", "ged")):
        for i, token in enumerate(tokens):
            if token.isdigit():
                if i == 0:
                    break  # a bare leading number has no prefix word to pair with
                # Pair the number with the word before it, e.g. "cosc" + "301".
                query_course_code = f"{tokens[i - 1]} {token}"
                if query_course_code.upper() == best_match['course_code']:
                    return process_query(query, best_match)
                return "Sorry, I couldn't find info about the course you've mentioned."
    return process_query(query, best_match)
def get_links_response(query, best_match):
    """Return the slides/past-questions link or the Study Smarter link for a course.

    BUG FIX: the original returned None when the query matched neither keyword
    group (reachable, e.g. a "pdf" query routed here by get_intent); it now
    ends with an explicit fallback message.
    """
    school_files = ["past questions", "pst questions", "pq", "pstq", "slides for"]
    study_smarter = ["flashcards", "study set", "study", "study app", "study link", "slides", "today", "class", "lecturer"]
    course_tag = f"{best_match['course']} ({best_match['course_code']})"
    if any(keyword in query for keyword in school_files):
        if best_match['School files Link'] != "Unavailable":
            return f"Looking for slides and/or past questions for {course_tag}? This link should help you: {best_match['School files Link']}"
        return "Oops! Sorry, I can't find slides or past questions for that course."
    if any(keyword in query for keyword in study_smarter):
        if best_match['Study Smarter Link'] != "Unavailable":
            return f"The Study Smarter study set for {course_tag} contains the recent slides sent by the lecturer (and possibly flashcards, notes, and more learning resources). The link to the study set: {best_match['Study Smarter Link']}"
        return "I'm sorry, I can't find any study smarter study set for that course."
    # Neither keyword group matched — never return None to the chat UI.
    return "Sorry, I couldn't find any resources matching your request for that course."
def answer_doc_link_query(query):
    """Answer document/link questions by fuzzy-matching rows of doc_link_data.

    Asks for a course name/code when nothing matches. When the user spells out
    a course code (e.g. "cosc 301"), it must agree with the matched row.

    BUG FIXES vs original (mirrors answer_lecturer_query):
    - returned None when a course prefix was present but no number followed it;
      now falls through to the best fuzzy match.
    - a leading bare number paired with tokens[-1] (negative-index wraparound);
      now skipped.
    """
    query = query.lower()
    max_score = 0
    best_match = None
    for _, row in doc_link_data.iterrows():
        text = f"{row['course']} {row['course_code']}".lower()
        score = word_lookup(query, text)
        # Keep the row with the highest word-overlap score.
        if score > max_score:
            max_score = score
            best_match = row
    if best_match is None:
        return "Sure! To assist you better, please provide the name or code of the course you are referring to, along with the entire query."
    tokens = query.split()
    if any(prefix in query for prefix in ("cosc", "geds", "ged")):
        for i, token in enumerate(tokens):
            if token.isdigit():
                if i == 0:
                    break  # a bare leading number has no prefix word to pair with
                # Pair the number with the word before it, e.g. "cosc" + "301".
                if f"{tokens[i - 1]} {token}".upper() == best_match['course_code']:
                    return get_links_response(query, best_match)
                return "Sorry, I couldn't find info about the course you've mentioned."
    return get_links_response(query, best_match)
# Define function to determine intent
def get_intent(query):
    """Classify *query* as "unknown", "lecturer", "doc_link", or "general".

    BUG FIX: the original lecturer keyword list contained
    "lecturer's" "phone number" (missing comma), which Python concatenates
    into the single nonsense keyword "lecturer'sphone number"; both are now
    separate entries.
    """
    # Keywords/phrases associated with each intent.
    lecturer_keywords = ["lecturer", "lecturer's", "phone number", "number", "office", "who", "code", "course", "name"]
    doc_link_keywords = ["past questions", "pstq", "pq", "pst", "study materials", "flashcards", "studysmarter",
                         "study smarter", "slides", "slide", "pdf"]
    unknown_keywords = ["email", "missed", "write"]
    query_lower = query.lower()
    # Order matters: explicitly-unsupported topics win, then lecturer, then docs.
    if any(keyword in query_lower for keyword in unknown_keywords):
        return "unknown"
    if any(keyword in query_lower for keyword in lecturer_keywords):
        return "lecturer"
    if any(keyword in query_lower for keyword in doc_link_keywords):
        return "doc_link"
    return "general"
def get_response(query):
    """Dispatch *query* to the handler for its detected intent."""
    if query == "":
        # Stickers / empty messages arrive as an empty string.
        return "Yo! Don't send me stickers, I don't understand them anyway 😕"
    intent = get_intent(query)
    if intent == "unknown":
        return "Ugh, your query is quite beyond me. Please meet Anji directly :)"
    if intent == "lecturer":
        return answer_lecturer_query(query)
    if intent == "doc_link":
        return answer_doc_link_query(query)
    return answer_general_query(query)
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks() as iface:
    # Header shown above the chat window.
    gr.Markdown(
        """
        # Anjibot
        Hi friend! I'm Anjibot, CS Group A AI Course Rep. How can I assist you today?
        """)
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Type your question here", label="User")
    submit = gr.Button("Submit")
    # Clears both the input box and the chat transcript.
    clear = gr.ClearButton([msg, chatbot])
    with gr.Accordion("Read this, pleaseeee"):
        gr.Markdown(
            """
            #### As you interact with me, please note:
            - Our chats are not private.
            - I'm still undergoing training (I'm not perfect).
            - I'm not ChatGPT (My knowledge base is limited to class-related issues).
            - I'm British ;)
            """)
    def respond(message, chat_history):
        # Run the message through the intent pipeline and append the
        # (user, bot) pair to the transcript; clear the input box on return.
        bot_message = get_response(message)
        chat_history.append(
            (f"**You:** {message}", f"**Anjibot:** {bot_message}"))
        # NOTE(review): artificial 2 s pause before the reply renders —
        # presumably to make the bot feel less instant; confirm it's intended.
        time.sleep(2)
        return "", chat_history
    # Both the Submit button and pressing Enter in the textbox send the message.
    submit.click(respond, [msg, chatbot], [msg, chatbot])
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
# Launch when run directly (Hugging Face Spaces executes this file as a script).
if __name__ == "__main__":
    iface.launch()