# Anjibot — Gradio chatbot acting as an AI course rep for CS Group A.
# Answers lecturer info, course document links, and general FAQ queries.
import json
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import random
from sentence_transformers import SentenceTransformer
import gradio as gr
import time
# ---- Data loading (runs at import time) ----
# Lecturer directory; phone_number is read as str to preserve leading zeros,
# then the whole frame is cast to str so formatting never sees NaN/float.
lecturer_data = pd.read_csv('lecturers.csv', dtype={"phone_number": str}).astype(str)
# Course -> document-link table ('School files Link' / 'Study Smarter Link' columns).
doc_link_data = pd.read_csv('docs_link.csv')
# FAQ knowledge base — presumably a list of {"question": ..., "answer": ...}
# records (indexed by 'question'/'answer' below).
with open('anjibot_data.json', 'r', encoding='utf-8') as file:
    anjibot_data = json.load(file)
def load_default_responses(filename):
    """Read canned fallback responses from *filename*, one per line.

    Returns a list of whitespace-stripped lines (blank lines become "").
    """
    with open(filename, 'r', encoding='utf-8') as fh:
        return [line.strip() for line in fh]
# Canned replies used when a question is only loosely similar to the FAQ.
default_responses = load_default_responses('default_responses.txt')

# Sentence-embedding model used for semantic FAQ matching.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
def encode_text(text):
    """Return the sentence-embedding vector for a single string."""
    # model.encode expects a batch; take the lone vector back out.
    return model.encode([text])[0]
def answer_general_query(user_question):
    """Answer a general question by semantic similarity against the FAQ.

    Returns the stored answer for a strong match (> 0.5), a random canned
    response for a weak match (> 0.3), and a "meet Anji" message otherwise.
    """
    user_question_embedding = encode_text(user_question)
    # PERF FIX: the original re-encoded every FAQ question on every call.
    # The FAQ never changes at runtime, so encode once and cache on the
    # function object; results are identical.
    question_embeddings = getattr(answer_general_query, "_embedding_cache", None)
    if question_embeddings is None:
        questions = [item['question'] for item in anjibot_data]
        question_embeddings = np.array([encode_text(q) for q in questions])
        answer_general_query._embedding_cache = question_embeddings
    similarities = cosine_similarity([user_question_embedding], question_embeddings)
    most_similar_index = np.argmax(similarities)
    max_similarity = similarities[0][most_similar_index]
    if max_similarity > 0.5:
        return anjibot_data[most_similar_index]['answer']
    if max_similarity > 0.3:
        # Loosely related question: reply with a random canned fallback.
        return random.choice(default_responses)
    return "I'm sorry, I couldn't find the answer to your question. Please meet Anji or any of the class excos."
def normalize_text(text):
    """Normalize *text* to a set of lowercase words with possessives removed.

    Each word is lowercased, a trailing "'s" possessive is dropped, and all
    non-alphanumeric characters are stripped.
    """
    # BUG FIX: the original stripped the apostrophe first (via the isalnum
    # filter) and then called word.rstrip("'s"), which strips *every*
    # trailing "'" or "s" character — corrupting plain words ending in "s"
    # ("class" -> "cla", "boss" -> "bo"). Remove the "'s" suffix explicitly
    # before discarding punctuation instead.
    normalized_words = set()
    for raw_word in text.split():
        word = raw_word.lower()
        if word.endswith("'s"):
            word = word[:-2]
        word = ''.join(ch for ch in word if ch.isalnum())
        if word:
            normalized_words.add(word)
    return normalized_words
# Stop words and titles ignored during word-overlap matching so generic words
# don't inflate similarity scores. (Deduplicated: the original listed "the",
# "in", "of" and "and" twice; membership semantics are unchanged.)
exceptions = [
    "mr", "dr", "mrs.", "the", "i", "to", "ayo", "in",
    "of", "and", "a", "for", "with", "by", "at",
]
# Custom similarity: count the distinct meaningful words shared by two texts.
def word_lookup(text, query, exceptions=exceptions):
    """Return the number of words common to *text* and *query*, ignoring
    the stop words in *exceptions*."""
    shared = normalize_text(text) & normalize_text(query)
    return len(shared - set(exceptions))
def get_phone_number_response(best_match):
    """Format the matched lecturer's phone number, or apologise if absent."""
    number = best_match['phone_number']
    if not number:
        return "Sorry, the phone number is not available."
    return (
        f"Sure! {best_match['name']} the {best_match['course']} "
        f"({best_match['course_code']}) lecturer's phone number is {number}."
    )
def get_office_response(best_match):
    """Format the matched lecturer's office location.

    Handles the sentinel value "No longer in Babcock" and empty offices.
    """
    office = best_match['office']
    if office == "No longer in Babcock":
        return (
            f"Oops! {best_match['name']} the {best_match['course']} "
            f"({best_match['course_code']}) lecturer is {office}."
        )
    if office:
        return (
            f"Sure thing! {best_match['name']} the {best_match['course']} "
            f"({best_match['course_code']}) lecturer's office is at {office}."
        )
    return "Sorry, the office location is not available."
def get_basic_info_response(query, best_match):
    """Answer 'who teaches X' / 'what is the code for X' style questions."""
    if "code" in query:
        return f"The course code for {best_match['course']} is {best_match['course_code']}"
    return (
        f"{best_match['name']} is the {best_match['course']} "
        f"({best_match['course_code']}) lecturer."
    )
def get_default_response(best_match):
    """Fallback reply for lecturer queries that match no specific sub-intent."""
    course, code = best_match['course'], best_match['course_code']
    return f"{course} has the course code: {code}"
def process_query(query, best_match):
    """Route a lecturer-related query to the appropriate response builder."""
    # Note: a query containing "phone number" also contains "number", so a
    # single substring check covers both cases of the original condition.
    if "number" in query:
        return get_phone_number_response(best_match)
    if "office" in query:
        return get_office_response(best_match)
    if "lecturer" in query or "who" in query or "code" in query:
        return get_basic_info_response(query, best_match)
    return get_default_response(best_match)
def answer_lecturer_query(query):
    """Answer lecturer-related questions (phone, office, who-teaches, codes).

    Picks the lecturer row whose course/code/name shares the most words with
    the query. When the query spells out a course code ("cosc 101"), the code
    must match the best row; otherwise answers from the best match directly.
    Falls back to the general FAQ handler when nothing matches at all.
    """
    query = query.lower()
    max_score = 0
    best_match = None
    for _, row in lecturer_data.iterrows():
        text = f"{row['course']} {row['course_code']} {row['name']}".lower()
        score = word_lookup(query, text)
        if score > max_score:
            max_score = score
            best_match = row
    if max_score < 1:
        # Nothing lecturer-shaped matched; let the FAQ handler try.
        return answer_general_query(query)
    words = query.split()
    if any(prefix in query for prefix in ["cosc", "geds", "ged"]):
        for i, word in enumerate(words):
            if word.isdigit():
                # Course codes look like "cosc 101": prefix word + number.
                # NOTE(review): if the digit is the first word, words[i - 1]
                # wraps to the last word — preserved from the original.
                query_course_code = f"{words[i - 1]} {word}"
                if query_course_code.upper() == best_match['course_code']:
                    return process_query(query, best_match)
                return "Sorry, I couldn't find info about the course you've mentioned."
        # BUG FIX: the original returned None when a course prefix appeared
        # without a number (e.g. "cosc lecturer"); answer from the best match.
        return process_query(query, best_match)
    return process_query(query, best_match)
def get_links_response(query, best_match):
    """Build a reply with the document link matching *query* for a course.

    School-files keywords win over study-set keywords when both appear.
    """
    school_files = ["past questions", "pst questions", "pq", "pstq", "slides for"]
    study_smarter = ["flashcards", "study set", "study", "study app", "study link", "slides", "today", "class", "lecturer"]
    if any(keyword in query for keyword in school_files):
        if best_match['School files Link'] != "Unavailable":
            return f"Looking for slides and/or past questions for {best_match['course']} ({best_match['course_code']})? This link should help you: {best_match['School files Link']}"
        return f"Oops! Sorry, I can't find slides or past questions for that course."
    if any(keyword in query for keyword in study_smarter):
        if best_match['Study Smarter Link'] != "Unavailable":
            return f"The Study Smarter study set for {best_match['course']} ({best_match['course_code']}) contains the recent slides sent by the lecturer (and possibly flashcards, notes, and more learning resources). The link to the study set: {best_match['Study Smarter Link']}"
        return f"I'm sorry, I can't find any study smarter study set for that course."
    # BUG FIX: the original returned None here, which the UI rendered as the
    # literal text "None". Reachable because intent keywords like "pdf" or
    # "pst" are not in either local keyword list.
    return "Sorry, I couldn't tell which document you need. Try asking for past questions or the study set for a course."
def answer_doc_link_query(query):
    """Answer requests for course document links (past questions, study sets).

    Same matching scheme as answer_lecturer_query but over the document-link
    table; asks for a course name/code when nothing matches.
    """
    query = query.lower()
    max_score = 0
    best_match = None
    for _, row in doc_link_data.iterrows():
        text = f"{row['course']} {row['course_code']}".lower()
        score = word_lookup(query, text)
        if score > max_score:
            max_score = score
            best_match = row
    if max_score < 1:
        return "Sure! To assist you better, please provide the name or code of the course you are referring to, along with the entire query."
    words = query.split()
    if any(prefix in query for prefix in ["cosc", "geds", "ged"]):
        for i, word in enumerate(words):
            if word.isdigit():
                # Course codes look like "cosc 101": prefix word + number.
                query_course_code = f"{words[i - 1]} {word}"
                if query_course_code.upper() == best_match['course_code']:
                    return get_links_response(query, best_match)
                return "Sorry, I couldn't find info about the course you've mentioned."
        # BUG FIX: the original returned None when a course prefix appeared
        # without a number; answer from the best match instead.
        return get_links_response(query, best_match)
    return get_links_response(query, best_match)
def get_intent(query):
    """Classify *query* as 'unknown', 'lecturer', 'doc_link' or 'general'.

    Keyword-based, checked in priority order: unknown first (explicitly
    unsupported topics), then lecturer, then documents, else general.
    """
    # BUG FIX: the original list had a missing comma — "lecturer's" "phone
    # number" silently concatenated into the single useless keyword
    # "lecturer'sphone number". Split into the two intended entries.
    lecturer_keywords = ["lecturer", "lecturer's", "phone number", "number",
                         "office", "who", "code", "course", "name"]
    doc_link_keywords = ["past questions", "pstq", "pq", "pst",
                         "study materials", "flashcards", "studysmarter",
                         "study smarter", "slides", "slide", "pdf"]
    unknown_keywords = ["email", "missed", "write"]
    query_lower = query.lower()
    if any(keyword in query_lower for keyword in unknown_keywords):
        return "unknown"
    if any(keyword in query_lower for keyword in lecturer_keywords):
        return "lecturer"
    if any(keyword in query_lower for keyword in doc_link_keywords):
        return "doc_link"
    return "general"
def get_response(query):
    """Top-level dispatcher: route a raw user message to the right handler."""
    # Empty messages come from stickers/media the bot can't read.
    if query == "":
        return "Yo! Don't send me stickers, I don't understand them anyway 😕"
    intent = get_intent(query)
    if intent == "unknown":
        return "Ugh, your query is quite beyond me. Please meet Anji directly :)"
    if intent == "lecturer":
        return answer_lecturer_query(query)
    if intent == "doc_link":
        return answer_doc_link_query(query)
    return answer_general_query(query)
# ---- Gradio UI: chat layout, disclaimer accordion, and event wiring ----
with gr.Blocks() as iface:
    gr.Markdown(
        """
# Anjibot
Hi friend! I'm Anjibot, CS Group A AI Course Rep. How can I assist you today?
""")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Type your question here", label="User")
    submit = gr.Button("Submit")
    # Clears both the textbox and the chat history.
    clear = gr.ClearButton([msg, chatbot])
    with gr.Accordion("Read this, pleaseeee"):
        gr.Markdown(
            """
#### As you interact with me, please note:
- Our chats are not private.
- I'm still undergoing training (I'm not perfect).
- I'm not ChatGPT (My knowledge base is limited to class-related issues).
- I'm British ;)
""")

    def respond(message, chat_history):
        # Build the bot reply and append the (user, bot) pair for the Chatbot.
        bot_message = get_response(message)
        chat_history.append(
            (f"**You:** {message}", f"**Anjibot:** {bot_message}"))
        # NOTE(review): fixed 2s pause — presumably to make replies feel less
        # instantaneous in the UI; confirm it's intentional.
        time.sleep(2)
        # First return value "" clears the input textbox.
        return "", chat_history

    # Both the Submit button and pressing Enter in the textbox send a message.
    submit.click(respond, [msg, chatbot], [msg, chatbot])
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

if __name__ == "__main__":
    iface.launch()