File size: 10,556 Bytes
f87f1f2
e5c567d
 
 
 
 
11ccd38
63eff77
e5c567d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b9b741d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e5c567d
b9b741d
e5c567d
 
 
 
 
 
 
11ccd38
e5c567d
 
 
 
11ccd38
b9b741d
 
f360851
 
 
 
 
 
 
 
e5c567d
b9b741d
e5c567d
 
 
3b68ffb
c30b85f
 
 
 
e8c7059
 
 
 
 
 
 
 
 
c30b85f
e5c567d
 
 
 
 
 
 
 
 
 
 
 
 
 
c30b85f
 
 
 
 
 
 
931de63
c30b85f
 
 
3b68ffb
e5c567d
 
c30b85f
e5c567d
 
 
 
 
 
b9b741d
e5c567d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63eff77
16f76a2
 
 
0a85d7d
16f76a2
 
63eff77
16f76a2
 
71b60bb
16f76a2
0a85d7d
 
 
 
 
 
 
 
 
 
63eff77
 
16f76a2
7147ba2
63eff77
 
 
16f76a2
 
67a8e04
63eff77
3f3c355
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
import json
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import random
from sentence_transformers import SentenceTransformer
import gradio as gr
import time

# Load datasets at import time (module-level side effects: reads two CSVs and
# one JSON from the working directory).
# NOTE: .astype(str) stringifies every cell, so missing CSV values become the
# literal string "nan" — downstream truthiness checks will not catch them.
lecturer_data = pd.read_csv('lecturers.csv', dtype={"phone_number": str}).astype(str)
doc_link_data = pd.read_csv('docs_link.csv')

# FAQ-style list of {'question': ..., 'answer': ...} records used by
# answer_general_query (structure inferred from its usage below).
with open('anjibot_data.json', 'r', encoding='utf-8') as file:
    anjibot_data = json.load(file)

def load_default_responses(filename):
    """Read one canned fallback response per line from *filename*.

    Each line is stripped of surrounding whitespace; blank lines yield
    empty strings, matching the original behaviour.
    """
    with open(filename, 'r', encoding='utf-8') as handle:
        return [line.strip() for line in handle]

# Load default responses from file; used as "grey zone" replies when a query
# is only loosely similar to a known FAQ question (see answer_general_query).
default_responses = load_default_responses('default_responses.txt')

# Load Sentence Transformer model used by encode_text for all embeddings.
# Downloading/loading happens once at import time.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

def encode_text(text):
    """Embed *text* with the sentence-transformer model; returns a 1-D vector."""
    # model.encode works on batches, so wrap the single string and unwrap
    # the single result.
    return model.encode([text])[0]

def answer_general_query(user_question):
    """Answer a free-form question by embedding similarity against the FAQ data.

    Returns the stored answer for the closest FAQ question when similarity is
    high, a random canned response in the borderline band, and an apology
    otherwise.
    """
    user_question_embedding = encode_text(user_question)

    # PERF: memoize the FAQ question embeddings on the function object.
    # anjibot_data is loaded once at import time, so the original code's
    # re-encoding of every question on every call was pure wasted work.
    question_embeddings = getattr(answer_general_query, "_question_embeddings", None)
    if question_embeddings is None:
        questions = [item['question'] for item in anjibot_data]
        question_embeddings = np.array([encode_text(q) for q in questions])
        answer_general_query._question_embeddings = question_embeddings

    similarities = cosine_similarity([user_question_embedding], question_embeddings)
    most_similar_index = np.argmax(similarities)
    max_similarity = similarities[0][most_similar_index]

    # Similarity thresholds: confident match -> stored answer;
    # borderline -> random small-talk default; otherwise admit defeat.
    if max_similarity > 0.5:
        return anjibot_data[most_similar_index]['answer']
    elif max_similarity > 0.3:
        return random.choice(default_responses)
    else:
        return "I'm sorry, I couldn't find the answer to your question. Please meet Anji or any of the class excos."


def normalize_text(text):
    """Lowercase *text* and return its set of cleaned words.

    Possessive "'s" suffixes are removed, then any remaining
    non-alphanumeric characters are dropped from each word.

    BUG FIX: the original called word.rstrip("'s") AFTER apostrophes had
    already been stripped, so it removed any run of trailing 's' characters
    from every word ("class" -> "cla", "boss" -> "bo"). str.rstrip strips a
    character set, not a suffix.
    """
    normalized_words = set()
    for token in text.lower().split():
        # Strip a true possessive suffix while the apostrophe is still there.
        if token.endswith("'s"):
            token = token[:-2]
        # Keep only alphanumeric characters (words were split on whitespace,
        # so the space check of the original is no longer needed per-char).
        word = ''.join(ch for ch in token if ch.isalnum())
        if word:
            normalized_words.add(word)
    return normalized_words

# Stop words ignored by word_lookup when counting shared words.
# These are compared against normalize_text() output (lowercase,
# alphanumeric-only), so entries must be in that form too: the old
# 'mrs.' entry could never match because of the dot. Duplicate
# entries ("in", "of", "and", "the") have also been removed —
# membership testing is unaffected.
exceptions = ["mr", "mrs", "dr", "the", "a", "i", "to", "ayo",
              "in", "of", "and", "for", "with", "by", "at"]

# Custom similarity score: how many meaningful words two strings share.
def word_lookup(text, query, exceptions=exceptions):
    """Count the distinct words present in both *text* and *query*,
    excluding the stop words in *exceptions*."""
    shared = normalize_text(text) & normalize_text(query)
    return len(shared - set(exceptions))

def get_phone_number_response(best_match):
    """Return a sentence with the lecturer's phone number, or an apology.

    *best_match* is a row of lecturer_data. Because that frame is loaded
    with .astype(str), a missing phone number arrives as the literal
    (truthy) string "nan" — the original falsy check could never fire, so
    "nan" is now tested for explicitly.
    """
    phone = best_match['phone_number']
    if phone and phone.lower() != 'nan':
        return f"Sure! {best_match['name']} the {best_match['course']} ({best_match['course_code']}) lecturer's phone number is {phone}."
    else:
        return "Sorry, the phone number is not available."

def get_office_response(best_match):
    """Return a sentence describing the lecturer's office location.

    *best_match* is a row of lecturer_data. Because that frame is loaded
    with .astype(str), a missing office cell arrives as the literal
    (truthy) string "nan" — it is now tested for explicitly instead of
    relying on falsiness.
    """
    office = best_match['office']
    if office == "No longer in Babcock":
        return f"Oops! {best_match['name']} the {best_match['course']} ({best_match['course_code']}) lecturer is {office}."
    elif office and office.lower() != 'nan':
        return f"Sure thing! {best_match['name']} the {best_match['course']} ({best_match['course_code']}) lecturer's office is at {office}."
    else:
        return "Sorry, the office location is not available."

def get_basic_info_response(query, best_match):
    """Answer 'who teaches X' / 'what is the code for X' style questions."""
    name = best_match['name']
    course = best_match['course']
    code = best_match['course_code']
    if "code" in query:
        return f"The course code for {course} is {code}"
    return f"{name} is the {course} ({code}) lecturer."

def get_default_response(best_match):
    """Fallback lecturer-intent reply: just state the course code."""
    course, code = best_match['course'], best_match['course_code']
    return f"{course} has the course code: {code}"


def process_query(query, best_match):
    """Route a lecturer-intent query to the matching response builder."""
    if "number" in query:  # also covers "phone number", a superset match
        return get_phone_number_response(best_match)
    if "office" in query:
        return get_office_response(best_match)
    if "lecturer" in query or "who" in query or "code" in query:
        return get_basic_info_response(query, best_match)
    return get_default_response(best_match)

def answer_lecturer_query(query):
    """Find the lecturer row best matching *query* and answer from it.

    Falls back to answer_general_query when no row shares a meaningful
    word with the query.
    """
    query = query.lower()
    max_score = 0
    best_match = None

    # Score every lecturer row by the number of shared (non stop-word) words.
    for _, row in lecturer_data.iterrows():
        text = f"{row['course']} {row['course_code']} {row['name']}".lower()
        score = word_lookup(query, text)
        if score > max_score:
            max_score = score
            best_match = row

    if max_score < 1:
        return answer_general_query(query)

    # When the query names a department prefix, verify any explicit course
    # number against the matched row before answering.
    if any(word in query for word in ["cosc", "geds", "ged"]):
        words = query.split()
        for i, word in enumerate(words):
            if word.isdigit():
                # The prefix is assumed to be the word just before the number.
                query_course_code = f"{words[i - 1]} {word}"
                if query_course_code.upper() == best_match['course_code']:
                    return process_query(query, best_match)
                return "Sorry, I couldn't find info about the course you've mentioned."
        # BUG FIX: the original fell out of this branch and returned None
        # whenever a prefix appeared with no digits (e.g. "cosc lecturer");
        # fall through to the best match instead.
    return process_query(query, best_match)

def get_links_response(query, best_match):
    """Return the school-files or Study Smarter link for a matched course.

    BUG FIX: the original returned None when the query matched neither
    keyword list (e.g. "pdf", which routes here via get_intent but appears
    in neither list below); an explicit fallback reply is now returned.
    """
    school_files = ["past questions", "pst questions", "pq", "pstq", "slides for"]
    study_smarter = ["flashcards", "study set", "study", "study app", "study link", "slides", "today", "class", "lecturer"]

    if any(keyword in query for keyword in school_files):
        if best_match['School files Link'] != "Unavailable":
            return f"Looking for slides and/or past questions for {best_match['course']} ({best_match['course_code']})? This link should help you:  {best_match['School files Link']}"
        else:
            return "Oops! Sorry, I can't find slides or past questions for that course."
    elif any(keyword in query for keyword in study_smarter):
        if best_match['Study Smarter Link'] != "Unavailable":
            return f"The Study Smarter study set for {best_match['course']} ({best_match['course_code']}) contains the recent slides sent by the lecturer (and possibly flashcards, notes, and more learning resources). The link to the study set:  {best_match['Study Smarter Link']}"
        else:
            return "I'm sorry, I can't find any study smarter study set for that course."
    else:
        # Matched the course but not the kind of resource wanted.
        return f"I found {best_match['course']} ({best_match['course_code']}), but I'm not sure whether you want past questions/slides or a Study Smarter study set. Could you rephrase?"

def answer_doc_link_query(query):
    """Find the course row in doc_link_data best matching *query* and
    return its document/study links.

    Asks for clarification when no row shares a meaningful word with the
    query.
    """
    query = query.lower()
    max_score = 0
    best_match = None

    # Score every course row by the number of shared (non stop-word) words.
    for _, row in doc_link_data.iterrows():
        text = f"{row['course']} {row['course_code']}".lower()
        score = word_lookup(query, text)
        if score > max_score:
            max_score = score
            best_match = row

    if max_score < 1:
        return "Sure! To assist you better, please provide the name or code of the course you are referring to, along with the entire query."

    # When the query names a department prefix, verify any explicit course
    # number against the matched row before answering.
    if any(word in query for word in ["cosc", "geds", "ged"]):
        words = query.split()
        for i, word in enumerate(words):
            if word.isdigit():
                # The prefix is assumed to be the word just before the number.
                query_course_code = f"{words[i - 1]} {word}"
                if query_course_code.upper() == best_match['course_code']:
                    return get_links_response(query, best_match)
                return "Sorry, I couldn't find info about the course you've mentioned."
        # BUG FIX: the original fell out of this branch and returned None
        # whenever a prefix appeared with no digits (e.g. "cosc slides");
        # fall through to the best match instead.
    return get_links_response(query, best_match)


# Determine which handler a query should be routed to.
def get_intent(query):
    """Classify *query* into 'unknown', 'lecturer', 'doc_link' or 'general'.

    Keyword lists are checked in priority order: topics the bot explicitly
    can't help with first, then lecturer questions, then document/link
    requests; anything else is 'general'.
    """
    # BUG FIX: the original list had a missing comma, so "lecturer's" and
    # "phone number" were silently concatenated into the single unmatchable
    # keyword "lecturer'sphone number".
    lecturer_keywords = ["lecturer", "lecturer's", "phone number", "number",
                         "office", "who", "code", "course", "name"]
    doc_link_keywords = ["past questions", "pstq", "pq", "pst", "study materials",
                         "flashcards", "studysmarter", "study smarter", "slides",
                         "slide", "pdf"]
    unknown_keywords = ["email", "missed", "write"]

    # Substring matching against the lowercased query, first hit wins.
    query_lower = query.lower()
    if any(keyword in query_lower for keyword in unknown_keywords):
        return "unknown"
    elif any(keyword in query_lower for keyword in lecturer_keywords):
        return "lecturer"
    elif any(keyword in query_lower for keyword in doc_link_keywords):
        return "doc_link"
    else:
        return "general"


def get_response(query):
    """Top-level dispatcher: map one chat message to Anjibot's reply."""
    # Guard first: empty messages (e.g. stickers arriving with no text).
    # The original computed the intent before this check, wasting work, and
    # missed whitespace-only messages entirely.
    if not query.strip():
        return "Yo! Don't send me stickers, I don't understand them anyway 😕"

    intent = get_intent(query)
    if intent == "unknown":
        return "Ugh, your query is quite beyond me. Please meet Anji directly :)"
    if intent == "lecturer":
        return answer_lecturer_query(query)
    if intent == "doc_link":
        return answer_doc_link_query(query)
    return answer_general_query(query)

# Build the Gradio chat UI. Executed at import time; the app is only
# launched from the __main__ guard below.
with gr.Blocks() as iface:
    gr.Markdown(
    """
    # Anjibot
    Hi friend! I'm Anjibot, CS Group A AI Course Rep. How can I assist you today?
    """)

    # Chat widgets: history display, input box, and the two action buttons.
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Type your question here", label="User")
    submit = gr.Button("Submit")
    clear = gr.ClearButton([msg, chatbot])

    # Collapsible disclaimer shown beneath the chat.
    with gr.Accordion("Read this, pleaseeee"):
        gr.Markdown(
        """
        #### As you interact with me, please note:
        - Our chats are not private.
        - I'm still undergoing training (I'm not perfect).
        - I'm not ChatGPT (My knowledge base is limited to class-related issues).
        - I'm British ;)
        """)

    def respond(message, chat_history):
        # Append the (user, bot) exchange to the history and clear the textbox.
        bot_message = get_response(message)
        chat_history.append(
        (f"**You:** {message}", f"**Anjibot:** {bot_message}"))
        # Artificial 2-second pause before the reply is shown.
        time.sleep(2)
        return "", chat_history

    # Fire respond() on both the Submit button and pressing Enter in the box.
    submit.click(respond, [msg, chatbot], [msg, chatbot])
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

# Launch the web app only when executed as a script (not on import).
if __name__ == "__main__":
    iface.launch()