File size: 10,556 Bytes
f87f1f2
e5c567d
 
 
 
 
11ccd38
63eff77
e5c567d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b9b741d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e5c567d
b9b741d
e5c567d
 
 
 
 
 
 
11ccd38
e5c567d
 
 
 
11ccd38
b9b741d
 
f360851
 
 
 
 
 
 
 
e5c567d
b9b741d
e5c567d
 
 
3b68ffb
c30b85f
 
 
 
e8c7059
 
 
 
 
 
 
 
 
c30b85f
e5c567d
 
 
 
 
 
 
 
 
 
 
 
 
 
c30b85f
 
 
 
 
 
 
931de63
c30b85f
 
 
3b68ffb
e5c567d
 
c30b85f
e5c567d
 
 
 
 
 
b9b741d
e5c567d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63eff77
16f76a2
 
 
0a85d7d
16f76a2
 
63eff77
16f76a2
 
71b60bb
16f76a2
0a85d7d
 
 
 
 
 
 
 
 
 
63eff77
 
16f76a2
7147ba2
63eff77
 
 
16f76a2
 
67a8e04
63eff77
3f3c355
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
import json
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import random
from sentence_transformers import SentenceTransformer
import gradio as gr
import time

# Load datasets at import time (module-level side effects: reads two CSVs and
# one JSON from the working directory).
# NOTE: .astype(str) stringifies every cell, so missing CSV values become the
# literal string "nan" — downstream truthiness checks will not catch them.
lecturer_data = pd.read_csv('lecturers.csv', dtype={"phone_number": str}).astype(str)
doc_link_data = pd.read_csv('docs_link.csv')

# FAQ-style list of {'question': ..., 'answer': ...} records used by
# answer_general_query (structure inferred from its usage below).
with open('anjibot_data.json', 'r', encoding='utf-8') as file:
    anjibot_data = json.load(file)

def load_default_responses(filename):
    """Read one canned fallback response per line from *filename*.

    Each line is stripped of surrounding whitespace; blank lines yield
    empty strings, matching the original behaviour.
    """
    with open(filename, 'r', encoding='utf-8') as handle:
        return [line.strip() for line in handle]

# Load default responses from file; used as "grey zone" replies when a query
# is only loosely similar to a known FAQ question (see answer_general_query).
default_responses = load_default_responses('default_responses.txt')

# Load Sentence Transformer model used by encode_text for all embeddings.
# Downloading/loading happens once at import time.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

def encode_text(text):
    """Embed *text* with the sentence-transformer model; returns a 1-D vector."""
    # model.encode works on batches, so wrap the single string and unwrap
    # the single result.
    return model.encode([text])[0]

def answer_general_query(user_question):
    """Answer a free-form question by embedding similarity against the FAQ data.

    Returns the stored answer for the closest FAQ question when similarity is
    high, a random canned response in the borderline band, and an apology
    otherwise.
    """
    user_question_embedding = encode_text(user_question)

    # PERF: memoize the FAQ question embeddings on the function object.
    # anjibot_data is loaded once at import time, so the original code's
    # re-encoding of every question on every call was pure wasted work.
    question_embeddings = getattr(answer_general_query, "_question_embeddings", None)
    if question_embeddings is None:
        questions = [item['question'] for item in anjibot_data]
        question_embeddings = np.array([encode_text(q) for q in questions])
        answer_general_query._question_embeddings = question_embeddings

    similarities = cosine_similarity([user_question_embedding], question_embeddings)
    most_similar_index = np.argmax(similarities)
    max_similarity = similarities[0][most_similar_index]

    # Similarity thresholds: confident match -> stored answer;
    # borderline -> random small-talk default; otherwise admit defeat.
    if max_similarity > 0.5:
        return anjibot_data[most_similar_index]['answer']
    elif max_similarity > 0.3:
        return random.choice(default_responses)
    else:
        return "I'm sorry, I couldn't find the answer to your question. Please meet Anji or any of the class excos."


def normalize_text(text):
    """Lowercase *text* and return its set of cleaned words.

    Possessive "'s" suffixes are removed, then any remaining
    non-alphanumeric characters are dropped from each word.

    BUG FIX: the original called word.rstrip("'s") AFTER apostrophes had
    already been stripped, so it removed any run of trailing 's' characters
    from every word ("class" -> "cla", "boss" -> "bo"). str.rstrip strips a
    character set, not a suffix.
    """
    normalized_words = set()
    for token in text.lower().split():
        # Strip a true possessive suffix while the apostrophe is still there.
        if token.endswith("'s"):
            token = token[:-2]
        # Keep only alphanumeric characters (words were split on whitespace,
        # so the space check of the original is no longer needed per-char).
        word = ''.join(ch for ch in token if ch.isalnum())
        if word:
            normalized_words.add(word)
    return normalized_words

# Stop words ignored by word_lookup when counting shared words.
# These are compared against normalize_text() output (lowercase,
# alphanumeric-only), so entries must be in that form too: the old
# 'mrs.' entry could never match because of the dot. Duplicate
# entries ("in", "of", "and", "the") have also been removed —
# membership testing is unaffected.
exceptions = ["mr", "mrs", "dr", "the", "a", "i", "to", "ayo",
              "in", "of", "and", "for", "with", "by", "at"]

# Custom similarity score: how many meaningful words two strings share.
def word_lookup(text, query, exceptions=exceptions):
    """Count the distinct words present in both *text* and *query*,
    excluding the stop words in *exceptions*."""
    shared = normalize_text(text) & normalize_text(query)
    return len(shared - set(exceptions))

def get_phone_number_response(best_match):
    """Return a sentence with the lecturer's phone number, or an apology.

    *best_match* is a row of lecturer_data. Because that frame is loaded
    with .astype(str), a missing phone number arrives as the literal
    (truthy) string "nan" — the original falsy check could never fire, so
    "nan" is now tested for explicitly.
    """
    phone = best_match['phone_number']
    if phone and phone.lower() != 'nan':
        return f"Sure! {best_match['name']} the {best_match['course']} ({best_match['course_code']}) lecturer's phone number is {phone}."
    else:
        return "Sorry, the phone number is not available."

def get_office_response(best_match):
    """Return a sentence describing the lecturer's office location.

    *best_match* is a row of lecturer_data. Because that frame is loaded
    with .astype(str), a missing office cell arrives as the literal
    (truthy) string "nan" — it is now tested for explicitly instead of
    relying on falsiness.
    """
    office = best_match['office']
    if office == "No longer in Babcock":
        return f"Oops! {best_match['name']} the {best_match['course']} ({best_match['course_code']}) lecturer is {office}."
    elif office and office.lower() != 'nan':
        return f"Sure thing! {best_match['name']} the {best_match['course']} ({best_match['course_code']}) lecturer's office is at {office}."
    else:
        return "Sorry, the office location is not available."

def get_basic_info_response(query, best_match):
    """Answer 'who teaches X' / 'what is the code for X' style questions."""
    name = best_match['name']
    course = best_match['course']
    code = best_match['course_code']
    if "code" in query:
        return f"The course code for {course} is {code}"
    return f"{name} is the {course} ({code}) lecturer."

def get_default_response(best_match):
    """Fallback lecturer-intent reply: just state the course code."""
    course, code = best_match['course'], best_match['course_code']
    return f"{course} has the course code: {code}"


def process_query(query, best_match):
    """Route a lecturer-intent query to the matching response builder."""
    if "number" in query:  # also covers "phone number", a superset match
        return get_phone_number_response(best_match)
    if "office" in query:
        return get_office_response(best_match)
    if "lecturer" in query or "who" in query or "code" in query:
        return get_basic_info_response(query, best_match)
    return get_default_response(best_match)

def answer_lecturer_query(query):
    """Find the lecturer row best matching *query* and answer from it.

    Falls back to answer_general_query when no row shares a meaningful
    word with the query.
    """
    query = query.lower()
    max_score = 0
    best_match = None

    # Score every lecturer row by the number of shared (non stop-word) words.
    for _, row in lecturer_data.iterrows():
        text = f"{row['course']} {row['course_code']} {row['name']}".lower()
        score = word_lookup(query, text)
        if score > max_score:
            max_score = score
            best_match = row

    if max_score < 1:
        return answer_general_query(query)

    # When the query names a department prefix, verify any explicit course
    # number against the matched row before answering.
    if any(word in query for word in ["cosc", "geds", "ged"]):
        words = query.split()
        for i, word in enumerate(words):
            if word.isdigit():
                # The prefix is assumed to be the word just before the number.
                query_course_code = f"{words[i - 1]} {word}"
                if query_course_code.upper() == best_match['course_code']:
                    return process_query(query, best_match)
                return "Sorry, I couldn't find info about the course you've mentioned."
        # BUG FIX: the original fell out of this branch and returned None
        # whenever a prefix appeared with no digits (e.g. "cosc lecturer");
        # fall through to the best match instead.
    return process_query(query, best_match)

def get_links_response(query, best_match):
    """Return the school-files or Study Smarter link for a matched course.

    BUG FIX: the original returned None when the query matched neither
    keyword list (e.g. "pdf", which routes here via get_intent but appears
    in neither list below); an explicit fallback reply is now returned.
    """
    school_files = ["past questions", "pst questions", "pq", "pstq", "slides for"]
    study_smarter = ["flashcards", "study set", "study", "study app", "study link", "slides", "today", "class", "lecturer"]

    if any(keyword in query for keyword in school_files):
        if best_match['School files Link'] != "Unavailable":
            return f"Looking for slides and/or past questions for {best_match['course']} ({best_match['course_code']})? This link should help you:  {best_match['School files Link']}"
        else:
            return "Oops! Sorry, I can't find slides or past questions for that course."
    elif any(keyword in query for keyword in study_smarter):
        if best_match['Study Smarter Link'] != "Unavailable":
            return f"The Study Smarter study set for {best_match['course']} ({best_match['course_code']}) contains the recent slides sent by the lecturer (and possibly flashcards, notes, and more learning resources). The link to the study set:  {best_match['Study Smarter Link']}"
        else:
            return "I'm sorry, I can't find any study smarter study set for that course."
    else:
        # Matched the course but not the kind of resource wanted.
        return f"I found {best_match['course']} ({best_match['course_code']}), but I'm not sure whether you want past questions/slides or a Study Smarter study set. Could you rephrase?"

def answer_doc_link_query(query):
    """Find the course row in doc_link_data best matching *query* and
    return its document/study links.

    Asks for clarification when no row shares a meaningful word with the
    query.
    """
    query = query.lower()
    max_score = 0
    best_match = None

    # Score every course row by the number of shared (non stop-word) words.
    for _, row in doc_link_data.iterrows():
        text = f"{row['course']} {row['course_code']}".lower()
        score = word_lookup(query, text)
        if score > max_score:
            max_score = score
            best_match = row

    if max_score < 1:
        return "Sure! To assist you better, please provide the name or code of the course you are referring to, along with the entire query."

    # When the query names a department prefix, verify any explicit course
    # number against the matched row before answering.
    if any(word in query for word in ["cosc", "geds", "ged"]):
        words = query.split()
        for i, word in enumerate(words):
            if word.isdigit():
                # The prefix is assumed to be the word just before the number.
                query_course_code = f"{words[i - 1]} {word}"
                if query_course_code.upper() == best_match['course_code']:
                    return get_links_response(query, best_match)
                return "Sorry, I couldn't find info about the course you've mentioned."
        # BUG FIX: the original fell out of this branch and returned None
        # whenever a prefix appeared with no digits (e.g. "cosc slides");
        # fall through to the best match instead.
    return get_links_response(query, best_match)


# Determine which handler a query should be routed to.
def get_intent(query):
    """Classify *query* into 'unknown', 'lecturer', 'doc_link' or 'general'.

    Keyword lists are checked in priority order: topics the bot explicitly
    can't help with first, then lecturer questions, then document/link
    requests; anything else is 'general'.
    """
    # BUG FIX: the original list had a missing comma, so "lecturer's" and
    # "phone number" were silently concatenated into the single unmatchable
    # keyword "lecturer'sphone number".
    lecturer_keywords = ["lecturer", "lecturer's", "phone number", "number",
                         "office", "who", "code", "course", "name"]
    doc_link_keywords = ["past questions", "pstq", "pq", "pst", "study materials",
                         "flashcards", "studysmarter", "study smarter", "slides",
                         "slide", "pdf"]
    unknown_keywords = ["email", "missed", "write"]

    # Substring matching against the lowercased query, first hit wins.
    query_lower = query.lower()
    if any(keyword in query_lower for keyword in unknown_keywords):
        return "unknown"
    elif any(keyword in query_lower for keyword in lecturer_keywords):
        return "lecturer"
    elif any(keyword in query_lower for keyword in doc_link_keywords):
        return "doc_link"
    else:
        return "general"


def get_response(query):
    """Top-level dispatcher: map one chat message to Anjibot's reply."""
    # Guard first: empty messages (e.g. stickers arriving with no text).
    # The original computed the intent before this check, wasting work, and
    # missed whitespace-only messages entirely.
    if not query.strip():
        return "Yo! Don't send me stickers, I don't understand them anyway 😕"

    intent = get_intent(query)
    if intent == "unknown":
        return "Ugh, your query is quite beyond me. Please meet Anji directly :)"
    if intent == "lecturer":
        return answer_lecturer_query(query)
    if intent == "doc_link":
        return answer_doc_link_query(query)
    return answer_general_query(query)

# Build the Gradio chat UI. Executed at import time; the app is only
# launched from the __main__ guard below.
with gr.Blocks() as iface:
    gr.Markdown(
    """
    # Anjibot
    Hi friend! I'm Anjibot, CS Group A AI Course Rep. How can I assist you today?
    """)

    # Chat widgets: history display, input box, and the two action buttons.
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Type your question here", label="User")
    submit = gr.Button("Submit")
    clear = gr.ClearButton([msg, chatbot])

    # Collapsible disclaimer shown beneath the chat.
    with gr.Accordion("Read this, pleaseeee"):
        gr.Markdown(
        """
        #### As you interact with me, please note:
        - Our chats are not private.
        - I'm still undergoing training (I'm not perfect).
        - I'm not ChatGPT (My knowledge base is limited to class-related issues).
        - I'm British ;)
        """)

    def respond(message, chat_history):
        # Append the (user, bot) exchange to the history and clear the textbox.
        bot_message = get_response(message)
        chat_history.append(
        (f"**You:** {message}", f"**Anjibot:** {bot_message}"))
        # Artificial 2-second pause before the reply is shown.
        time.sleep(2)
        return "", chat_history

    # Fire respond() on both the Submit button and pressing Enter in the box.
    submit.click(respond, [msg, chatbot], [msg, chatbot])
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

# Launch the web app only when executed as a script (not on import).
if __name__ == "__main__":
    iface.launch()