Spaces:
Sleeping
Sleeping
| import json | |
| import pandas as pd | |
| import numpy as np | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import random | |
| from sentence_transformers import SentenceTransformer | |
| import gradio as gr | |
| import time | |
# Load the datasets the bot answers from.
# NOTE: ``astype(str)`` stringifies every cell of the lecturers table, so a
# missing value ends up as the literal text "nan" rather than an empty string.
lecturer_data = pd.read_csv(
    'lecturers.csv',
    dtype={"phone_number": str},  # read phone numbers as text, not numbers
).astype(str)
doc_link_data = pd.read_csv('docs_link.csv')

# Question/answer pairs used for general queries.
with open('anjibot_data.json', 'r', encoding='utf-8') as file:
    anjibot_data = json.load(file)
def load_default_responses(filename):
    """Read the fallback replies stored one per line in *filename*.

    Args:
        filename: Path to a UTF-8 text file holding one response per line.

    Returns:
        list[str]: The responses in file order, with surrounding whitespace
        removed. Blank lines are skipped so that a stray empty line (for
        example a trailing newline at the end of the file) can never be
        picked as a reply.
    """
    with open(filename, 'r', encoding='utf-8') as file:
        stripped_lines = (line.strip() for line in file)
        return [line for line in stripped_lines if line]
# Fallback replies, read once at start-up from the responses file.
default_responses = load_default_responses('default_responses.txt')

# Sentence Transformer model used by encode_text to embed questions.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
def encode_text(text):
    """Return the Sentence Transformer embedding for one piece of text."""
    # The model is called with a one-item batch; hand back that single result.
    return model.encode([text])[0]
# Embeddings of the knowledge-base questions, remembered between calls. The
# cache is keyed by the questions themselves, so it is rebuilt automatically
# if anjibot_data ever changes.
_question_embedding_cache = {"questions": None, "embeddings": None}


def _get_question_embeddings():
    """Return the embeddings of every anjibot_data question, encoding them once."""
    questions = tuple(item['question'] for item in anjibot_data)
    if _question_embedding_cache["questions"] != questions:
        _question_embedding_cache["embeddings"] = np.array(
            [encode_text(question) for question in questions]
        )
        _question_embedding_cache["questions"] = questions
    return _question_embedding_cache["embeddings"]


# function to answer general queries
def answer_general_query(user_question):
    """Answer a free-form question from the anjibot_data knowledge base.

    The question is embedded and compared (cosine similarity) with every
    stored question.

    Args:
        user_question: The text typed by the user.

    Returns:
        str: The stored answer when the best similarity is above 0.5, a random
        default response when it is above 0.3, and an apology otherwise. The
        apology is also returned when the knowledge base is empty, or when
        there are no default responses to choose from.
    """
    not_found = "I'm sorry, I couldn't find the answer to your question. Please meet Anji or any of the class excos."

    # Previously every stored question was re-encoded on each call; the helper
    # encodes them once and reuses the result.
    question_embeddings = _get_question_embeddings()
    if len(question_embeddings) == 0:
        # Nothing to compare against (cosine_similarity rejects empty input).
        return not_found

    user_question_embedding = encode_text(user_question)
    similarities = cosine_similarity([user_question_embedding], question_embeddings)[0]
    most_similar_index = int(np.argmax(similarities))
    max_similarity = similarities[most_similar_index]

    # Set a threshold for similarity
    if max_similarity > 0.5:
        return anjibot_data[most_similar_index]['answer']
    if max_similarity > 0.3 and default_responses:
        # Select a random default response
        return random.choice(default_responses)
    return not_found
def normalize_text(text):
    """Reduce *text* to a set of lowercase, punctuation-free word stems.

    Args:
        text: Any string (a user query or a record description).

    Returns:
        set[str]: The distinct stems found in *text*. Stems that end up empty
        are left out, so an isolated "s" (for example an initial "S." or the
        tail of "who 's") can never count as a word shared by two texts.
    """
    # Keep only letters, digits and whitespace. This also removes apostrophes,
    # so "lecturer's" is already "lecturers" before the trimming below.
    clean_text = ''.join(
        char.lower() for char in text if char.isalnum() or char.isspace()
    )
    # rstrip treats its argument as a set of characters, not a suffix, so it
    # removes every trailing "s": possessives and plurals collapse to one stem
    # ("lecturers" -> "lecturer", "class" -> "cla"). Both sides of a
    # comparison go through this function, so the stems still line up.
    stems = (word.rstrip("'s") for word in clean_text.split())
    return {stem for stem in stems if stem}
# Words that word_lookup never counts as a match between a query and a record.
# NOTE(review): the list contains duplicates, and 'mrs.' keeps a full stop
# that normalize_text strips from words, so that entry cannot match as written.
exceptions = [
    "mr",
    "dr",
    "the",
    "i",
    "to",
    "ayo",
    "in",
    "of",
    "and",
    'mrs.',
    'in',
    'and',
    'of',
    'a',
    'for',
    'the',
    'with',
    'by',
    'at',
]
# custom similarity matching function
def word_lookup(text, query, exceptions=exceptions):
    """Count the distinct normalized words that *text* and *query* share.

    Both strings are passed through normalize_text; words listed in
    *exceptions* are ignored.

    Returns:
        int: Number of shared, non-excluded words (0 when nothing overlaps).
    """
    shared_words = normalize_text(text) & normalize_text(query)
    return sum(1 for word in shared_words if word not in exceptions)
def get_phone_number_response(best_match):
    """Build the reply for a lecturer phone-number request.

    Args:
        best_match: Mapping (a lecturer_data row) with the keys 'name',
            'course', 'course_code' and 'phone_number'.

    Returns:
        str: A sentence giving the phone number, or an apology when the
        number is missing.
    """
    phone_number = best_match['phone_number']
    # lecturer_data is loaded with .astype(str), so a blank CSV cell arrives
    # here as the text "nan"; treat that as missing instead of reading it out.
    if not phone_number or str(phone_number).strip().lower() in ("", "nan"):
        return "Sorry, the phone number is not available."
    return f"Sure! {best_match['name']} the {best_match['course']} ({best_match['course_code']}) lecturer's phone number is {phone_number}."
def get_office_response(best_match):
    """Build the reply for a lecturer office-location request.

    Args:
        best_match: Mapping (a lecturer_data row) with the keys 'name',
            'course', 'course_code' and 'office'.

    Returns:
        str: A sentence giving the office, a notice when the office field
        says the lecturer is "No longer in Babcock", or an apology when the
        office is missing.
    """
    office = best_match['office']
    if office == "No longer in Babcock":
        return f"Oops! {best_match['name']} the {best_match['course']} ({best_match['course_code']}) lecturer is {office}."
    # lecturer_data is loaded with .astype(str), so a blank CSV cell arrives
    # here as the text "nan"; treat that as missing instead of reading it out.
    if not office or str(office).strip().lower() in ("", "nan"):
        return "Sorry, the office location is not available."
    return f"Sure thing! {best_match['name']} the {best_match['course']} ({best_match['course_code']}) lecturer's office is at {office}."
def get_basic_info_response(query, best_match):
    """Answer either "what is the course code" or "who teaches this course".

    Args:
        query: The user's question; the word "code" selects the code answer.
        best_match: Mapping with the keys 'name', 'course' and 'course_code'.

    Returns:
        str: The course-code sentence when "code" occurs in *query*,
        otherwise the sentence naming the lecturer.
    """
    course = best_match['course']
    course_code = best_match['course_code']
    if "code" not in query:
        return f"{best_match['name']} is the {course} ({course_code}) lecturer."
    return f"The course code for {course} is {course_code}"
def get_default_response(best_match):
    """Return the sentence stating the course code of the matched course.

    Args:
        best_match: Mapping with the keys 'course' and 'course_code'.
    """
    return "{} has the course code: {}".format(
        best_match['course'], best_match['course_code']
    )
def process_query(query, best_match):
    """Pick the reply builder that fits the wording of *query*.

    Checks run in this order: phone number, office, basic info (lecturer /
    who / code), and finally the default course-code reply.

    Args:
        query: The user's question.
        best_match: The lecturer record the reply is about.

    Returns:
        str: The reply produced by the selected builder.
    """
    if any(term in query for term in ("phone number", "number")):
        return get_phone_number_response(best_match)
    if "office" in query:
        return get_office_response(best_match)
    if "lecturer" in query or "who" in query or "code" in query:
        return get_basic_info_response(query, best_match)
    return get_default_response(best_match)
def answer_lecturer_query(query):
    """Answer a question about a lecturer or course from lecturer_data.

    Every row is scored by the number of words it shares with the query
    (word_lookup); the first row with the highest score is the best match.

    Args:
        query: The user's question.

    Returns:
        str: The reply from process_query for the best match. When the query
        contains a "<prefix> <number>" course code that differs from the best
        match's code, an apology is returned instead. When no row shares a
        word with the query, the general-query answer is returned.
    """
    query = query.lower()
    max_score = 0
    best_match = None
    for _, row in lecturer_data.iterrows():
        text = f"{row['course']} {row['course_code']} {row['name']}".lower()
        score = word_lookup(query, text)
        # Find the highest score
        if score > max_score:
            max_score = score
            best_match = row

    if best_match is None:
        # No row shares a single word with the query.
        return answer_general_query(query)

    if any(prefix in query for prefix in ("cosc", "geds", "ged")):
        words = query.split()
        for i, word in enumerate(words):
            # i > 0: a number at the very start has no prefix word before it
            # (index -1 would wrongly pair it with the last word).
            if i > 0 and word.isdigit():
                # Retrieve the prefix from the previous word
                query_course_code = f"{words[i - 1]} {word}"
                if query_course_code.upper() == best_match['course_code']:
                    return process_query(query, best_match)
                return "Sorry, I couldn't find info about the course you've mentioned."

    # Either no course prefix was mentioned, or no numeric code followed it.
    # Previously the second case fell off the end and returned None.
    return process_query(query, best_match)
def get_links_response(query, best_match):
    """Build the reply containing a course's resource link.

    Args:
        query: The lowercased user question.
        best_match: Mapping (a doc_link_data row) with the keys 'course',
            'course_code', 'School files Link' and 'Study Smarter Link'.
            A link equal to "Unavailable" means there is none.

    Returns:
        str: The school-files reply when the query mentions past questions or
        "slides for", the Study Smarter reply when it mentions a study-set
        keyword, and otherwise a request to be more specific. The last case
        used to return None.
    """
    school_files = ("past questions", "pst questions", "pq", "pstq", "slides for")
    study_smarter = ("flashcards", "study set", "study", "study app", "study link",
                     "slides", "today", "class", "lecturer")
    course_label = f"{best_match['course']} ({best_match['course_code']})"

    if any(keyword in query for keyword in school_files):
        link = best_match['School files Link']
        if link != "Unavailable":
            return f"Looking for slides and/or past questions for {course_label}? This link should help you: {link}"
        return "Oops! Sorry, I can't find slides or past questions for that course."

    if any(keyword in query for keyword in study_smarter):
        link = best_match['Study Smarter Link']
        if link != "Unavailable":
            return f"The Study Smarter study set for {course_label} contains the recent slides sent by the lecturer (and possibly flashcards, notes, and more learning resources). The link to the study set: {link}"
        return "I'm sorry, I can't find any study smarter study set for that course."

    # Queries reach this function through broader intent keywords (e.g. "pdf",
    # "slide", "pst") than the two lists above, so always answer with text.
    return f"Sorry, I'm not sure which resource you need for {course_label}. Try asking for its past questions, slides or study set."
def answer_doc_link_query(query):
    """Answer a request for course materials from doc_link_data.

    Every row is scored by the number of words its course name and code
    share with the query (word_lookup); the first row with the highest score
    is the best match.

    Args:
        query: The user's question.

    Returns:
        str: The reply from get_links_response for the best match. When the
        query contains a "<prefix> <number>" course code that differs from the
        best match's code, an apology is returned instead. When no row shares
        a word with the query, the user is asked to name the course.
    """
    query = query.lower()
    max_score = 0
    best_match = None
    for _, row in doc_link_data.iterrows():
        text = f"{row['course']} {row['course_code']}".lower()
        score = word_lookup(query, text)
        # Find the highest score
        if score > max_score:
            max_score = score
            best_match = row

    if best_match is None:
        # No row shares a single word with the query.
        return "Sure! To assist you better, please provide the name or code of the course you are referring to, along with the entire query."

    if any(prefix in query for prefix in ("cosc", "geds", "ged")):
        words = query.split()
        for i, word in enumerate(words):
            # i > 0: a number at the very start has no prefix word before it
            # (index -1 would wrongly pair it with the last word).
            if i > 0 and word.isdigit():
                # Retrieve the prefix from the previous word
                query_course_code = f"{words[i - 1]} {word}"
                if query_course_code.upper() == best_match['course_code']:
                    return get_links_response(query, best_match)
                return "Sorry, I couldn't find info about the course you've mentioned."

    # Either no course prefix was mentioned, or no numeric code followed it.
    # Previously the second case fell off the end and returned None.
    return get_links_response(query, best_match)
# Define function to determine intent
def get_intent(query):
    """Classify *query* by the keywords it contains.

    Keywords are matched as case-insensitive substrings, and the groups are
    checked in the order unknown, lecturer, doc_link.

    Args:
        query: The user's question.

    Returns:
        str: "unknown", "lecturer", "doc_link" or "general" (no keyword hit).
    """
    # Define keywords or phrases associated with each intent.
    # The comma after "lecturer's" was missing, which silently fused it with
    # "phone number" into the single keyword "lecturer'sphone number".
    lecturer_keywords = ("lecturer", "lecturer's", "phone number", "number",
                         "office", "who", "code", "course", "name")
    doc_link_keywords = ("past questions", "pstq", "pq", "pst", "study materials",
                         "flashcards", "studysmarter", "study smarter", "slides",
                         "slide", "pdf")
    unknown_keywords = ("email", "missed", "write")

    # Check for keywords in the query
    query_lower = query.lower()
    if any(keyword in query_lower for keyword in unknown_keywords):
        return "unknown"
    if any(keyword in query_lower for keyword in lecturer_keywords):
        return "lecturer"
    if any(keyword in query_lower for keyword in doc_link_keywords):
        return "doc_link"
    return "general"
def get_response(query):
    """Produce the bot's reply to *query*.

    An empty query gets the "stickers" reply and an "unknown" intent gets a
    referral to Anji; otherwise the query goes to the handler for its intent,
    with general queries as the default.
    """
    intent = get_intent(query)
    if query == "":
        return "Yo! Don't send me stickers, I don't understand them anyway 😕"
    if intent == "unknown":
        return "Ugh, your query is quite beyond me. Please meet Anji directly :)"

    handlers = {
        "lecturer": answer_lecturer_query,
        'doc_link': answer_doc_link_query,
    }
    handler = handlers.get(intent, answer_general_query)
    return handler(query)
# Build the Gradio chat interface.
with gr.Blocks() as iface:
    # Page heading and greeting.
    gr.Markdown(
        """
# Anjibot
Hi friend! I'm Anjibot, CS Group A AI Course Rep. How can I assist you today?
""")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Type your question here", label="User")
    submit = gr.Button("Submit")
    clear = gr.ClearButton([msg, chatbot])

    # Usage notes shown in an accordion below the chat.
    with gr.Accordion("Read this, pleaseeee"):
        gr.Markdown(
            """
#### As you interact with me, please note:
- Our chats are not private.
- I'm still undergoing training (I'm not perfect).
- I'm not ChatGPT (My knowledge base is limited to class-related issues).
- I'm British ;)
""")

    def respond(message, chat_history):
        """Append the user's message and the bot's reply to the chat history.

        Returns an empty string (the new textbox content) and the updated
        history.
        """
        reply = get_response(message)
        exchange = (f"**You:** {message}", f"**Anjibot:** {reply}")
        chat_history.append(exchange)
        # Wait two seconds before handing the reply back to the UI.
        time.sleep(2)
        return "", chat_history

    # The Submit button and the Enter key in the textbox both send the message.
    submit.click(respond, [msg, chatbot], [msg, chatbot])
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

if __name__ == "__main__":
    iface.launch()