Spaces:

dataprincess
/

Anjibot

Sleeping

App Files Files Community

dataprincess committed on May 3, 2024

Commit

e5c567d

verified ·

1 Parent(s): 5f46154

Update app.py

Browse files

Files changed (1) hide show

app.py +232 -60

app.py CHANGED Viewed

@@ -1,63 +1,235 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-# Module-level Inference API client; respond() streams chat completions through it.
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    """Stream the assistant's reply to *message*, yielding the text accumulated so far.
-    The prompt is the system message, then each (user, assistant) pair from
-    *history* (empty entries are skipped), then the new user message.
-    max_tokens, temperature and top_p are forwarded unchanged to the client.
-    """
-    messages = [{"role": "system", "content": system_message}]
-    for turn in history:
-        if turn[0]:
-            messages.append({"role": "user", "content": turn[0]})
-        if turn[1]:
-            messages.append({"role": "assistant", "content": turn[1]})
-    messages.append({"role": "user", "content": message})
-    response = ""
-    # The loop variable is named `chunk` so it no longer shadows the `message` parameter.
-    for chunk in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = chunk.choices[0].delta.content
-        # A streamed chunk may carry no text (content of None, e.g. a closing chunk);
-        # appending it unguarded would raise TypeError.
-        if token:
-            response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-# Chat UI built around respond().
-# NOTE(review): the four additional inputs presumably map, in order, onto respond's
-# system_message, max_tokens, temperature and top_p parameters — confirm against the gradio docs.
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-# Start the UI only when this file is run as a script, not when it is imported.
-if __name__ == "__main__":
-    demo.launch()

+import json
+import pandas as pd
+import numpy as np
+from sklearn.metrics.pairwise import cosine_similarity
+import random
+from sentence_transformers import SentenceTransformer
+import datetime
 import gradio as gr
+# NOTE(review): Chatbubble, ChatContext and Chats do not appear to be public gradio exports — confirm against the installed gradio version.
+from gradio import Chatbubble, ChatContext, Chats
+# Load datasets
+# Lecturer table: phone_number is read as text, then every column is cast to str.
+# NOTE(review): after astype(str) a missing cell presumably becomes the literal string "nan" — confirm.
+lecturer_data = pd.read_csv('lecturers.csv', dtype={"phone_number": str}).astype(str)
+# Per-course resource links, read with pandas' default dtypes.
+doc_link_data = pd.read_csv('docs_link.csv')
+# Q&A entries; answer_general_query reads each item's 'question' and 'answer' keys.
+with open('anjibot_data.json', 'r', encoding='utf-8') as file:
+    anjibot_data = json.load(file)
+def load_default_responses(filename):
+    """Return the non-blank lines of the UTF-8 text file *filename*, each stripped of surrounding whitespace.
+    Blank lines are skipped so that an empty string can never be picked as a reply.
+    Raises OSError if the file cannot be opened.
+    """
+    with open(filename, 'r', encoding='utf-8') as file:
+        stripped_lines = (line.strip() for line in file)
+        return [line for line in stripped_lines if line]
+# Load default responses from file
+# answer_general_query picks one of these at random for mid-confidence matches.
+default_responses = load_default_responses('default_responses.txt')
+# Load Sentence Transformer model
+# Shared embedding model; encode_text calls model.encode on it for every text it embeds.
+model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
+def encode_text(text):
+    """Return the sentence embedding of a single piece of text."""
+    # The model is given a one-item batch, so the first (only) row is the embedding of *text*.
+    return model.encode([text])[0]
+# function to answer general queries
+# Embeddings of the stored questions, filled in on first use so they are encoded only once.
+_question_embeddings_cache = None
+def _get_question_embeddings():
+    """Return the embeddings of every question in anjibot_data, encoding them on the first call only."""
+    global _question_embeddings_cache
+    if _question_embeddings_cache is None:
+        _question_embeddings_cache = np.array(
+            [encode_text(item['question']) for item in anjibot_data]
+        )
+    return _question_embeddings_cache
+def answer_general_query(user_question):
+    """Answer *user_question* from the stored Q&A entries using embedding similarity.
+    Returns the stored answer when the best cosine similarity is above 0.5, a random
+    default response when it is above 0.3, and a fixed apology otherwise.
+    """
+    not_found = "I'm sorry, I couldn't find the answer to your question. Please meet Anji or any of the class excos."
+    # With no stored questions there is nothing to compare against (argmax would raise on an empty array).
+    if not anjibot_data:
+        return not_found
+    user_question_embedding = encode_text(user_question)
+    similarities = cosine_similarity([user_question_embedding], _get_question_embeddings())
+    most_similar_index = int(np.argmax(similarities))
+    max_similarity = similarities[0][most_similar_index]
+    # Set a threshold for similarity
+    if max_similarity > 0.5:
+        return anjibot_data[most_similar_index]['answer']
+    # random.choice raises on an empty list, so the apology is used when no default responses were loaded.
+    if max_similarity > 0.3 and default_responses:
+        return random.choice(default_responses)
+    return not_found
+def normalize_text(text):
+    """Return the set of lowercase alphanumeric words in *text*, with a possessive "'s" removed.
+    For example "Dr. Seun's class" gives {"dr", "seun", "class"}.
+    The possessive is handled before punctuation is stripped; once the apostrophe is gone a
+    possessive cannot be told apart from an ordinary trailing "s" ("class", "physics").
+    """
+    normalized_words = set()
+    # Treat the typographic apostrophe like the plain one.
+    for token in text.lower().replace("\u2019", "'").split():
+        # Drop trailing punctuation so that "seun's?" still ends with "'s".
+        while token and not token[-1].isalnum():
+            token = token[:-1]
+        if token.endswith("'s"):
+            token = token[:-2]
+        word = ''.join(char for char in token if char.isalnum())
+        # Tokens made only of punctuation leave nothing behind; do not add an empty word.
+        if word:
+            normalized_words.add(word)
+    return normalized_words
+# Titles and filler words that word_lookup ignores when counting matches.
+# Entries are lowercase and free of punctuation because they are compared with words
+# produced by normalize_text, which strips punctuation ("mrs." could never match).
+exceptions = ["mr", "mrs", "dr", "the", "i", "a", "to", "ayo", "in",
+              "of", "and", "for", "with", "by", "at"]
+# custom similarity matching function
+def word_lookup(text, query, exceptions=exceptions):
+    """Count the distinct normalized words that *text* and *query* share, ignoring any word listed in *exceptions*."""
+    shared_words = normalize_text(text) & normalize_text(query)
+    return sum(1 for word in shared_words if word not in exceptions)
+# Function to find lecturer details using custom matching
+def _has_value(field):
+    """Return True when a lecturer field holds real data rather than a blank or the text form of a missing cell."""
+    # lecturer_data is cast to str on load, so a missing cell is text such as "nan", which is truthy.
+    return str(field).strip().lower() not in ("", "nan", "none")
+def answer_lecturer_query(query):
+    """Answer a question about a lecturer: phone number, office, name or course code.
+    The lecturer row sharing the most words with the query (course, course code and name)
+    is used. One-word queries get a request for more information, and queries that match
+    no row are passed to answer_general_query.
+    """
+    query = query.lower()
+    # Check if the query contains only one word (done first: no need to scan the table for it)
+    if len(query.split()) == 1:
+        return "I'm sorry, I need more information to assist you."
+    max_score = 0
+    best_match = None
+    for _, row in lecturer_data.iterrows():
+        text = f"{row['course']} {row['course_code']} {row['name']}".lower()
+        score = word_lookup(query, text)
+        # Strictly greater, so the earliest row wins a tie
+        if score > max_score:
+            max_score = score
+            best_match = row
+    if best_match is None:
+        return answer_general_query(query)
+    course = best_match['course']
+    course_code = best_match['course_code']
+    lecturer = f"{best_match['name']} the {course} ({course_code}) lecturer"
+    # Process specific requests for phone number or office ("phone number" also contains "number")
+    if "number" in query:
+        if _has_value(best_match['phone_number']):
+            return f"Sure! {lecturer}'s phone number is {best_match['phone_number']}."
+        return "Sorry, I don't recall the phone number for that lecturer."
+    if "office" in query:
+        office = best_match['office']
+        if office == "No longer in Babcock":
+            return f"Oops! {lecturer} is {office}."
+        if _has_value(office):
+            return f"Sure thing! {lecturer}'s office is at {office}."
+        return "Sorry, I seem to have forgotten the office of that lecturer."
+    if "lecturer" in query or "who" in query:
+        return f"{best_match['name']} is the {course} ({course_code}) lecturer."
+    if "code" in query:
+        return f"The course code for {course} is {course_code}"
+    return f"{course} has the course code: {course_code}"
+def answer_doc_link_query(query):
+    """Answer a request for a course's school files or Study Smarter study set.
+    The doc_link_data row sharing the most words with the query (course and course code)
+    is used. One-word queries get a request for more information; queries that match no
+    course get a prompt for the course name or code; a matched course without any
+    resource keyword is passed to answer_general_query, so a string is always returned.
+    """
+    query = query.lower()
+    school_files = ["past questions", "pq", "pstq", "slides for"]
+    study_smarter = ["flashcards", "study set", "study", "study app", "study link", "slides", "today", "class", "lecturer"]
+    # Check if the query contains only one word (done first: no need to scan the table for it)
+    if len(query.split()) == 1:
+        return "I'm sorry, I need more information to assist you."
+    max_score = 0
+    best_match = None
+    for _, row in doc_link_data.iterrows():
+        text = f"{row['course']} {row['course_code']}".lower()
+        score = word_lookup(query, text)
+        # Strictly greater, so the earliest row wins a tie
+        if score > max_score:
+            max_score = score
+            best_match = row
+    # No row shared a word with the query, so the course is unknown.
+    if best_match is None:
+        return "Sure! To assist you better, please provide the name or code of the course you are referring to, along with the complete query."
+    # school_files is checked first so that "slides for ..." wins over the broader "slides" keyword.
+    if any(keyword in query for keyword in school_files):
+        if best_match['School files Link'] != "Unavailable":
+            return f"Looking for slides and/or past questions for {best_match['course']} ({best_match['course_code']})? This link should help you:  {best_match['School files Link']}"
+        return "Oops! Sorry, I can't find slides or past questions for that course."
+    if any(keyword in query for keyword in study_smarter):
+        if best_match['Study Smarter Link'] != "Unavailable":
+            return f"The Study Smarter study set for {best_match['course']} ({best_match['course_code']}) contains the recent slides sent by the lecturer (and possibly flashcards, notes, and more learning resources). The link to the study set:  {best_match['Study Smarter Link']}"
+        return "I'm sorry, I can't find any study smarter study set for that course."
+    # A course matched but no resource keyword did: answer generally instead of returning None.
+    return answer_general_query(query)
+# Define function to determine intent
+def get_intent(query):
+    """Classify *query* as "unknown", "lecturer", "doc_link" or "general".
+    Matching is a case-insensitive substring search. The keyword groups are tried in
+    that order and the first group with a hit wins; "general" means nothing matched.
+    """
+    # Define keywords or phrases associated with each intent
+    # (each entry needs its own comma: adjacent string literals are silently joined into one keyword)
+    lecturer_keywords = ["lecturer", "lecturer's", "phone number", "number", "office", "who", "code", "course", "name"]
+    doc_link_keywords = ["past questions", "pstq", "study materials", "flashcards", "studysmarter",
+                         "study smarter", "slides", "slide", "pdf"]
+    unknown_keywords = ["email", "missed", "write"]
+    # Check for keywords in the query
+    query_lower = query.lower()
+    if any(keyword in query_lower for keyword in unknown_keywords):
+        return "unknown"
+    elif any(keyword in query_lower for keyword in lecturer_keywords):
+        return "lecturer"
+    elif any(keyword in query_lower for keyword in doc_link_keywords):
+        return "doc_link"
+    else:
+        return "general"
+# Define variables to track the previous query and response
+# NOTE(review): these are module-level, so they are presumably shared by every user of the app — confirm that is acceptable.
+previous_query = ""
+previous_response = ""
+# Must stay identical to the prompt answer_doc_link_query returns when it cannot identify a course.
+_COURSE_DETAILS_PROMPT = "Sure! To assist you better, please provide the name or code of the course you are referring to, along with the complete query."
+def get_response(query):
+    """Return the bot's reply to *query* and remember the exchange for the next call.
+    If the previous reply asked for the course name or code, the previous query is
+    prepended to this one so that the follow-up is answered in context. An empty query
+    gets a fixed "no stickers" reply.
+    """
+    global previous_query, previous_response
+    if query == "":
+        response = "Yo! Don't send me stickers, I don't understand them anyway 😕"
+    else:
+        # Compare with the exact prompt text: lower-casing one side only could never match it.
+        if previous_response == _COURSE_DETAILS_PROMPT:
+            # Append the previous query to the current one
+            query = previous_query + " " + query
+        intent = get_intent(query)
+        if intent == "unknown":
+            response = "Ugh, your query is quite beyond me. Please meet Anji directly :)"
+        elif intent == "lecturer":
+            response = answer_lecturer_query(query)
+        elif intent == 'doc_link':
+            response = answer_doc_link_query(query)
+        else:
+            response = answer_general_query(query)
+        # A handler that finds nothing to say must not leave the reply (and the stored state) as None.
+        if response is None:
+            response = answer_general_query(query)
+    # Update previous query and response
+    previous_query = query
+    previous_response = response
+    return response
+# Text UI: each submitted query is passed to get_response and the reply is shown in a textbox.
+iface = gr.Interface(
+    fn=get_response,
+    inputs=[gr.Textbox(label="User:", placeholder="Enter your query")],
+    # gr.Textbox takes no `context` argument, so the output box is declared with label and size only.
+    outputs=[gr.Textbox(label="Anjibot:", lines=3)],
+    title="AnjiBot",
+    description="Hello! I'm AnjiBot, CS Group A AI Course Rep. How may I assist you today?",
+    examples=[
+        ["I need Dr. Seun's phone number"],
+        ["When is the next class?"],
+        ["I need the slides from today's lectures."],
+    ],
+    # The privacy notice is static page text. Declared as an additional input it would be
+    # passed to get_response as a second argument, which that function does not accept.
+    article="Please note that the data you share with Anjibot is not private.",
+)
+iface.launch(share=True)