dataprincess committed on
Commit
e5c567d
·
verified ·
1 Parent(s): 5f46154

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +232 -60
app.py CHANGED
@@ -1,63 +1,235 @@
 
 
 
 
 
 
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
-
9
-
10
- def respond(
11
- message,
12
- history: list[tuple[str, str]],
13
- system_message,
14
- max_tokens,
15
- temperature,
16
- top_p,
17
- ):
18
- messages = [{"role": "system", "content": system_message}]
19
-
20
- for val in history:
21
- if val[0]:
22
- messages.append({"role": "user", "content": val[0]})
23
- if val[1]:
24
- messages.append({"role": "assistant", "content": val[1]})
25
-
26
- messages.append({"role": "user", "content": message})
27
-
28
- response = ""
29
-
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
34
- temperature=temperature,
35
- top_p=top_p,
36
- ):
37
- token = message.choices[0].delta.content
38
-
39
- response += token
40
- yield response
41
-
42
- """
43
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
44
- """
45
- demo = gr.ChatInterface(
46
- respond,
47
- additional_inputs=[
48
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
49
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
50
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
51
- gr.Slider(
52
- minimum=0.1,
53
- maximum=1.0,
54
- value=0.95,
55
- step=0.05,
56
- label="Top-p (nucleus sampling)",
57
- ),
58
- ],
59
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
 
 
 
 
61
 
62
- if __name__ == "__main__":
63
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import datetime
import json
import random

import gradio as gr
import numpy as np
import pandas as pd
# NOTE(review): Chatbubble, ChatContext and Chats do not appear in Gradio's
# documented public API - confirm this import resolves against the installed
# Gradio version.
from gradio import Chatbubble, ChatContext, Chats
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
10
+
11
# Load datasets
# Lecturer table: phone numbers are read as text, then every column is cast
# to str (presumably to keep leading zeros - confirm against the CSV).
lecturer_data = pd.read_csv('lecturers.csv', dtype={"phone_number": str})
lecturer_data = lecturer_data.astype(str)

# Per-course resource links.
doc_link_data = pd.read_csv('docs_link.csv')

# Question/answer records used to answer general queries.
with open('anjibot_data.json', 'r', encoding='utf-8') as file:
    anjibot_data = json.load(file)
17
+
18
def load_default_responses(filename):
    """Return the lines of a UTF-8 text file with surrounding whitespace removed.

    Args:
        filename: Path of the text file to read, one response per line.

    Returns:
        A list with one stripped string per line of the file; blank lines
        are kept as empty strings.
    """
    with open(filename, 'r', encoding='utf-8') as handle:
        return [line.strip() for line in handle]
22
+
23
# Canned replies, one per line of the text file.
default_responses = load_default_responses('default_responses.txt')

# Sentence-embedding model shared by every query.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
28
+
29
def encode_text(text):
    """Return the embedding of a single string.

    The shared ``model`` is handed a one-element batch and the first (only)
    row of its output is returned.
    """
    return model.encode([text])[0]
33
+
34
+ # function to answer general queries
35
+
36
# Embeddings of the stored questions; built on first use and then reused so
# the knowledge base is not re-encoded on every incoming query.
_question_embeddings = None


def _get_question_embeddings():
    """Return the embedding matrix for every question in ``anjibot_data``."""
    global _question_embeddings
    if _question_embeddings is None:
        _question_embeddings = np.array(
            [encode_text(item['question']) for item in anjibot_data]
        )
    return _question_embeddings


def answer_general_query(user_question):
    """Answer a free-form question from the stored question/answer records.

    The question is embedded and compared (cosine similarity) with every
    stored question:

    * similarity > 0.5  -> the stored answer of the closest question;
    * similarity > 0.3  -> a randomly chosen default response;
    * otherwise         -> a fixed "could not find the answer" message.

    Args:
        user_question: The question text to answer.

    Returns:
        The reply text.
    """
    not_found = "I'm sorry, I couldn't find the answer to your question. Please meet Anji or any of the class excos."

    # Nothing to compare against: argmax over an empty array would raise.
    if not anjibot_data:
        return not_found

    user_question_embedding = encode_text(user_question)
    similarities = cosine_similarity(
        [user_question_embedding], _get_question_embeddings()
    )[0]
    most_similar_index = int(np.argmax(similarities))
    max_similarity = similarities[most_similar_index]

    # Set a threshold for similarity
    if max_similarity > 0.5:
        return anjibot_data[most_similar_index]['answer']
    if max_similarity > 0.3 and default_responses:
        # Select a random default response
        return random.choice(default_responses)
    return not_found
55
+
56
+
57
def normalize_text(text):
    """Return the set of lower-case, punctuation-free words in *text*.

    A trailing possessive ``'s`` is removed ("Seun's" -> "seun"). Only the
    possessive suffix is removed: ``str.rstrip("'s")`` would strip every
    trailing ``'`` and ``s`` character and turn "class" into "cla".

    Args:
        text: Any string.

    Returns:
        A set of normalized words; empty for empty or punctuation-only input.
    """
    apostrophes = ("'", "\u2019")
    normalized_words = set()
    for token in text.split():
        # Keep apostrophes for now so the possessive suffix is still visible.
        word = ''.join(
            char.lower() for char in token
            if char.isalnum() or char in apostrophes
        )
        if word.endswith(("'s", "\u2019s")):
            word = word[:-2]
        for mark in apostrophes:
            word = word.replace(mark, "")
        if word:
            normalized_words.add(word)
    return normalized_words


# Words ignored when counting matches. Entries must be in normalized form
# (lower case, no punctuation), otherwise they can never match. "mrs", "is"
# and "as" are listed explicitly because they used to be excluded only as a
# side effect of the old trailing-"s" stripping ("mrs" -> "mr", "is" -> "i").
exceptions = ["mr", "mrs", "dr", "the", "i", "a", "to", "ayo", "in",
              "of", "and", "for", "with", "by", "at", "is", "as"]


# custom similarity matching function
def word_lookup(text, query, exceptions=exceptions):
    """Count the distinct words shared by *text* and *query*.

    Both strings are normalized with ``normalize_text`` first; words listed
    in *exceptions* are not counted.

    Args:
        text: First string to compare.
        query: Second string to compare.
        exceptions: Iterable of normalized words to ignore.

    Returns:
        The number of distinct shared words, as an int.
    """
    shared_words = normalize_text(text) & normalize_text(query)
    return len(shared_words.difference(exceptions))
87
+
88
+ # Function to find lecturer details using custom matching
89
def _is_missing(value):
    """Return True when a table cell holds no usable value.

    The lecturer table is cast to ``str`` when loaded, so an empty cell
    arrives here as the text "nan" rather than as a falsy value.
    """
    return str(value).strip().lower() in ("", "nan", "none")


def answer_lecturer_query(query):
    """Answer a question about a lecturer (phone number, office, name, course code).

    The query is matched against each row's course, course code and lecturer
    name with ``word_lookup``; the row with the most shared words wins. If no
    row shares a word with the query, the question is passed to
    ``answer_general_query``.

    Args:
        query: The user's question.

    Returns:
        The reply text.
    """
    query = query.lower()

    # A single word cannot name both a lecturer/course and what is wanted;
    # checked first so the table is not scanned for nothing.
    if len(query.split()) == 1:
        return "I'm sorry, I need more information to assist you."

    max_score = 0
    best_match = None
    for _, row in lecturer_data.iterrows():
        text = f"{row['course']} {row['course_code']} {row['name']}".lower()
        score = word_lookup(query, text)

        # Find the highest score
        if score > max_score:
            max_score = score
            best_match = row

    if best_match is None:
        return answer_general_query(query)

    name = best_match['name']
    course = best_match['course']
    course_code = best_match['course_code']
    lecturer = f"{name} the {course} ({course_code}) lecturer"

    # Process specific requests for phone number or office
    if "number" in query:  # also covers "phone number"
        phone_number = best_match['phone_number']
        if _is_missing(phone_number):
            return "Sorry, I don't recall the phone number for that lecturer."
        return f"Sure! {lecturer}'s phone number is {phone_number}."

    if "office" in query:
        office = best_match['office']
        if office == "No longer in Babcock":
            return f"Oops! {lecturer} is {office}."
        if _is_missing(office):
            return "Sorry, I seem to have forgotten the office of that lecturer."
        return f"Sure thing! {lecturer}'s office is at {office}."

    if "lecturer" in query or "who" in query:
        return f"{name} is the {course} ({course_code}) lecturer."
    if "code" in query:
        return f"The course code for {course} is {course_code}"
    return f"{course} has the course code: {course_code}"
130
+
131
def answer_doc_link_query(query):
    """Answer a request for course materials (school files or Study Smarter set).

    The query is matched against each row's course name and code with
    ``word_lookup``; the row with the most shared words wins.

    Args:
        query: The user's question.

    Returns:
        The reply text. When a course is recognised but the query names
        neither kind of resource, the question is passed to
        ``answer_general_query`` (previously this path returned ``None``).
    """
    query = query.lower()

    # A single word cannot name both a course and a resource; checked first
    # so the table is not scanned for nothing.
    if len(query.split()) == 1:
        return "I'm sorry, I need more information to assist you."

    school_files = ["past questions", "pq", "pstq", "slides for"]
    study_smarter = ["flashcards", "study set", "study", "study app", "study link", "slides", "today", "class", "lecturer"]

    max_score = 0
    best_match = None
    for _, row in doc_link_data.iterrows():
        text = f"{row['course']} {row['course_code']}".lower()
        score = word_lookup(query, text)

        # Find the highest score
        if score > max_score:
            max_score = score
            best_match = row

    if best_match is None:
        return "Sure! To assist you better, please provide the name or code of the course you are referring to, along with the complete query."

    course = best_match['course']
    course_code = best_match['course_code']

    if any(keyword in query for keyword in school_files):
        link = best_match['School files Link']
        # An empty CSV cell is read as NaN; treat it like "Unavailable".
        if pd.isna(link) or link == "Unavailable":
            return "Oops! Sorry, I can't find slides or past questions for that course."
        return f"Looking for slides and/or past questions for {course} ({course_code})? This link should help you: {link}"

    if any(keyword in query for keyword in study_smarter):
        link = best_match['Study Smarter Link']
        if pd.isna(link) or link == "Unavailable":
            return "I'm sorry, I can't find any study smarter study set for that course."
        return f"The Study Smarter study set for {course} ({course_code}) contains the recent slides sent by the lecturer (and possibly flashcards, notes, and more learning resources). The link to the study set: {link}"

    # Course recognised, but no resource keyword in the query.
    return answer_general_query(query)
169
+
170
+
171
+ # Define function to determine intent
172
def get_intent(query):
    """Classify a query as "unknown", "lecturer", "doc_link" or "general".

    Matching is a case-insensitive substring test against fixed keyword
    lists, checked in that order of priority; the first list with a hit wins.

    Args:
        query: The user's question.

    Returns:
        One of the four intent names above.
    """
    # Define keywords or phrases associated with each intent
    # (a missing comma used to fuse "lecturer's" and "phone number" into one
    # keyword that could never match).
    lecturer_keywords = ("lecturer", "lecturer's", "phone number", "number",
                         "office", "who", "code", "course", "name")
    doc_link_keywords = ("past questions", "pstq", "study materials",
                         "flashcards", "studysmarter", "study smarter",
                         "slides", "slide", "pdf")
    unknown_keywords = ("email", "missed", "write")

    query_lower = query.lower()

    def mentions(keywords):
        return any(keyword in query_lower for keyword in keywords)

    # Check for keywords in the query
    if mentions(unknown_keywords):
        return "unknown"
    if mentions(lecturer_keywords):
        return "lecturer"
    if mentions(doc_link_keywords):
        return "doc_link"
    return "general"
189
+
190
# Reply that asks the user to name a course; the next message is treated as
# a continuation of the question that triggered it.
_COURSE_FOLLOW_UP_PROMPT = "Sure! To assist you better, please provide the name or code of the course you are referring to, along with the complete query."

# Define variables to track the previous query and response
# NOTE(review): these are module-level, so every caller in this process
# shares the same "previous" turn.
previous_query = ""
previous_response = ""


def get_response(query):
    """Return the bot's reply to *query* and remember the turn.

    If the previous reply asked for a course name or code, the previous
    query is prepended to this one before it is classified. The query is
    then routed by ``get_intent`` to the lecturer, document-link or general
    answerer.

    Args:
        query: The user's message; empty or whitespace-only input gets a
            fixed "no stickers" reply.

    Returns:
        The reply text (never ``None``).
    """
    global previous_query, previous_response

    query = query or ""

    # Compare the reply as stored: lower-casing it first could never equal
    # the mixed-case prompt, so the follow-up was never recognised.
    if previous_response == _COURSE_FOLLOW_UP_PROMPT:
        # Append the previous query to the current one
        query = previous_query + " " + query

    if not query.strip():
        response = "Yo! Don't send me stickers, I don't understand them anyway 😕"
    else:
        intent = get_intent(query)
        if intent == "unknown":
            response = "Ugh, your query is quite beyond me. Please meet Anji directly :)"
        elif intent == "lecturer":
            response = answer_lecturer_query(query)
        elif intent == "doc_link":
            response = answer_doc_link_query(query)
        else:
            response = answer_general_query(query)

        # An answerer that produced nothing must not leave None behind: it
        # would be shown as an empty reply and break the next turn's check.
        if response is None:
            response = answer_general_query(query)

    # Update previous query and response
    previous_query = query
    previous_response = response

    return response
220
+
221
# Single-turn text interface around get_response.
# - The output Textbox takes no `context` argument, so it is not passed.
# - The privacy notice is shown as the interface's article text: as an
#   additional input it would be passed to get_response as a second
#   argument, which the function does not accept.
iface = gr.Interface(
    fn=get_response,
    inputs=[gr.Textbox(label="User:", placeholder="Enter your query")],
    outputs=[gr.Textbox(label="Anjibot:", lines=3)],
    title="AnjiBot",
    description="Hello! I'm AnjiBot, CS Group A AI Course Rep. How may I assist you today?",
    examples=[
        ["I need Dr. Seun's phone number"],
        ["When is the next class?"],
        ["I need the slides from today's lectures."],
    ],
    article="Please note that the data you share with Anjibot is not private.",
)

# Only start the server when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch(share=True)