YoniFriedman committed on
Commit
a0630d4
·
verified ·
1 Parent(s): adb0802

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +194 -63
app.py CHANGED
@@ -14,6 +14,7 @@ from langdetect import detect
14
  from langdetect import DetectorFactory
15
  DetectorFactory.seed = 0
16
  from deep_translator import GoogleTranslator
 
17
 
18
  # Load index
19
  from llama_index.core import VectorStoreIndex
@@ -26,54 +27,204 @@ retriever = index.as_retriever(similarity_top_k = 3)
26
 
27
  import gradio as gr
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def nishauri(question: str, conversation_history: list[str]):
30
-
31
-
32
  context = " ".join([item["user"] + " " + item["chatbot"] for item in conversation_history])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
- # Split the string into words
35
- words = question.split()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
- # Count the number of words
38
- num_words = len(words)
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
- lang_question = "en"
41
-
42
- if num_words > 4:
43
- lang_question = detect(question)
44
 
 
 
45
  if lang_question=="sw":
46
  question = GoogleTranslator(source='sw', target='en').translate(question)
47
-
 
48
  sources = retriever.retrieve(question)
49
  source0 = sources[0].text
50
  source1 = sources[1].text
51
  source2 = sources[2].text
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  background = ("The person who asked the question is a person living with HIV."
54
- " If the person says sasa or niaje, that is swahili slang for hello."
55
- " Recognize that they already have HIV and do not suggest that they have to get tested"
56
- " for HIV or take post-exposure prophylaxis, as that is not relevant, though their partners perhaps should."
57
- " Do not suggest anything that is not relevant to someone who already has HIV."
58
- " Do not mention in the response that the person is living with HIV."
59
- " The following information about viral loads is authoritative for any question about viral loads:"
60
- " Under 50 copies/ml is low detectable level,"
61
- " 50 - 199 copies/ml is low level viremia, 200 - 999 is high level viremia, and "
62
- " 1000 and above is suspected treatment failure."
63
- " A high viral load or non-suppressed viral load is any viral load above 200 copies/ml."
64
- " A suppressed viral load is one below 200 copies / ml.")
 
65
 
66
  question_final = (
67
  f" The user previously asked and answered the following: {context}. "
68
  f" The user just asked the following question: {question}."
69
  f" Please use the following content to generate a response: {source0} {source1} {source2}."
70
- f" Please update the response provided only if needed, based on the following background information {background}."
71
  " Keep answers brief and limited to the question that was asked."
72
- " Do not provide information the user did not ask about. If they start with a greeting, just greet them in return and don't share anything else."
 
 
 
 
 
73
  )
74
-
75
  completion = client.chat.completions.create(
76
- model="gpt-4-turbo",
77
  messages=[
78
  {"role": "user", "content": question_final}
79
  ]
@@ -84,40 +235,20 @@ def nishauri(question: str, conversation_history: list[str]):
84
  if lang_question=="sw":
85
  reply_to_user = GoogleTranslator(source='auto', target='sw').translate(reply_to_user)
86
 
87
- conversation_history.append({"user": question, "chatbot": reply_to_user})
88
-
89
- source1 = ("File Name: " +
90
- sources[0].metadata["file_name"] +
91
- "\nPage Number: " +
92
- sources[0].metadata["page_label"] +
93
- "\n Source Text: " +
94
- sources[0].text)
95
-
96
- source2 = ("File Name: " +
97
- sources[1].metadata["file_name"] +
98
- "\nPage Number: " +
99
- sources[1].metadata["page_label"] +
100
- "\n Source Text: " +
101
- sources[1].text)
102
-
103
- source3 = ("File Name: " +
104
- sources[2].metadata["file_name"] +
105
- "\nPage Number: " +
106
- sources[2].metadata["page_label"] +
107
- "\n Source Text: " +
108
- sources[2].text)
109
-
110
- return reply_to_user, source1, source2, source3, conversation_history
111
-
112
- inputs = [gr.Textbox(lines=10, label="Question"),
113
- gr.State(value=[])]
114
- outputs = [
115
- gr.Textbox(label="Chatbot Response", type="text"),
116
- gr.Textbox(label="Source 1", max_lines = 10, autoscroll = False, type="text"),
117
- gr.Textbox(label="Source 2", max_lines = 10, autoscroll = False, type="text"),
118
- gr.Textbox(label="Source 3", max_lines = 10, autoscroll = False, type="text"),
119
- gr.State()
120
- ]
121
-
122
- gr.Interface(fn=nishauri, inputs=inputs, outputs=outputs, title="Nishauri Chatbot",
123
- description="Enter a question and see the processed outputs in collapsible boxes.").launch()
 
14
  from langdetect import DetectorFactory
15
  DetectorFactory.seed = 0
16
  from deep_translator import GoogleTranslator
17
+ from lingua import Language, LanguageDetectorBuilder
18
 
19
  # Load index
20
  from llama_index.core import VectorStoreIndex
 
27
 
28
  import gradio as gr
29
 
30
+ import re
31
+ import json
32
+ from datetime import datetime
33
+
34
# Keyword tables consulted by the contains_* helpers. The matcher lower-cases
# the incoming text before searching, so every entry is written in lower case.

# Swahili words/phrases treated as an acknowledgment of a previous answer.
acknowledgment_keywords_sw = [
    "sawa",
    "ndiyo",
    "naam",
    "hakika",
    "asante",
    "nimeelewa",
    "nimekupata",
    "ni kweli",
    "kwa hakika",
    "nimesikia",
]

# English words/phrases treated as an acknowledgment of a previous answer.
acknowledgment_keywords_en = [
    "thanks",
    "thank you",
    "thx",
    "ok",
    "okay",
    "great",
    "got it",
    "appreciate",
    "good",
    "makes sense",
]

# Markers (English first, then Swahili) that signal the message carries an
# actual question or continuation rather than a bare greeting/acknowledgment.
# NOTE(review): "vipi" is listed here and in greeting_keywords_sw, so a
# message containing it always counts as a follow-up — confirm this is wanted.
follow_up_keywords = [
    "but",
    "also",
    "and",
    "what",
    "how",
    "why",
    "when",
    "is",
    "?",
    "lakini",
    "pia",
    "na",
    "nini",
    "vipi",
    "kwanini",
    "wakati",
]

# Swahili greetings.
greeting_keywords_sw = [
    "sasa",
    "niaje",
    "habari",
    "mambo",
    "jambo",
    "shikamoo",
    "marahaba",
    "hujambo",
    "hamjambo",
    "salama",
    "vipi",
]

# English greetings.
greeting_keywords_en = [
    "hi",
    "hello",
    "hey",
    "how's it",
    "what's up",
    "yo",
    "howdy",
]
41
+
42
def contains_exact_word_or_phrase(text: str, keywords: list[str]) -> bool:
    """Return True if *text* contains any of *keywords* as a whole word or phrase.

    The text is lower-cased before searching, so keywords are expected to be
    lower-case. A keyword only matches when it is not embedded in a longer
    word ("hi" does not match "this"). Keywords that begin or end with a
    non-word character (for example "?") are matched wherever they occur.

    Args:
        text: The message to inspect.
        keywords: Words or phrases to look for; empty entries are ignored.

    Returns:
        True if at least one keyword is found, otherwise False.
    """
    text = text.lower()
    is_word_char = re.compile(r'\w').match
    for keyword in keywords:
        if not keyword:
            # An empty pattern would match any text; never count it as a hit.
            continue
        # \b needs a word character on one side. Anchoring a keyword such as
        # "?" with \b on both sides makes it fail in "how are you?", so only
        # anchor the edges of the keyword that are themselves word characters.
        prefix = r'\b' if is_word_char(keyword[0]) else ''
        suffix = r'\b' if is_word_char(keyword[-1]) else ''
        if re.search(prefix + re.escape(keyword) + suffix, text):
            return True
    return False
48
+
49
def contains_greeting_sw(question):
    """Return True if *question* contains an entry of greeting_keywords_sw.

    Matching is delegated to contains_exact_word_or_phrase, which lower-cases
    the text and matches whole words or phrases.
    """
    return contains_exact_word_or_phrase(question, greeting_keywords_sw)
52
+
53
def contains_greeting_en(question):
    """Return True if *question* contains an entry of greeting_keywords_en.

    Matching is delegated to contains_exact_word_or_phrase, which lower-cases
    the text and matches whole words or phrases.
    """
    return contains_exact_word_or_phrase(question, greeting_keywords_en)
56
+
57
def contains_acknowledgment_sw(question):
    """Return True if *question* contains an entry of acknowledgment_keywords_sw.

    Matching is delegated to contains_exact_word_or_phrase, which lower-cases
    the text and matches whole words or phrases.
    """
    return contains_exact_word_or_phrase(question, acknowledgment_keywords_sw)
60
+
61
def contains_acknowledgment_en(question):
    """Return True if *question* contains an entry of acknowledgment_keywords_en.

    Matching is delegated to contains_exact_word_or_phrase, which lower-cases
    the text and matches whole words or phrases.
    """
    return contains_exact_word_or_phrase(question, acknowledgment_keywords_en)
64
+
65
def contains_follow_up(question):
    """Return True if *question* contains an entry of follow_up_keywords.

    Matching is delegated to contains_exact_word_or_phrase, which lower-cases
    the text before searching for each keyword.
    """
    return contains_exact_word_or_phrase(question, follow_up_keywords)
68
+
69
def convert_to_date(date_str: str, date_format: str = "%Y%m%d") -> datetime:
    """Parse *date_str* into a datetime.

    Args:
        date_str: The date text to parse, e.g. "20240131".
        date_format: strptime format describing *date_str*. Defaults to the
            compact year-month-day form "%Y%m%d".

    Returns:
        The parsed datetime (naive; no timezone is attached).

    Raises:
        ValueError: If *date_str* does not match *date_format*.
    """
    return datetime.strptime(date_str, date_format)
71
+
72
# Lazily-built lingua detector, shared across calls so it is constructed once.
_short_text_detector = None


def _get_short_text_detector():
    """Return the English/Swahili lingua detector, building it on first use."""
    global _short_text_detector
    if _short_text_detector is None:
        languages = [Language.ENGLISH, Language.SWAHILI]  # Add more languages as needed
        _short_text_detector = LanguageDetectorBuilder.from_languages(*languages).build()
    return _short_text_detector


def detect_language(question):
    """Return a language code for *question*.

    Messages with fewer than five whitespace-separated words are classified
    with a lingua detector restricted to English and Swahili; longer messages
    go through langdetect.
    NOTE(review): presumably the split exists because langdetect is unreliable
    on very short input — confirm.

    Args:
        question: The user's message.

    Returns:
        "sw" or "en" for short messages that lingua can classify, whatever
        code langdetect reports for longer messages, or "unknown" when
        neither detector produces a result.
    """
    if len(question.split()) < 5:
        detected_language = _get_short_text_detector().detect_language_of(question)
        # Return language code for consistency with langdetect's output.
        if detected_language == Language.SWAHILI:
            return "sw"
        if detected_language == Language.ENGLISH:
            return "en"
        # Anything else (including no detection) used to fall off the end and
        # return None; report it the same way as a langdetect failure.
        return "unknown"

    try:
        return detect(question)
    except Exception as e:
        # Best-effort: a detection failure must not abort the chat turn.
        print(f"Error with langdetect: {e}")
        return "unknown"
90
+
91
  def nishauri(question: str, conversation_history: list[str]):
92
+
93
+ # Get conversation history
94
  context = " ".join([item["user"] + " " + item["chatbot"] for item in conversation_history])
95
+
96
+ ## Process greeting
97
+ # greet_response = process_greeting_response(question)
98
+ if contains_greeting_en(question) and not contains_follow_up(question):
99
+ greeting = (
100
+ f" The user previously asked and answered the following: {context}. "
101
+ f" The user just provided the following greeting: {question}. "
102
+ "Please respond accordingly in English."
103
+ )
104
+ completion = client.chat.completions.create(
105
+ model="gpt-4o",
106
+ messages=[
107
+ {"role": "user", "content": greeting}
108
+ ]
109
+ )
110
+ reply_to_user = completion.choices[0].message.content
111
+ conversation_history.append({"user": question, "chatbot": reply_to_user})
112
+ return reply_to_user, conversation_history
113
 
114
+ if contains_greeting_sw(question) and not contains_follow_up(question):
115
+ greeting = (
116
+ f" The user previously asked and answered the following: {context}. "
117
+ f" The user just provided the following greeting: {question}. "
118
+ "Please respond accordingly in Swahili."
119
+ )
120
+ completion = client.chat.completions.create(
121
+ model="gpt-4o",
122
+ messages=[
123
+ {"role": "user", "content": greeting}
124
+ ]
125
+ )
126
+ reply_to_user = completion.choices[0].message.content
127
+ conversation_history.append({"user": question, "chatbot": reply_to_user})
128
+ return reply_to_user, conversation_history
129
+
130
+ ## Process acknowledgment
131
+ if contains_acknowledgment_en(question) and not contains_follow_up(question):
132
+ acknowledgment = (
133
+ f" The user previously asked and answered the following: {context}. "
134
+ f" The user just provided the following acknowledgement: {question}. "
135
+ "Please respond accordingly in English."
136
+ )
137
+ completion = client.chat.completions.create(
138
+ model="gpt-4o",
139
+ messages=[
140
+ {"role": "user", "content": acknowledgment}
141
+ ]
142
+ )
143
+ reply_to_user = completion.choices[0].message.content
144
+ conversation_history.append({"user": question, "chatbot": reply_to_user})
145
+ return reply_to_user, conversation_history
146
 
147
+ if contains_acknowledgment_sw(question) and not contains_follow_up(question):
148
+ acknowledgment = (
149
+ f" The user previously asked and answered the following: {context}. "
150
+ f" The user just provided the following acknowledgment: {question}. "
151
+ "Please respond accordingly in Swahili."
152
+ )
153
+ completion = client.chat.completions.create(
154
+ model="gpt-4o",
155
+ messages=[
156
+ {"role": "user", "content": acknowledgment}
157
+ ]
158
+ )
159
+ reply_to_user = completion.choices[0].message.content
160
+ conversation_history.append({"user": question, "chatbot": reply_to_user})
161
+ return reply_to_user, conversation_history
162
 
163
+ ## If not greeting or acknowledgement, then proceed with RAG
 
 
 
164
 
165
+ ## Detect language of question - if Swahili, translate to English
166
+ lang_question = detect_language(question)
167
  if lang_question=="sw":
168
  question = GoogleTranslator(source='sw', target='en').translate(question)
169
+
170
+ # Now, retrieve relevant sources
171
  sources = retriever.retrieve(question)
172
  source0 = sources[0].text
173
  source1 = sources[1].text
174
  source2 = sources[2].text
175
 
176
+ source1return = ("File Name: " +
177
+ sources[0].metadata["file_name"] +
178
+ "\nPage Number: " +
179
+ sources[0].metadata["page_label"] +
180
+ "\n Source Text: " +
181
+ sources[0].text)
182
+
183
+ source2return = ("File Name: " +
184
+ sources[1].metadata["file_name"] +
185
+ "\nPage Number: " +
186
+ sources[1].metadata["page_label"] +
187
+ "\n Source Text: " +
188
+ sources[1].text)
189
+
190
+ source3return = ("File Name: " +
191
+ sources[2].metadata["file_name"] +
192
+ "\nPage Number: " +
193
+ sources[2].metadata["page_label"] +
194
+ "\n Source Text: " +
195
+ sources[2].text)
196
+
197
+
198
  background = ("The person who asked the question is a person living with HIV."
199
+ " They are asking questions about HIV. Do not talk about anything that is not related to HIV. "
200
+ " Recognize that they already have HIV and do not suggest that they have to get tested"
201
+ " for HIV or take post-exposure prophylaxis, as that is not relevant, though their partners perhaps should."
202
+ " Do not suggest anything that is not relevant to someone who already has HIV."
203
+ " Do not mention in the response that the person is living with HIV."
204
+ " The following information about viral loads is authoritative for any question about viral loads:"
205
+ # " Under 50 copies/ml is low detectable level,"
206
+ # " 50 - 199 copies/ml is low level viremia, 200 - 999 is high level viremia, and "
207
+ # " 1000 and above is suspected treatment failure."
208
+ " A high viral load or non-suppressed viral load is any viral load above 200 copies/ml."
209
+ " A viral load above 1000 copies/ml suggests treatment failure."
210
+ " A suppressed viral load is one below 200 copies / ml.")
211
 
212
  question_final = (
213
  f" The user previously asked and answered the following: {context}. "
214
  f" The user just asked the following question: {question}."
215
  f" Please use the following content to generate a response: {source0} {source1} {source2}."
216
+ f" Please consider the following background information when generating a response: {background}."
217
  " Keep answers brief and limited to the question that was asked."
218
+ " If they share a greeting, just greet them in return and ask if they have a question."
219
+ " Do not change the subject or address anything the user didn't directly ask about."
220
+ " If they respond with an acknowledgement, simply thank them."
221
+ " Do not discuss anything other than HIV. If they ask a question that is not about HIV, respond that"
222
+ " you are only able to discuss HIV."
223
+ " Keep the response to under 50 words and use simple language. The person asking the question does not know technical terms."
224
  )
225
+
226
  completion = client.chat.completions.create(
227
+ model="gpt-4o",
228
  messages=[
229
  {"role": "user", "content": question_final}
230
  ]
 
235
  if lang_question=="sw":
236
  reply_to_user = GoogleTranslator(source='auto', target='sw').translate(reply_to_user)
237
 
238
+ conversation_history.append({"user": question, "chatbot": reply_to_user})
239
+
240
+ return reply_to_user, source1return, source2return, source3return, conversation_history
241
+
242
# Gradio UI: one text input plus per-session conversation state in, the
# chatbot reply, three source excerpts and the updated state out.
# NOTE(review): five output components are declared here, but nishauri's
# greeting/acknowledgment early returns yield only two values — confirm.
# NOTE(review): no .launch() call is visible in this view — confirm the app
# is started elsewhere.
demo = gr.Interface(
    title="Nuru Chatbot Demo",
    description="Enter a question and see the processed outputs in collapsible boxes.",
    fn=nishauri,
    inputs=["text", gr.State(value=[])],
    outputs=[
        gr.Textbox(label="Nuru Response", type="text"),
        gr.Textbox(label="Source 1", max_lines=10, autoscroll=False, type="text"),
        gr.Textbox(label="Source 2", max_lines=10, autoscroll=False, type="text"),
        gr.Textbox(label="Source 3", max_lines=10, autoscroll=False, type="text"),
        gr.State(),
    ],
)