YoniFriedman committed on
Commit
a330832
·
verified ·
1 Parent(s): 5833cf1

Updating to json payload

Browse files
Files changed (1) hide show
  1. app.py +191 -71
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import os
2
- os.environ["OPENAI_API_KEY"]
3
 
4
  from llama_index.llms.openai import OpenAI
5
  from llama_index.core.schema import MetadataMode
@@ -7,27 +7,14 @@ import openai
7
  from openai import OpenAI as OpenAIOG
8
  import logging
9
  import sys
10
- llm = OpenAI(temperature=0.0, model="gpt-4-turbo")
11
  client = OpenAIOG()
12
 
13
  from langdetect import detect
14
  from langdetect import DetectorFactory
15
  DetectorFactory.seed = 0
16
  from deep_translator import GoogleTranslator
17
-
18
- from sqlalchemy import (
19
- create_engine,
20
- MetaData,
21
- Table,
22
- Column,
23
- String,
24
- Integer,
25
- Date,
26
- select,
27
- column,
28
- insert,
29
- text
30
- )
31
 
32
  # Load index
33
  from llama_index.core import VectorStoreIndex
@@ -36,65 +23,196 @@ from llama_index.core import load_index_from_storage
36
  storage_context = StorageContext.from_defaults(persist_dir="arv_metadata")
37
  index = load_index_from_storage(storage_context)
38
  query_engine = index.as_query_engine(similarity_top_k=3, llm=llm)
39
- retriever = index.as_retriever(similarity_top_k=3)
40
 
41
  import gradio as gr
 
 
 
42
 
43
- def nishauri(question: str, ccc_user: str, conversation_history: list[str]):
44
-
45
- context = " ".join([item["user"] + " " + item["chatbot"] for item in conversation_history])
 
 
 
 
46
 
47
- # Get patient info from DB
48
- engine = create_engine('sqlite:///nishauri.db')
49
-
50
- with engine.connect() as connection:
51
- # Select data using a parameterized query
52
- result = connection.execute(
53
- text("SELECT visit_date, visit_type, regimen, viral_load FROM nishauri WHERE ccc_no = :ccc_no"),
54
- {"ccc_no": ccc_user}
55
- )
56
-
57
- # Fetch and print results
58
- row = result.fetchall()
 
 
59
 
60
- last_appt = row[0][0]
61
- appt_purpose = row[0][1]
62
- regimen = row[0][2]
63
- vl_result = row[0][3]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
 
 
65
 
66
- # Detect language of question - if Swahili, translate to English
67
- # only do this if there are at least 5 words in the text, otherwise lang detection is unreliable
 
 
 
 
 
 
 
 
68
 
69
- # Split the string into words
70
- words = question.split()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
- # Count the number of words
73
- num_words = len(words)
74
-
75
- lang_question = "en"
 
 
 
 
 
 
 
 
 
 
 
76
 
77
- if num_words > 4:
78
- lang_question = detect(question)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
- # lang_question = detect(question)
81
 
 
 
 
 
82
  if lang_question=="sw":
83
- question = GoogleTranslator(source='sw', target='en').translate(question)
84
 
 
85
  sources = retriever.retrieve(question)
86
  source0 = sources[0].text
87
  source1 = sources[1].text
88
-
89
- background = ("The person who asked the question is a person living with HIV."
90
- " If the person says sasa or niaje, that is swahili slang for hello. Just say hello back and ask how you can help."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  " Recognize that they already have HIV and do not suggest that they have to get tested"
92
  " for HIV or take post-exposure prophylaxis, as that is not relevant, though their partners perhaps should."
93
  " Do not suggest anything that is not relevant to someone who already has HIV."
94
  " Do not mention in the response that the person is living with HIV."
95
- f" The person's last appointment was on {last_appt} and the purpose was {appt_purpose}. "
96
- f" The person is on the following regimen for HIV: {regimen}. "
97
- f" The person's most recent viral load result was {vl_result}. "
98
  " The following information about viral loads is authoritative for any question about viral loads:"
99
  " Under 50 copies/ml is low detectable level,"
100
  " 50 - 199 copies/ml is low level viremia, 200 - 999 is high level viremia, and "
@@ -102,41 +220,43 @@ def nishauri(question: str, ccc_user: str, conversation_history: list[str]):
102
  " A high viral load or non-suppressed viral load is any viral load above 200 copies/ml."
103
  " A suppressed viral load is one below 200 copies / ml.")
104
 
 
105
  question_final = (
106
  f" The user previously asked and answered the following: {context}. "
107
  f" The user just asked the following question: {question}."
108
- f" Please use the following content to generate a response: {source0} {source1}."
109
- f" The following background on the user should also inform the response as needed: {background}"
110
  " Keep answers brief and limited to the question that was asked."
111
- " Do not provide information the user did not ask about. If they start with a greeting, just greet them in return and don't share anything else."
 
 
 
112
  )
113
-
 
114
  completion = client.chat.completions.create(
115
- model="gpt-4-turbo",
116
  messages=[
117
  {"role": "user", "content": question_final}
118
  ]
119
  )
120
-
121
  reply_to_user = completion.choices[0].message.content
122
-
123
-
124
- # If initial question was in Swahili, translate response back to Swahili
 
 
125
  if lang_question=="sw":
126
- reply_to_user = GoogleTranslator(source='auto', target='sw').translate(reply_to_user)
127
-
128
- conversation_history.append({"user": question, "chatbot": reply_to_user})
129
 
130
  return reply_to_user, conversation_history
131
 
132
-
133
  demo = gr.Interface(
134
  title = "Nishauri Chatbot Demo",
135
  fn=nishauri,
136
- inputs=[gr.Textbox(label="question", placeholder="Type your question here..."),
137
- gr.Textbox(label="CCC", placeholder="Type your ccc here..."),
138
- gr.State(value = [])],
139
  outputs=["text", gr.State()],
140
  )
141
 
142
- demo.launch()
 
1
  import os
2
# The OpenAI key must never be hard-coded in source control. It has to be supplied
# through the environment (for example as a deployment secret). Any key that was
# previously committed in this file should be treated as compromised and rotated.
# Fail fast with a clear message instead of failing later inside an API call.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "The OPENAI_API_KEY environment variable must be set before starting the app."
    )
3
 
4
  from llama_index.llms.openai import OpenAI
5
  from llama_index.core.schema import MetadataMode
 
7
  from openai import OpenAI as OpenAIOG
8
  import logging
9
  import sys
10
+ llm = OpenAI(temperature=0.0, model="gpt-3.5-turbo")
11
  client = OpenAIOG()
12
 
13
  from langdetect import detect
14
  from langdetect import DetectorFactory
15
  DetectorFactory.seed = 0
16
  from deep_translator import GoogleTranslator
17
+ from lingua import Language, LanguageDetectorBuilder
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  # Load index
20
  from llama_index.core import VectorStoreIndex
 
23
  storage_context = StorageContext.from_defaults(persist_dir="arv_metadata")
24
  index = load_index_from_storage(storage_context)
25
  query_engine = index.as_query_engine(similarity_top_k=3, llm=llm)
26
+ retriever = index.as_retriever(similarity_top_k = 3)
27
 
28
  import gradio as gr
29
+ import re
30
+ import json
31
+ from datetime import datetime
32
 
33
# Keyword tables used by the contains_* helpers to classify a message before
# any retrieval is attempted. Matching is done on the lower-cased message.

# Swahili acknowledgment words/phrases.
acknowledgment_keywords_sw = [
    "sawa",
    "ndiyo",
    "naam",
    "hakika",
    "asante",
    "nimeelewa",
    "nimekupata",
    "ni kweli",
    "kwa hakika",
    "nimesikia",
    "ahsante",
]

# English acknowledgment words/phrases.
acknowledgment_keywords_en = [
    "thanks",
    "thank you",
    "thx",
    "ok",
    "okay",
    "great",
    "got it",
    "appreciate",
    "good",
    "makes sense",
]

# Markers (English first, then Swahili) signalling that the message carries a
# real question, so it must not be short-circuited as small talk.
follow_up_keywords = [
    "but",
    "also",
    "and",
    "what",
    "how",
    "why",
    "when",
    "is",
    "?",
    "lakini",
    "pia",
    "na",
    "nini",
    "vipi",
    "kwanini",
    "wakati",
]

# Swahili greeting words.
greeting_keywords_sw = [
    "sasa",
    "niaje",
    "habari",
    "mambo",
    "jambo",
    "shikamoo",
    "marahaba",
    "hujambo",
    "hamjambo",
    "salama",
    "vipi",
]

# English greeting words/phrases.
greeting_keywords_en = [
    "hi",
    "hello",
    "hey",
    "how's it",
    "what's up",
    "yo",
    "howdy",
]
40
 
41
def contains_exact_word_or_phrase(text, keywords):
    """Return True if *text* contains any of *keywords* as a whole word or phrase.

    The text is lower-cased before matching; keywords are used as given, so
    they are expected to be lower-case already.

    A word boundary is only required on a side of the keyword that starts or
    ends with a word character. Requiring ``\\b`` around a pure punctuation
    keyword such as ``"?"`` would make it match only when squeezed between two
    word characters, so ``"ok?"`` would never be recognised.

    Args:
        text: The message to inspect.
        keywords: Iterable of words or phrases to look for.

    Returns:
        True when at least one keyword is found, otherwise False.
    """
    lowered = text.lower()
    for keyword in keywords:
        if not keyword:
            # An empty keyword carries no information; never let it match.
            continue
        leading = r'\b' if re.match(r'\w', keyword[0]) else ''
        trailing = r'\b' if re.match(r'\w', keyword[-1]) else ''
        if re.search(leading + re.escape(keyword) + trailing, lowered):
            return True
    return False
47
+
48
def contains_greeting_sw(question):
    """Return True if *question* contains one of the Swahili greeting keywords.

    Delegates to contains_exact_word_or_phrase with greeting_keywords_sw.
    """
    has_greeting = contains_exact_word_or_phrase(question, greeting_keywords_sw)
    return has_greeting
51
+
52
def contains_greeting_en(question):
    """Return True if *question* contains one of the English greeting keywords.

    Delegates to contains_exact_word_or_phrase with greeting_keywords_en.
    """
    has_greeting = contains_exact_word_or_phrase(question, greeting_keywords_en)
    return has_greeting
55
 
56
def contains_acknowledgment_sw(question):
    """Return True if *question* contains one of the Swahili acknowledgment keywords.

    Delegates to contains_exact_word_or_phrase with acknowledgment_keywords_sw.
    """
    has_acknowledgment = contains_exact_word_or_phrase(question, acknowledgment_keywords_sw)
    return has_acknowledgment
59
+
60
def contains_acknowledgment_en(question):
    """Return True if *question* contains one of the English acknowledgment keywords.

    Delegates to contains_exact_word_or_phrase with acknowledgment_keywords_en.
    """
    has_acknowledgment = contains_exact_word_or_phrase(question, acknowledgment_keywords_en)
    return has_acknowledgment
63
+
64
def contains_follow_up(question):
    """Return True if *question* contains one of the follow-up indicator keywords.

    Delegates to contains_exact_word_or_phrase with follow_up_keywords.
    """
    has_follow_up = contains_exact_word_or_phrase(question, follow_up_keywords)
    return has_follow_up
67
+
68
def convert_to_date(date_str):
    """Parse a ``YYYYMMDD`` string into a ``datetime``.

    Args:
        date_str: Date text in ``%Y%m%d`` form, or an empty/``None`` value
            when the field was not supplied.

    Returns:
        The parsed ``datetime`` (time part is midnight), or ``None`` when
        *date_str* is empty or ``None``.

    Raises:
        ValueError: If *date_str* is non-empty but does not match ``%Y%m%d``.
    """
    # Callers pass "" when the payload omits the field; treat that as
    # "no date" rather than failing the whole request.
    if not date_str:
        return None
    return datetime.strptime(date_str, "%Y%m%d")
70
+
71
# Lazily-built lingua detector shared across calls; building it on every
# request repeats the same setup work for no benefit.
_short_text_detector = None


def _get_short_text_detector():
    """Return the shared English/Swahili lingua detector, building it on first use."""
    global _short_text_detector
    if _short_text_detector is None:
        _short_text_detector = LanguageDetectorBuilder.from_languages(
            Language.ENGLISH, Language.SWAHILI
        ).build()
    return _short_text_detector


def detect_language(question):
    """Return a language code for *question*.

    Texts with fewer than five words are classified with lingua, restricted
    to English and Swahili. Longer texts are classified with langdetect.

    Args:
        question: The user's message.

    Returns:
        ``"sw"`` or ``"en"`` for short texts that lingua classifies, whatever
        code langdetect reports for longer texts, and ``"unknown"`` when
        neither detector produces an answer.
    """
    if len(question.split()) < 5:
        detected_language = _get_short_text_detector().detect_language_of(question)
        if detected_language == Language.SWAHILI:
            return "sw"
        if detected_language == Language.ENGLISH:
            return "en"
        # No usable answer from lingua (e.g. empty text): report it explicitly
        # instead of falling off the end of the function and returning None.
        return "unknown"

    try:
        return detect(question)
    except Exception as e:
        # Best-effort: language detection must never break the chat flow.
        print(f"Error with langdetect: {e}")
        return "unknown"
89
+
90
_BACKGROUND_INTRO = "The person who asked the question is a person living with HIV."

# Guidance appended to the background whether or not the user consented to
# sharing personal details.
_BACKGROUND_GUIDANCE = (
    " They are asking questions about HIV. Do not talk about anything that is not related to HIV. "
    " Recognize that they already have HIV and do not suggest that they have to get tested"
    " for HIV or take post-exposure prophylaxis, as that is not relevant, though their partners perhaps should."
    " Do not suggest anything that is not relevant to someone who already has HIV."
    " Do not mention in the response that the person is living with HIV."
    " The following information about viral loads is authoritative for any question about viral loads:"
    " Under 50 copies/ml is low detectable level,"
    " 50 - 199 copies/ml is low level viremia, 200 - 999 is high level viremia, and "
    " 1000 and above is suspected treatment failure."
    " A high viral load or non-suppressed viral load is any viral load above 200 copies/ml."
    " A suppressed viral load is one below 200 copies / ml."
)


def _ask_model(prompt):
    """Send *prompt* as a single user message to the chat model and return the reply text."""
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "user", "content": prompt}
        ]
    )
    return completion.choices[0].message.content


def _small_talk_prompt(question, context):
    """Return the prompt for a pure greeting/acknowledgment, or None if *question* is neither."""
    # Anything that looks like a follow-up question must go through retrieval.
    if contains_follow_up(question):
        return None
    # Checked in priority order; the first matching category wins.
    categories = (
        (contains_greeting_en, "greeting", "English"),
        (contains_greeting_sw, "greeting", "Swahili"),
        (contains_acknowledgment_en, "acknowledgement", "English"),
        (contains_acknowledgment_sw, "acknowledgment", "Swahili"),
    )
    for matches, kind, language in categories:
        if matches(question):
            return (
                f" The user previously asked and answered the following: {context}. "
                f" The user just provided the following {kind}: {question}. "
                f"Please respond accordingly in {language}."
            )
    return None


def _payload_date(raw_value):
    """Parse a payload date; return "unknown" when it is missing or malformed."""
    if not raw_value:
        return "unknown"
    try:
        return convert_to_date(raw_value)
    except (TypeError, ValueError):
        # A bad date in the payload should degrade the prompt, not fail the chat.
        return "unknown"


def _build_background(person_info):
    """Build the background text; *person_info* is None when the user did not consent."""
    if person_info is None:
        return _BACKGROUND_INTRO + _BACKGROUND_GUIDANCE

    gender = person_info.get("GENDER", "")
    age = person_info.get("AGE", "")
    vl_result = person_info.get("VIRAL_LOAD", "")
    vl_date = _payload_date(person_info.get("VIRAL_LOAD_DATETIME"))
    next_appt_date = _payload_date(person_info.get("APPOINTMENT_DATETIME"))
    regimen = person_info.get("REGIMEN", "")
    return (
        _BACKGROUND_INTRO
        + f" The person is {gender} and age is {age}. "
        + f" The person's next clinical check in is scheduled for {next_appt_date}. This has no bearing on when viral loads are taken. "
        + f" The person is on the following regimen for HIV {regimen}. "
        + f" The person's most recent viral load result was {vl_result} and it was taken on {vl_date}. "
        + _BACKGROUND_GUIDANCE
    )


def nishauri(user_params: str, conversation_history: list[dict]):
    """Answer one chat turn and return the reply with the updated history.

    Pure greetings and acknowledgments are answered directly by the chat
    model. Everything else is (if detected as Swahili) translated to English,
    answered from retrieved sources plus background guidance, and translated
    back to Swahili when the question was Swahili.

    Args:
        user_params: JSON object string. Keys read: ``QUESTION``, ``CONSENT``
            and ``PERSON_INFO`` (``GENDER``, ``AGE``, ``VIRAL_LOAD``,
            ``VIRAL_LOAD_DATETIME``, ``APPOINTMENT_DATETIME``, ``REGIMEN``).
            Personal details are only used when ``CONSENT`` is ``"YES"``.
        conversation_history: Previous turns as dicts with ``"user"`` and
            ``"chatbot"`` keys; the new turn is appended in place.

    Returns:
        A ``(reply_to_user, conversation_history)`` tuple.

    Raises:
        json.JSONDecodeError: If *user_params* is not valid JSON.
    """
    # Flatten earlier turns into one string of prior context for the model.
    context = " ".join(item["user"] + " " + item["chatbot"] for item in conversation_history)

    params = json.loads(user_params)
    consent = params.get("CONSENT")
    # "or" guards against keys that are present but null in the payload.
    person_info = params.get("PERSON_INFO") or {}
    question = params.get("QUESTION") or ""

    # Greetings / acknowledgments are handled without retrieval or personal data.
    small_talk = _small_talk_prompt(question, context)
    if small_talk is not None:
        reply_to_user = _ask_model(small_talk)
        conversation_history.append({"user": question, "chatbot": reply_to_user})
        return reply_to_user, conversation_history

    # Retrieval and prompting work in English, so translate Swahili questions first.
    lang_question = detect_language(question)
    if lang_question == "sw":
        question = GoogleTranslator(source='sw', target='en').translate(question)

    # Use however many sources came back instead of indexing a fixed three.
    sources = retriever.retrieve(question)
    source_text = " ".join(source.text for source in sources)

    # Personal details are parsed only here, and only with consent, so a
    # payload without them cannot break greetings or non-consented questions.
    background = _build_background(person_info if consent == "YES" else None)

    question_final = (
        f" The user previously asked and answered the following: {context}. "
        f" The user just asked the following question: {question}."
        f" Please use the following content to generate a response: {source_text}."
        f" Please consider the following background information when generating a response: {background}."
        " Keep answers brief and limited to the question that was asked."
        " If they share a greeting, just greet them in return and ask if they have a question."
        " Do not change the subject or address anything the user didn't directly ask about."
        " If they respond with an acknowledgement, simply thank them ask if there is anything else that you can help with."
        " Keep the response to under 50 words and use simple language. The user may not know technical terms."
    )

    reply_to_user = _ask_model(question_final)

    # History keeps the English question and English reply so later prompts
    # stay in one language; only the text returned to the user is translated.
    conversation_history.append({"user": question, "chatbot": reply_to_user})

    if lang_question == "sw":
        reply_to_user = GoogleTranslator(source='auto', target='sw').translate(reply_to_user)

    return reply_to_user, conversation_history
254
 
 
255
# Gradio front end for nishauri: one text box carrying the JSON payload string
# plus a state slot holding the conversation history (an empty list to begin
# with). The outputs are the reply text and the updated history state.
demo = gr.Interface(
    fn=nishauri,
    title="Nishauri Chatbot Demo",
    inputs=["text", gr.State(value=[])],
    outputs=["text", gr.State()],
)

demo.launch()