Spaces:

Nishauri
/

ChatbotPersonalized

Runtime error

App Files Files Community

YoniFriedman committed on Jul 17, 2024

Commit

a330832

verified ·

1 Parent(s): 5833cf1

Updating to json payload

Browse files

Files changed (1) hide show

app.py +191 -71

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import os
-os.environ["OPENAI_API_KEY"]
 from llama_index.llms.openai import OpenAI
 from llama_index.core.schema import MetadataMode
@@ -7,27 +7,14 @@ import openai
 from openai import OpenAI as OpenAIOG
 import logging
 import sys
-llm = OpenAI(temperature=0.0, model="gpt-4-turbo")
 client = OpenAIOG()
 from langdetect import detect
 from langdetect import DetectorFactory
 DetectorFactory.seed = 0
 from deep_translator import GoogleTranslator
-from sqlalchemy import (
-    create_engine,
-    MetaData,
-    Table,
-    Column,
-    String,
-    Integer,
-    Date,
-    select,
-    column,
-    insert,
-    text
-)
 # Load index
 from llama_index.core import VectorStoreIndex
@@ -36,65 +23,196 @@ from llama_index.core import load_index_from_storage
 storage_context = StorageContext.from_defaults(persist_dir="arv_metadata")
 index = load_index_from_storage(storage_context)
 query_engine = index.as_query_engine(similarity_top_k=3, llm=llm)
-retriever = index.as_retriever(similarity_top_k=3)
 import gradio as gr
-def nishauri(question: str, ccc_user: str, conversation_history: list[str]):
-    context = " ".join([item["user"] + " " + item["chatbot"] for item in conversation_history])
-    # Get patient info from DB
-    engine = create_engine('sqlite:///nishauri.db')
-    with engine.connect() as connection:
-        # Select data using a parameterized query
-        result = connection.execute(
-            text("SELECT visit_date, visit_type, regimen, viral_load FROM nishauri WHERE ccc_no = :ccc_no"),
-            {"ccc_no": ccc_user}
-        )
-    # Fetch and print results
-    row = result.fetchall()
-    last_appt = row[0][0]
-    appt_purpose = row[0][1]
-    regimen = row[0][2]
-    vl_result = row[0][3]
-    # Detect language of question - if Swahili, translate to English
-    # only do this if there are at least 5 words in the text, otherwise lang detection is unreliable
-    # Split the string into words
-    words = question.split()
-    # Count the number of words
-    num_words = len(words)
-    lang_question = "en"
-    if num_words > 4:
-        lang_question = detect(question)
-#     lang_question = detect(question)
     if lang_question=="sw":
-        question = GoogleTranslator(source='sw', target='en').translate(question)
     sources = retriever.retrieve(question)
     source0 = sources[0].text
     source1 = sources[1].text
-    background = ("The person who asked the question is a person living with HIV."
-                  " If the person says sasa or niaje, that is swahili slang for hello. Just say hello back and ask how you can help."
                   " Recognize that they already have HIV and do not suggest that they have to get tested"
                   " for HIV or take post-exposure prophylaxis, as that is not relevant, though their partners perhaps should."
                   " Do not suggest anything that is not relevant to someone who already has HIV."
                   " Do not mention in the response that the person is living with HIV."
-                  f" The person's last appointment was on {last_appt} and the purpose was {appt_purpose}. "
-                  f" The person is on the following regimen for HIV: {regimen}. "
-                  f" The person's most recent viral load result was {vl_result}. "
                   " The following information about viral loads is authoritative for any question about viral loads:"
                   " Under 50 copies/ml is low detectable level,"
                   " 50 - 199 copies/ml is low level viremia, 200 - 999 is high level viremia, and "
@@ -102,41 +220,43 @@ def nishauri(question: str, ccc_user: str, conversation_history: list[str]):
                   " A high viral load or non-suppressed viral load is any viral load above 200 copies/ml."
                   " A suppressed viral load is one below 200 copies / ml.")
     question_final = (
         f" The user previously asked and answered the following: {context}. "
         f" The user just asked the following question: {question}."
-        f" Please use the following content to generate a response: {source0} {source1}."
-        f" The following background on the user should also inform the response as needed: {background}"
         " Keep answers brief and limited to the question that was asked."
-        " Do not provide information the user did not ask about. If they start with a greeting, just greet them in return and don't share anything else."
     )
     completion = client.chat.completions.create(
-      model="gpt-4-turbo",
         messages=[
         {"role": "user", "content": question_final}
       ]
     )
     reply_to_user = completion.choices[0].message.content
-    # If initial question was in Swahili, translate response back to Swahili
     if lang_question=="sw":
-        reply_to_user = GoogleTranslator(source='auto', target='sw').translate(reply_to_user)
-    conversation_history.append({"user": question, "chatbot": reply_to_user})
     return reply_to_user, conversation_history
 demo = gr.Interface(
     title = "Nishauri Chatbot Demo",
     fn=nishauri,
-    inputs=[gr.Textbox(label="question", placeholder="Type your question here..."),
-            gr.Textbox(label="CCC", placeholder="Type your ccc here..."),
-            gr.State(value = [])],
     outputs=["text", gr.State()],
 )
-demo.launch()

 import os
+# The OpenAI key must come from the environment (for example a Space secret).
+# Never hard-code it: anything committed to the repository is readable by others
+# and has to be treated as compromised and rotated.
+if not os.environ.get("OPENAI_API_KEY"):
+    raise RuntimeError("OPENAI_API_KEY is not set; configure it as an environment secret.")
 from llama_index.llms.openai import OpenAI
 from llama_index.core.schema import MetadataMode
 from openai import OpenAI as OpenAIOG
 import logging
 import sys
+# llm is only handed to query_engine below; the chat completions go through client.
+llm = OpenAI(temperature=0.0, model="gpt-3.5-turbo")
 client = OpenAIOG()
 from langdetect import detect
 from langdetect import DetectorFactory
+# Fixed seed so langdetect gives the same answer for the same text on every run.
 DetectorFactory.seed = 0
 from deep_translator import GoogleTranslator
+from lingua import Language, LanguageDetectorBuilder
 # Load index
 from llama_index.core import VectorStoreIndex
 storage_context = StorageContext.from_defaults(persist_dir="arv_metadata")
 index = load_index_from_storage(storage_context)
 query_engine = index.as_query_engine(similarity_top_k=3, llm=llm)
+retriever = index.as_retriever(similarity_top_k = 3)
 import gradio as gr
+import re
+import json
+from datetime import datetime
+# Keyword lists used to classify a message as an acknowledgment, a greeting or a
+# follow-up question. Matching is done on lower-cased text, so keep entries lower-case.
+acknowledgment_keywords_sw = ["sawa", "ndiyo", "naam", "hakika", "asante", "nimeelewa", "nimekupata", "ni kweli",
+                             "kwa hakika", "nimesikia", "ahsante"]
+acknowledgment_keywords_en = ["thanks", "thank you", "thx", "ok", "okay", "great", "got it", "appreciate", "good", "makes sense"]
+# A message containing any of these is treated as carrying a real question.
+follow_up_keywords = ["but", "also", "and", "what", "how", "why", "when", "is", "?",
+                     "lakini", "pia", "na", "nini", "vipi", "kwanini", "wakati"]
+# NOTE(review): "vipi" is listed both here and in follow_up_keywords, and the greeting
+# branch requires "no follow-up keyword", so a message containing "vipi" never takes
+# the greeting branch - confirm which list it belongs in.
+greeting_keywords_sw = ["sasa", "niaje", "habari", "mambo", "jambo", "shikamoo", "marahaba", "hujambo", "hamjambo", "salama", "vipi"]
+greeting_keywords_en = ["hi", "hello", "hey", "how's it", "what's up", "yo", "howdy"]
+def contains_exact_word_or_phrase(text, keywords):
+    """Return True if any keyword occurs in text as a whole word or phrase.
+    text is lower-cased before matching; keywords are used as given, so they
+    should already be lower-case. Empty keywords are ignored.
+    """
+    text = text.lower()
+    for keyword in keywords:
+        if not keyword:
+            # An empty keyword would match at any word boundary; skip it.
+            continue
+        # \b only exists next to a word character, so anchor an edge only when the
+        # keyword's own edge is a word character. This lets punctuation keywords
+        # such as "?" match at the end of a sentence ("ok?"), which \b...\b never does.
+        prefix = r'\b' if re.match(r'\w', keyword[0]) else ''
+        suffix = r'\b' if re.match(r'\w', keyword[-1]) else ''
+        if re.search(prefix + re.escape(keyword) + suffix, text):
+            return True
+    return False
+def contains_greeting_sw(question):
+    """Return True when the question contains one of the Swahili greeting keywords."""
+    keywords = greeting_keywords_sw
+    return contains_exact_word_or_phrase(question, keywords)
+def contains_greeting_en(question):
+    """Return True when the question contains one of the English greeting keywords."""
+    keywords = greeting_keywords_en
+    return contains_exact_word_or_phrase(question, keywords)
+def contains_acknowledgment_sw(question):
+    """Return True when the question contains one of the Swahili acknowledgment keywords."""
+    keywords = acknowledgment_keywords_sw
+    return contains_exact_word_or_phrase(question, keywords)
+def contains_acknowledgment_en(question):
+    """Return True when the question contains one of the English acknowledgment keywords."""
+    keywords = acknowledgment_keywords_en
+    return contains_exact_word_or_phrase(question, keywords)
+def contains_follow_up(question):
+    """Return True when the question contains one of the follow-up indicator keywords."""
+    keywords = follow_up_keywords
+    return contains_exact_word_or_phrase(question, keywords)
+def convert_to_date(date_str):
+    """Parse a "YYYYMMDD" string into a datetime.
+    Returns None when date_str is empty or None, so a payload without a date does
+    not raise. A non-empty string in any other format still raises ValueError.
+    """
+    if not date_str:
+        return None
+    return datetime.strptime(date_str, "%Y%m%d")
+# Built on first use and then reused, so the detector is not rebuilt on every call.
+_short_text_detector = None
+def _get_short_text_detector():
+    """Return the shared English/Swahili lingua detector, building it once."""
+    global _short_text_detector
+    if _short_text_detector is None:
+        languages = [Language.ENGLISH, Language.SWAHILI]  # Add more languages as needed
+        _short_text_detector = LanguageDetectorBuilder.from_languages(*languages).build()
+    return _short_text_detector
+def detect_language(question):
+    """Return a language code for question: "sw", "en", another langdetect code, or "unknown".
+    Texts of fewer than 5 words are classified by lingua, restricted to English and
+    Swahili; longer texts go through langdetect, which may return any code it knows.
+    "unknown" is returned whenever neither detector produces an answer.
+    """
+    if len(question.split()) < 5:
+        detected_language = _get_short_text_detector().detect_language_of(question)
+        # Return language code for consistency
+        if detected_language == Language.SWAHILI:
+            return "sw"
+        if detected_language == Language.ENGLISH:
+            return "en"
+        # lingua gave neither language (e.g. empty text): report it the same way
+        # as the langdetect failure path instead of falling through to None.
+        return "unknown"
+    try:
+        return detect(question)
+    except Exception as e:
+        print(f"Error with langdetect: {e}")
+        return "unknown"
+def nishauri(user_params: str, conversation_history: list[str]):
+    """Answer one chatbot turn from a JSON payload and the running conversation history.
+    user_params is a JSON string; the keys read here are CONSENT, QUESTION and
+    PERSON_INFO (GENDER, AGE, VIRAL_LOAD, VIRAL_LOAD_DATETIME, APPOINTMENT_DATETIME,
+    REGIMEN). conversation_history is used as a list of {"user": ..., "chatbot": ...}
+    dicts (the list[str] annotation does not match that usage).
+    Returns (reply_to_user, conversation_history) with the new turn appended.
+    """
+    # Get conversation history
+    context = " ".join([item["user"] + " " + item["chatbot"] for item in conversation_history])
+    # Convert the user_params_str to a dictionary
+    user_params = json.loads(user_params)
+    ## Parse user params
+    consent = user_params.get("CONSENT")
+    person_info = user_params.get("PERSON_INFO", {})
+    gender = person_info.get("GENDER", "")
+    age = person_info.get("AGE", "")
+    vl_result = person_info.get("VIRAL_LOAD", "")
+    # NOTE(review): a missing date falls back to "" and is passed straight to
+    # convert_to_date, before consent is checked - confirm it tolerates that.
+    vl_date = convert_to_date(person_info.get("VIRAL_LOAD_DATETIME", ""))
+    next_appt_date = convert_to_date(person_info.get("APPOINTMENT_DATETIME", ""))
+    regimen = person_info.get("REGIMEN", "")
+    # NOTE(review): QUESTION has no default, so question is None when the key is
+    # absent - confirm callers always send it.
+    question = user_params.get("QUESTION")
+    ## Process greeting
+    # greet_response = process_greeting_response(question)
+    # A greeting with no follow-up keyword gets a greeting-only reply: no language
+    # detection, translation or retrieval, and the function returns early.
+    if contains_greeting_en(question) and not contains_follow_up(question):
+            greeting = (
+                f" The user previously asked and answered the following: {context}. "
+                f" The user just provided the following greeting: {question}. "
+                "Please respond accordingly in English."
+            )
+            completion = client.chat.completions.create(
+              model="gpt-4o",
+                messages=[
+                {"role": "user", "content": greeting}
+              ]
+            )
+            reply_to_user = completion.choices[0].message.content
+            conversation_history.append({"user": question, "chatbot": reply_to_user})
+            return reply_to_user, conversation_history
+    # Same as above for Swahili greeting keywords; the model is asked to answer in Swahili.
+    if contains_greeting_sw(question) and not contains_follow_up(question):
+            greeting = (
+                f" The user previously asked and answered the following: {context}. "
+                f" The user just provided the following greeting: {question}. "
+                "Please respond accordingly in Swahili."
+            )
+            completion = client.chat.completions.create(
+              model="gpt-4o",
+                messages=[
+                {"role": "user", "content": greeting}
+              ]
+            )
+            reply_to_user = completion.choices[0].message.content
+            conversation_history.append({"user": question, "chatbot": reply_to_user})
+            return reply_to_user, conversation_history
+    ## Process acknowledgment
+    # Acknowledgments ("thanks", "sawa", ...) with no follow-up keyword are handled
+    # the same way as greetings: one direct completion, then an early return.
+    if contains_acknowledgment_en(question) and not contains_follow_up(question):
+            acknowledgment = (
+                f" The user previously asked and answered the following: {context}. "
+                f" The user just provided the following acknowledgement: {question}. "
+                "Please respond accordingly in English."
+            )
+            completion = client.chat.completions.create(
+              model="gpt-4o",
+                messages=[
+                {"role": "user", "content": acknowledgment}
+              ]
+            )
+            reply_to_user = completion.choices[0].message.content
+            conversation_history.append({"user": question, "chatbot": reply_to_user})
+            return reply_to_user, conversation_history
+    if contains_acknowledgment_sw(question) and not contains_follow_up(question):
+            acknowledgment = (
+                f" The user previously asked and answered the following: {context}. "
+                f" The user just provided the following acknowledgment: {question}. "
+                "Please respond accordingly in Swahili."
+            )
+            completion = client.chat.completions.create(
+              model="gpt-4o",
+                messages=[
+                {"role": "user", "content": acknowledgment}
+              ]
+            )
+            reply_to_user = completion.choices[0].message.content
+            conversation_history.append({"user": question, "chatbot": reply_to_user})
+            return reply_to_user, conversation_history
+    # context = " ".join([item["user"] + " " + item["chatbot"] for item in conversation_history])
+    ## If not greeting or acknowledgement, then proceed with RAG
+    ## Detect language of question - if Swahili, translate to English
+    lang_question = detect_language(question)
     if lang_question=="sw":
+        question = GoogleTranslator(source='sw', target='en').translate(question)
+    # Retrieve sources
     sources = retriever.retrieve(question)
     source0 = sources[0].text
     source1 = sources[1].text
+    # NOTE(review): indexes a third result - assumes the retriever returned at least three nodes.
+    source2 = sources[2].text
+    # If user consented, add user parameters, otherwise proceed with out
+    if consent == "YES":
+            background = ("The person who asked the question is a person living with HIV."
+                          f" The person is {gender} and age is {age}. "
+                          f" The person's next clinical check in is scheduled for {next_appt_date}. This has no bearing on when viral loads are taken. "
+                          f" The person is on the following regimen for HIV {regimen}. "
+                          f" The person's most recent viral load result was {vl_result} and it was taken on {vl_date}. "
+                          " They are asking questions about HIV. Do not talk about anything that is not related to HIV. "
+                          " Recognize that they already have HIV and do not suggest that they have to get tested"
+                          " for HIV or take post-exposure prophylaxis, as that is not relevant, though their partners perhaps should."
+                          " Do not suggest anything that is not relevant to someone who already has HIV."
+                          " Do not mention in the response that the person is living with HIV."
+                          " The following information about viral loads is authoritative for any question about viral loads:"
+                          " Under 50 copies/ml is low detectable level,"
+                          " 50 - 199 copies/ml is low level viremia, 200 - 999 is high level viremia, and "
+                          " 1000 and above is suspected treatment failure."
+                          " A high viral load or non-suppressed viral load is any viral load above 200 copies/ml."
+                          " A suppressed viral load is one below 200 copies / ml.")
+    else:
+            # No consent: same guidance, but none of the person-specific fields are included.
+            background = ("The person who asked the question is a person living with HIV."
+                  " They are asking questions about HIV. Do not talk about anything that is not related to HIV. "
                   " Recognize that they already have HIV and do not suggest that they have to get tested"
                   " for HIV or take post-exposure prophylaxis, as that is not relevant, though their partners perhaps should."
                   " Do not suggest anything that is not relevant to someone who already has HIV."
                   " Do not mention in the response that the person is living with HIV."
                   " The following information about viral loads is authoritative for any question about viral loads:"
                   " Under 50 copies/ml is low detectable level,"
                   " 50 - 199 copies/ml is low level viremia, 200 - 999 is high level viremia, and "
                   " A high viral load or non-suppressed viral load is any viral load above 200 copies/ml."
                   " A suppressed viral load is one below 200 copies / ml.")
+    # Combine into final prompt - user background, conversation history, new question, retrieved sources
     question_final = (
         f" The user previously asked and answered the following: {context}. "
         f" The user just asked the following question: {question}."
+        f" Please use the following content to generate a response: {source0} {source1} {source2}."
+        f" Please consider the following background information when generating a response: {background}."
         " Keep answers brief and limited to the question that was asked."
+        " If they share a greeting, just greet them in return and ask if they have a question."
+        " Do not change the subject or address anything the user didn't directly ask about."
+        " If they respond with an acknowledgement, simply thank them ask if there is anything else that you can help with."
+        " Keep the response to under 50 words and use simple language. The user may not know technical terms."
     )
+    # Generate response
     completion = client.chat.completions.create(
+      model="gpt-4o",
         messages=[
         {"role": "user", "content": question_final}
       ]
     )
+    # Collect response
     reply_to_user = completion.choices[0].message.content
+    # add question and reply to conversation history
+    # NOTE: for Swahili input, question is already the English translation here and
+    # reply_to_user has not been translated back yet, so the history stores English text.
+    conversation_history.append({"user": question, "chatbot": reply_to_user})
+    # If initial question was in swahili, translate response to swahili
     if lang_question=="sw":
+        reply_to_user = GoogleTranslator(source='auto', target='sw').translate(reply_to_user)
     return reply_to_user, conversation_history
 demo = gr.Interface(
     title = "Nishauri Chatbot Demo",
     fn=nishauri,
+    # The "text" input is the JSON payload string that nishauri parses with json.loads;
+    # the gr.State input supplies conversation_history, starting as an empty list.
+    inputs=["text", gr.State(value=[])],
     # Outputs mirror nishauri's return value: the reply text and the updated history.
     outputs=["text", gr.State()],
 )
+# Start the Gradio app when the module is run.
+demo.launch()