Spaces:

Nishauri
/

ChatbotSources

Build error

App Files Files Community

YoniFriedman committed on Apr 25, 2025

Commit

a0630d4

verified ·

1 Parent(s): adb0802

Update app.py

Browse files

Files changed (1) hide show

app.py +194 -63

app.py CHANGED Viewed

@@ -14,6 +14,7 @@ from langdetect import detect
 from langdetect import DetectorFactory
 DetectorFactory.seed = 0
 from deep_translator import GoogleTranslator
 # Load index
 from llama_index.core import VectorStoreIndex
@@ -26,54 +27,204 @@ retriever = index.as_retriever(similarity_top_k = 3)
 import gradio as gr
 def nishauri(question: str, conversation_history: list[str]):
     context = " ".join([item["user"] + " " + item["chatbot"] for item in conversation_history])
-   # Split the string into words
-    words = question.split()
-    # Count the number of words
-    num_words = len(words)
-    lang_question = "en"
-    if num_words > 4:
-        lang_question = detect(question)
     if lang_question=="sw":
         question = GoogleTranslator(source='sw', target='en').translate(question)
     sources = retriever.retrieve(question)
     source0 = sources[0].text
     source1 = sources[1].text
     source2 = sources[2].text
     background = ("The person who asked the question is a person living with HIV."
-                  " If the person says sasa or niaje, that is swahili slang for hello."
-            " Recognize that they already have HIV and do not suggest that they have to get tested"
-            " for HIV or take post-exposure prophylaxis, as that is not relevant, though their partners perhaps should."
-            " Do not suggest anything that is not relevant to someone who already has HIV."
-             " Do not mention in the response that the person is living with HIV."
-            " The following information about viral loads is authoritative for any question about viral loads:"
-           " Under 50 copies/ml is low detectable level,"
-           " 50 - 199 copies/ml is low level viremia, 200 - 999 is high level viremia, and "
-           " 1000 and above is suspected treatment failure."
-           " A high viral load or non-suppressed viral load is any viral load above 200 copies/ml."
-           " A suppressed viral load is one below 200 copies / ml.")
     question_final = (
         f" The user previously asked and answered the following: {context}. "
         f" The user just asked the following question: {question}."
         f" Please use the following content to generate a response: {source0} {source1} {source2}."
-        f" Please update the response provided only if needed, based on the following background information {background}."
         " Keep answers brief and limited to the question that was asked."
-        " Do not provide information the user did not ask about. If they start with a greeting, just greet them in return and don't share anything else."
     )
     completion = client.chat.completions.create(
-      model="gpt-4-turbo",
         messages=[
         {"role": "user", "content": question_final}
       ]
@@ -84,40 +235,20 @@ def nishauri(question: str, conversation_history: list[str]):
     if lang_question=="sw":
         reply_to_user = GoogleTranslator(source='auto', target='sw').translate(reply_to_user)
-    conversation_history.append({"user": question, "chatbot": reply_to_user})
-    source1 = ("File Name: " +
-      sources[0].metadata["file_name"] +
-      "\nPage Number: " +
-      sources[0].metadata["page_label"] +
-      "\n Source Text: " +
-     sources[0].text)
-    source2 = ("File Name: " +
-      sources[1].metadata["file_name"] +
-      "\nPage Number: " +
-      sources[1].metadata["page_label"] +
-      "\n Source Text: " +
-     sources[1].text)
-    source3 = ("File Name: " +
-      sources[2].metadata["file_name"] +
-      "\nPage Number: " +
-      sources[2].metadata["page_label"] +
-      "\n Source Text: " +
-     sources[2].text)
-    return reply_to_user, source1, source2, source3, conversation_history
-inputs = [gr.Textbox(lines=10, label="Question"),
-          gr.State(value=[])]
-outputs = [
-    gr.Textbox(label="Chatbot Response", type="text"),
-    gr.Textbox(label="Source 1", max_lines = 10, autoscroll = False, type="text"),
-    gr.Textbox(label="Source 2", max_lines = 10, autoscroll = False, type="text"),
-    gr.Textbox(label="Source 3", max_lines = 10, autoscroll = False, type="text"),
-    gr.State()
-]
-gr.Interface(fn=nishauri, inputs=inputs, outputs=outputs, title="Nishauri Chatbot",
-             description="Enter a question and see the processed outputs in collapsible boxes.").launch()

 from langdetect import DetectorFactory
 DetectorFactory.seed = 0
 from deep_translator import GoogleTranslator
+from lingua import Language, LanguageDetectorBuilder
 # Load index
 from llama_index.core import VectorStoreIndex
 import gradio as gr
+import re
+import json
+from datetime import datetime
# Keyword tables used to classify a message as an acknowledgment, a greeting,
# or a follow-up question. Entries are lower-case.

# Swahili acknowledgments.
acknowledgment_keywords_sw = [
    "sawa", "ndiyo", "naam", "hakika", "asante",
    "nimeelewa", "nimekupata", "ni kweli", "kwa hakika", "nimesikia",
]

# English acknowledgments.
acknowledgment_keywords_en = [
    "thanks", "thank you", "thx", "ok", "okay",
    "great", "got it", "appreciate", "good", "makes sense",
]

# Words suggesting the message carries a real question (English, then Swahili).
# NOTE(review): "vipi" is also listed in greeting_keywords_sw, and "what"/"how"
# are the first words of the English greetings "what's up"/"how's it", so those
# greetings presumably also count as follow-ups - confirm this is intended.
follow_up_keywords = [
    "but", "also", "and", "what", "how", "why", "when", "is", "?",
    "lakini", "pia", "na", "nini", "vipi", "kwanini", "wakati",
]

# Swahili greetings.
greeting_keywords_sw = [
    "sasa", "niaje", "habari", "mambo", "jambo", "shikamoo",
    "marahaba", "hujambo", "hamjambo", "salama", "vipi",
]

# English greetings.
greeting_keywords_en = [
    "hi", "hello", "hey", "how's it", "what's up", "yo", "howdy",
]
def contains_exact_word_or_phrase(text, keywords):
    """Return True if *text* contains any keyword as a whole word or phrase.

    *text* is lower-cased before matching, so keywords are expected to be
    lower-case. A keyword only matches when it is not embedded in a longer
    word ("hi" does not match inside "this").

    Args:
        text: The message to inspect.
        keywords: Iterable of lower-case words, phrases or punctuation marks.

    Returns:
        True as soon as one keyword matches, otherwise False.
    """
    lowered = text.lower()
    for keyword in keywords:
        if not keyword:
            # An empty keyword would reduce to a bare boundary pattern and
            # match almost any text; treat it as "no keyword".
            continue
        # \b asserts a word/non-word transition, so it is only meaningful next
        # to a word character. Wrapping a punctuation keyword such as "?" in
        # \b would require word characters on both sides of it and would
        # therefore never match a trailing question mark.
        prefix = r'\b' if re.match(r'\w', keyword[0]) else ''
        suffix = r'\b' if re.match(r'\w', keyword[-1]) else ''
        if re.search(prefix + re.escape(keyword) + suffix, lowered):
            return True
    return False
def contains_greeting_sw(question):
    """Return True if *question* contains one of the Swahili greeting keywords."""
    # Delegates to the shared matcher with the Swahili greeting list.
    return contains_exact_word_or_phrase(question, greeting_keywords_sw)
def contains_greeting_en(question):
    """Return True if *question* contains one of the English greeting keywords."""
    # Delegates to the shared matcher with the English greeting list.
    return contains_exact_word_or_phrase(question, greeting_keywords_en)
def contains_acknowledgment_sw(question):
    """Return True when *question* includes a Swahili acknowledgment keyword."""
    keywords = acknowledgment_keywords_sw
    return contains_exact_word_or_phrase(question, keywords)
def contains_acknowledgment_en(question):
    """Return True when *question* includes an English acknowledgment keyword."""
    keywords = acknowledgment_keywords_en
    return contains_exact_word_or_phrase(question, keywords)
def contains_follow_up(question):
    """Return True when *question* includes any follow-up indicator keyword."""
    keywords = follow_up_keywords
    return contains_exact_word_or_phrase(question, keywords)
def convert_to_date(date_str):
    """Parse a compact ``YYYYMMDD`` string into a ``datetime``.

    Raises:
        ValueError: If *date_str* does not match the ``%Y%m%d`` format.
    """
    compact_format = "%Y%m%d"
    parsed = datetime.strptime(date_str, compact_format)
    return parsed
def detect_language(question):
    """Return a language code for *question*.

    Texts with fewer than five whitespace-separated words are classified with
    a lingua detector restricted to English and Swahili; longer texts are
    passed to langdetect's ``detect``.

    Args:
        question: The user's message.

    Returns:
        ``"sw"`` or ``"en"`` for short texts, whatever code ``detect`` returns
        for longer texts, or ``"unknown"`` when no language could be
        determined.
    """
    if len(question.split()) < 5:
        languages = [Language.ENGLISH, Language.SWAHILI]  # Add more languages as needed
        detector = LanguageDetectorBuilder.from_languages(*languages).build()
        detected_language = detector.detect_language_of(question)
        # Return language codes for consistency with the langdetect branch.
        if detected_language == Language.SWAHILI:
            return "sw"
        if detected_language == Language.ENGLISH:
            return "en"
        # Neither candidate was reported (lingua documents a None result when
        # it cannot decide). Return the same sentinel as the langdetect branch
        # instead of falling through to an implicit None.
        return "unknown"
    try:
        return detect(question)
    except Exception as e:
        # Best-effort: a detection failure must not abort the chat turn.
        print(f"Error with langdetect: {e}")
        return "unknown"
 def nishauri(question: str, conversation_history: list[str]):
+    # Get conversation history
     context = " ".join([item["user"] + " " + item["chatbot"] for item in conversation_history])
+    ## Process greeting
+    # greet_response = process_greeting_response(question)
+    if contains_greeting_en(question) and not contains_follow_up(question):
+            greeting = (
+                f" The user previously asked and answered the following: {context}. "
+                f" The user just provided the following greeting: {question}. "
+                "Please respond accordingly in English."
+            )
+            completion = client.chat.completions.create(
+              model="gpt-4o",
+                messages=[
+                {"role": "user", "content": greeting}
+              ]
+            )
+            reply_to_user = completion.choices[0].message.content
+            conversation_history.append({"user": question, "chatbot": reply_to_user})
+            return reply_to_user, conversation_history
+    if contains_greeting_sw(question) and not contains_follow_up(question):
+            greeting = (
+                f" The user previously asked and answered the following: {context}. "
+                f" The user just provided the following greeting: {question}. "
+                "Please respond accordingly in Swahili."
+            )
+            completion = client.chat.completions.create(
+              model="gpt-4o",
+                messages=[
+                {"role": "user", "content": greeting}
+              ]
+            )
+            reply_to_user = completion.choices[0].message.content
+            conversation_history.append({"user": question, "chatbot": reply_to_user})
+            return reply_to_user, conversation_history
+    ## Process acknowledgment
+    if contains_acknowledgment_en(question) and not contains_follow_up(question):
+            acknowledgment = (
+                f" The user previously asked and answered the following: {context}. "
+                f" The user just provided the following acknowledgement: {question}. "
+                "Please respond accordingly in English."
+            )
+            completion = client.chat.completions.create(
+              model="gpt-4o",
+                messages=[
+                {"role": "user", "content": acknowledgment}
+              ]
+            )
+            reply_to_user = completion.choices[0].message.content
+            conversation_history.append({"user": question, "chatbot": reply_to_user})
+            return reply_to_user, conversation_history
+    if contains_acknowledgment_sw(question) and not contains_follow_up(question):
+            acknowledgment = (
+                f" The user previously asked and answered the following: {context}. "
+                f" The user just provided the following acknowledgment: {question}. "
+                "Please respond accordingly in Swahili."
+            )
+            completion = client.chat.completions.create(
+              model="gpt-4o",
+                messages=[
+                {"role": "user", "content": acknowledgment}
+              ]
+            )
+            reply_to_user = completion.choices[0].message.content
+            conversation_history.append({"user": question, "chatbot": reply_to_user})
+            return reply_to_user, conversation_history
+    ## If not greeting or acknowledgement, then proceed with RAG
+    ## Detect language of question - if Swahili, translate to English
+    lang_question = detect_language(question)
     if lang_question=="sw":
         question = GoogleTranslator(source='sw', target='en').translate(question)
+    # Now, retrieve relevant sources
     sources = retriever.retrieve(question)
     source0 = sources[0].text
     source1 = sources[1].text
     source2 = sources[2].text
+    source1return = ("File Name: " +
+                     sources[0].metadata["file_name"] +
+                     "\nPage Number: " +
+                     sources[0].metadata["page_label"] +
+                     "\n Source Text: " +
+                     sources[0].text)
+    source2return = ("File Name: " +
+                     sources[1].metadata["file_name"] +
+                     "\nPage Number: " +
+                     sources[1].metadata["page_label"] +
+                     "\n Source Text: " +
+                     sources[1].text)
+    source3return = ("File Name: " +
+                     sources[2].metadata["file_name"] +
+                     "\nPage Number: " +
+                     sources[2].metadata["page_label"] +
+                     "\n Source Text: " +
+                     sources[2].text)
     background = ("The person who asked the question is a person living with HIV."
+          " They are asking questions about HIV. Do not talk about anything that is not related to HIV. "
+          " Recognize that they already have HIV and do not suggest that they have to get tested"
+          " for HIV or take post-exposure prophylaxis, as that is not relevant, though their partners perhaps should."
+          " Do not suggest anything that is not relevant to someone who already has HIV."
+          " Do not mention in the response that the person is living with HIV."
+          " The following information about viral loads is authoritative for any question about viral loads:"
+          # " Under 50 copies/ml is low detectable level,"
+          # " 50 - 199 copies/ml is low level viremia, 200 - 999 is high level viremia, and "
+          # " 1000 and above is suspected treatment failure."
+          " A high viral load or non-suppressed viral load is any viral load above 200 copies/ml."
+          " A viral load above 1000 copies/ml suggests treatment failure."
+          " A suppressed viral load is one below 200 copies / ml.")
     question_final = (
         f" The user previously asked and answered the following: {context}. "
         f" The user just asked the following question: {question}."
         f" Please use the following content to generate a response: {source0} {source1} {source2}."
+        f" Please consider the following background information when generating a response: {background}."
         " Keep answers brief and limited to the question that was asked."
+        " If they share a greeting, just greet them in return and ask if they have a question."
+        " Do not change the subject or address anything the user didn't directly ask about."
+        " If they respond with an acknowledgement, simply thank them."
+        " Do not discuss anything other than HIV. If they ask a question that is not about HIV, respond that"
+        " you are only able to discuss HIV."
+        " Keep the response to under 50 words and use simple language. The person asking the question does not know technical terms."
     )
     completion = client.chat.completions.create(
+      model="gpt-4o",
         messages=[
         {"role": "user", "content": question_final}
       ]
     if lang_question=="sw":
         reply_to_user = GoogleTranslator(source='auto', target='sw').translate(reply_to_user)
+    conversation_history.append({"user": question, "chatbot": reply_to_user})
+    return reply_to_user, source1return, source2return, source3return, conversation_history
# Gradio UI wiring: a free-text question plus the conversation-history state go
# in; the chatbot reply, three source boxes and the updated state come out.
# NOTE(review): Gradio maps the function's return values onto `outputs` in
# order, so every return path of nishauri should yield five values - confirm
# the early greeting/acknowledgment returns, which return only two.
demo = gr.Interface(
    title="Nuru Chatbot Demo",
    # The comma after this argument was missing, which made the whole call a
    # SyntaxError.
    description="Enter a question and see the processed outputs in collapsible boxes.",
    fn=nishauri,
    inputs=["text", gr.State(value=[])],
    outputs=[
        gr.Textbox(label="Nuru Response", type="text"),
        gr.Textbox(label="Source 1", max_lines=10, autoscroll=False, type="text"),
        gr.Textbox(label="Source 2", max_lines=10, autoscroll=False, type="text"),
        gr.Textbox(label="Source 3", max_lines=10, autoscroll=False, type="text"),
        gr.State(),
    ],
)