Spaces:
Sleeping
Sleeping
# --- Imports -----------------------------------------------------------------
# Standard library
import json
import logging
import os
import re
import sys
from datetime import datetime

# Third-party
import gradio as gr
import openai
from deep_translator import GoogleTranslator
from langdetect import DetectorFactory
from langdetect import detect
from lingua import Language, LanguageDetectorBuilder
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex
from llama_index.core import load_index_from_storage
from llama_index.core.schema import MetadataMode
from llama_index.llms.openai import OpenAI
from openai import OpenAI as OpenAIOG

# --- Configuration -----------------------------------------------------------
# Fail fast, with an actionable message, when the OpenAI credentials are
# missing. The original bare ``os.environ["OPENAI_API_KEY"]`` lookup raised the
# same exception type but without any explanation.
if "OPENAI_API_KEY" not in os.environ:
    raise KeyError(
        "OPENAI_API_KEY environment variable must be set before starting the app"
    )

# Fix langdetect's seed so repeated detections of the same text agree.
DetectorFactory.seed = 0

# LLM handed to the LlamaIndex query engine.
llm = OpenAI(temperature=0.0, model="gpt-3.5-turbo")
# Plain OpenAI SDK client used for direct chat-completion calls.
client = OpenAIOG()

# --- Load index --------------------------------------------------------------
# The vector index is read from the "arv_metadata" directory on disk.
storage_context = StorageContext.from_defaults(persist_dir="arv_metadata")
index = load_index_from_storage(storage_context)
query_engine = index.as_query_engine(similarity_top_k=3, llm=llm)
# Retriever returning the 3 most similar nodes for a query.
retriever = index.as_retriever(similarity_top_k=3)
# Keyword tables used to classify a user message before running retrieval.
# Each entry is matched case-insensitively as a whole word or phrase.

# Swahili acknowledgments.
acknowledgment_keywords_sw = [
    "sawa",
    "ndiyo",
    "naam",
    "hakika",
    "asante",
    "nimeelewa",
    "nimekupata",
    "ni kweli",
    "kwa hakika",
    "nimesikia",
]

# English acknowledgments.
acknowledgment_keywords_en = [
    "thanks",
    "thank you",
    "thx",
    "ok",
    "okay",
    "great",
    "got it",
    "appreciate",
    "good",
    "makes sense",
]

# Words (English first, then Swahili) signalling that the message carries a
# real question, so it must not be handled as a bare greeting/acknowledgment.
follow_up_keywords = [
    "but",
    "also",
    "and",
    "what",
    "how",
    "why",
    "when",
    "is",
    "?",
    "lakini",
    "pia",
    "na",
    "nini",
    "vipi",
    "kwanini",
    "wakati",
]

# Swahili greetings.
# NOTE(review): "vipi" is also listed in follow_up_keywords, so a message
# containing it is always classified as a follow-up, never as a greeting.
greeting_keywords_sw = [
    "sasa",
    "niaje",
    "habari",
    "mambo",
    "jambo",
    "shikamoo",
    "marahaba",
    "hujambo",
    "hamjambo",
    "salama",
    "vipi",
]

# English greetings.
greeting_keywords_en = [
    "hi",
    "hello",
    "hey",
    "how's it",
    "what's up",
    "yo",
    "howdy",
]
def contains_exact_word_or_phrase(text, keywords):
    """Return True if *text* contains any of *keywords* as a whole word/phrase.

    Matching is case-insensitive on *text* (it is lower-cased; keywords are
    used as given, so they are expected to be lower-case already).

    A word boundary is only required on a keyword edge that is itself a word
    character. Requiring ``\\b`` around a punctuation keyword such as ``"?"``
    would demand word characters on both sides of it, so a trailing question
    mark ("really?") could never match.

    Args:
        text: The message to inspect.
        keywords: Iterable of words or phrases to look for. Empty strings are
            ignored, since an empty pattern would match any text.

    Returns:
        True as soon as one keyword is found, otherwise False.
    """
    lowered = text.lower()
    for keyword in keywords:
        if not keyword:
            # An empty keyword carries no information; skip it instead of
            # letting it match every non-empty message.
            continue
        leading = r"\b" if re.match(r"\w", keyword[0]) else ""
        trailing = r"\b" if re.match(r"\w", keyword[-1]) else ""
        if re.search(leading + re.escape(keyword) + trailing, lowered):
            return True
    return False
def contains_greeting_sw(question: str) -> bool:
    """Return True if *question* contains a Swahili greeting keyword."""
    has_greeting = contains_exact_word_or_phrase(question, greeting_keywords_sw)
    return has_greeting
def contains_greeting_en(question: str) -> bool:
    """Return True if *question* contains an English greeting keyword."""
    has_greeting = contains_exact_word_or_phrase(question, greeting_keywords_en)
    return has_greeting
def contains_acknowledgment_sw(question: str) -> bool:
    """Return True if *question* contains a Swahili acknowledgment keyword."""
    has_acknowledgment = contains_exact_word_or_phrase(
        question, acknowledgment_keywords_sw
    )
    return has_acknowledgment
def contains_acknowledgment_en(question: str) -> bool:
    """Return True if *question* contains an English acknowledgment keyword."""
    has_acknowledgment = contains_exact_word_or_phrase(
        question, acknowledgment_keywords_en
    )
    return has_acknowledgment
def contains_follow_up(question: str) -> bool:
    """Return True if *question* contains any follow-up indicator keyword."""
    has_follow_up = contains_exact_word_or_phrase(question, follow_up_keywords)
    return has_follow_up
def convert_to_date(date_str: str) -> datetime:
    """Parse a compact ``YYYYMMDD`` string into a ``datetime``.

    Raises:
        ValueError: If *date_str* does not match the ``%Y%m%d`` format.
    """
    compact_format = "%Y%m%d"
    parsed = datetime.strptime(date_str, compact_format)
    return parsed
def detect_language(question):
    """Return a language code for *question*.

    Messages with fewer than 5 whitespace-separated words are classified with
    a lingua detector restricted to English and Swahili; longer messages are
    passed to ``langdetect.detect``.

    Args:
        question: The user's message.

    Returns:
        ``"sw"`` or ``"en"`` for short messages, whatever code ``detect``
        returns for longer ones, or ``"unknown"`` when detection fails.
        Every path returns a string: previously the short-text branch fell
        through and returned ``None`` when lingua reported neither language.
    """
    # Check if the text has less than 5 words
    if len(question.split()) < 5:
        candidate_languages = [Language.ENGLISH, Language.SWAHILI]  # Add more languages as needed
        detector = LanguageDetectorBuilder.from_languages(*candidate_languages).build()
        detected_language = detector.detect_language_of(question)
        # Return language code for consistency
        if detected_language == Language.SWAHILI:
            return "sw"
        if detected_language == Language.ENGLISH:
            return "en"
        # Neither candidate was reported (e.g. no result for an empty or
        # ambiguous message): use the same sentinel as the langdetect branch.
        return "unknown"

    try:
        return detect(question)
    except Exception as e:
        # Best effort: a detection failure must not abort the chat turn.
        print(f"Error with langdetect: {e}")
        return "unknown"
def _ask_gpt(prompt):
    """Send *prompt* as a single user message to gpt-4o and return the reply text."""
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "user", "content": prompt}
        ]
    )
    return completion.choices[0].message.content


def _format_source(node):
    """Render one retrieved node (file name, page, text) for a "Source" box."""
    return ("File Name: " +
            node.metadata["file_name"] +
            "\nPage Number: " +
            node.metadata["page_label"] +
            "\n Source Text: " +
            node.text)


def nishauri(
    question: str,
    conversation_history: list[dict[str, str]],
) -> tuple[str, str, str, str, list[dict[str, str]]]:
    """Answer one chat turn and return the values for the Gradio outputs.

    Bare greetings and acknowledgments (messages without any follow-up
    keyword) are answered directly by the chat model. Everything else goes
    through retrieval: the question is translated to English when detected as
    Swahili, the top sources are retrieved, and the model is prompted with the
    sources plus fixed background guidance. The reply is translated back to
    Swahili when the question was Swahili.

    Args:
        question: The user's message.
        conversation_history: Previous turns as dicts with ``"user"`` and
            ``"chatbot"`` keys. The list is extended in place with this turn.

    Returns:
        ``(reply, source_1, source_2, source_3, conversation_history)``.
        The three source strings are empty for greeting/acknowledgment turns
        and for any source slot the retriever did not fill. All paths return
        five values, matching the five output components of the interface;
        the small-talk paths previously returned only two.
    """
    if conversation_history is None:
        conversation_history = []

    # Get conversation history
    context = " ".join(item["user"] + " " + item["chatbot"] for item in conversation_history)

    ## Process greetings and acknowledgments (only when nothing else is asked)
    if not contains_follow_up(question):
        # (matcher, noun used in the prompt, reply language), checked in order.
        # The two spellings of "acknowledgement" are kept exactly as in the
        # original prompts.
        small_talk_rules = (
            (contains_greeting_en, "greeting", "English"),
            (contains_greeting_sw, "greeting", "Swahili"),
            (contains_acknowledgment_en, "acknowledgement", "English"),
            (contains_acknowledgment_sw, "acknowledgment", "Swahili"),
        )
        for matches, kind, language in small_talk_rules:
            if not matches(question):
                continue
            small_talk_prompt = (
                f" The user previously asked and answered the following: {context}. "
                f" The user just provided the following {kind}: {question}. "
                f"Please respond accordingly in {language}."
            )
            reply_to_user = _ask_gpt(small_talk_prompt)
            conversation_history.append({"user": question, "chatbot": reply_to_user})
            # No retrieval happened, so the three source boxes stay empty.
            return reply_to_user, "", "", "", conversation_history

    ## If not greeting or acknowledgement, then proceed with RAG
    ## Detect language of question - if Swahili, translate to English
    lang_question = detect_language(question)
    if lang_question == "sw":
        question = GoogleTranslator(source='sw', target='en').translate(question)

    # Now, retrieve relevant sources (at most three are used)
    sources = retriever.retrieve(question)[:3]
    source_text = " ".join(node.text for node in sources)
    source_boxes = [_format_source(node) for node in sources]
    # Pad so there is always one value per "Source" output box, even when the
    # retriever returns fewer than three nodes.
    source_boxes += [""] * (3 - len(source_boxes))

    background = ("The person who asked the question is a person living with HIV."
                  " They are asking questions about HIV. Do not talk about anything that is not related to HIV. "
                  " Recognize that they already have HIV and do not suggest that they have to get tested"
                  " for HIV or take post-exposure prophylaxis, as that is not relevant, though their partners perhaps should."
                  " Do not suggest anything that is not relevant to someone who already has HIV."
                  " Do not mention in the response that the person is living with HIV."
                  " The following information about viral loads is authoritative for any question about viral loads:"
                  # " Under 50 copies/ml is low detectable level,"
                  # " 50 - 199 copies/ml is low level viremia, 200 - 999 is high level viremia, and "
                  # " 1000 and above is suspected treatment failure."
                  " A high viral load or non-suppressed viral load is any viral load above 200 copies/ml."
                  " A viral load above 1000 copies/ml suggests treatment failure."
                  " A suppressed viral load is one below 200 copies / ml.")

    question_final = (
        f" The user previously asked and answered the following: {context}. "
        f" The user just asked the following question: {question}."
        f" Please use the following content to generate a response: {source_text}."
        f" Please consider the following background information when generating a response: {background}."
        " Keep answers brief and limited to the question that was asked."
        " If they share a greeting, just greet them in return and ask if they have a question."
        " Do not change the subject or address anything the user didn't directly ask about."
        " If they respond with an acknowledgement, simply thank them."
        " Do not discuss anything other than HIV. If they ask a question that is not about HIV, respond that"
        " you are only able to discuss HIV."
        " Keep the response to under 50 words and use simple language. The person asking the question does not know technical terms."
    )

    reply_to_user = _ask_gpt(question_final)

    # Answer in the language the question was asked in.
    if lang_question == "sw":
        reply_to_user = GoogleTranslator(source='auto', target='sw').translate(reply_to_user)

    # NOTE: for Swahili questions the stored "user" text is the English
    # translation (the variable was reassigned above), as in the original.
    conversation_history.append({"user": question, "chatbot": reply_to_user})

    return (
        reply_to_user,
        source_boxes[0],
        source_boxes[1],
        source_boxes[2],
        conversation_history,
    )
# Gradio UI: one text input plus per-session conversation state in; the reply,
# three source boxes and the updated state out.
demo = gr.Interface(
    fn=nishauri,
    title="Nuru Chatbot Demo",
    description="Enter a question and see the processed outputs in collapsible boxes.",
    inputs=["text", gr.State(value=[])],
    outputs=[
        gr.Textbox(label="Nuru Response", type="text"),
        # Three identically configured boxes labelled "Source 1" .. "Source 3".
        *(
            gr.Textbox(label=f"Source {number}", max_lines=10, autoscroll=False, type="text")
            for number in (1, 2, 3)
        ),
        gr.State(),
    ],
)