YoniFriedman committed on
Commit
bae1e89
·
verified ·
1 Parent(s): fd3f471

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +185 -0
app.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import libraries
2
+ import os
3
+ import logging
4
+ import sys
5
+ import re
6
+ import json
7
+ from deep_translator import GoogleTranslator
8
+ from lingua import Language, LanguageDetectorBuilder
9
+ import gradio as gr
10
+ from openai import OpenAI as OpenAIOG
11
+ from llama_index.llms.openai import OpenAI
12
+ from llama_index.core import VectorStoreIndex, StorageContext, load_index_from_storage
13
+
14
# OPENAI_API_KEY must be provided through the environment. Merely reading the
# variable and discarding the result has no effect, so check it explicitly and
# surface a clear warning when it is missing.
if not os.environ.get("OPENAI_API_KEY"):
    logging.warning("OPENAI_API_KEY is not set in the environment.")

# Initialize the OpenAI client used for chat completions
client = OpenAIOG()

# Load the persisted index from disk and build a retriever that returns the
# five most similar nodes for a query
storage_context = StorageContext.from_defaults(persist_dir="lamis_lp_metadata")
index = load_index_from_storage(storage_context)
retriever = index.as_retriever(similarity_top_k=5)
24
+
25
# Keyword lists used to classify a message as a greeting, an acknowledgment,
# or a follow-up question (Yoruba and English variants).
acknowledgment_keywords_yo = [
    "Ẹ ṣé",
    "Ẹ ṣé gan",
    "Ẹ ṣéun",
    "Ọ ṣeun",
    "Ọ dára",
    "Ọ tọ́",
    "Mo ti gbọ́",
    "Ẹ ṣeun fún ifọ̀rọ̀wánilẹ́nuwò",
    "Ó yé mi",
    "Kò burú",
]
acknowledgment_keywords_en = [
    "thanks",
    "thank you",
    "thx",
    "ok",
    "okay",
    "great",
    "got it",
    "appreciate",
    "good",
    "makes sense",
]
# Words (and "?") that signal the message carries a real question even if it
# also contains a greeting or acknowledgment.
follow_up_keywords = [
    "Ṣùgbọ́n",
    "Pẹ̀lú",
    "Tun",
    "Ati",
    "Kí ni",
    "Báwo",
    "Kí ló dé",
    "Èéṣé",
    "Nigbà wo",
    "Ni",
    "?",
    "but",
    "also",
    "and",
    "what",
    "how",
    "why",
    "when",
    "is",
]
greeting_keywords_yo = [
    "Báwo ni",
    "Ẹ káàárọ̀",
    "Ẹ káàsán",
    "Ẹ kúùrọ̀lẹ́",
    "Ẹ káàbọ̀",
    "Ẹ kúulé",
    "Ẹ kuùjọ̀kòó",
]
greeting_keywords_en = [
    "hi",
    "hello",
    "hey",
    "how's it",
    "what's up",
    "yo",
    "howdy",
]
33
+
34
+ # Define helper functions
35
+
36
def contains_exact_word_or_phrase(text, keywords):
    """Return True if ``text`` contains any keyword as a whole word or phrase.

    Both the text and every keyword are NFC-normalised and lowercased before
    matching, so keywords written with capital letters (e.g. the Yoruba
    greetings) can match. A keyword must not be glued to a neighbouring word
    character or combining diacritic on an edge where the keyword itself
    starts/ends with one; keywords made of punctuation (such as "?") match
    anywhere.

    Args:
        text: The message to inspect. Empty or None yields False.
        keywords: Iterable of keyword strings; empty keywords are ignored.

    Returns:
        bool: True when at least one keyword is found, otherwise False.
    """
    import unicodedata  # local import: not part of the file's import block

    if not text:
        return False

    # A "word" character here is \w plus the combining diacritical marks,
    # because Yoruba tone marks may be encoded as separate combining code
    # points that \b / \w alone do not treat as part of a word.
    word_char = r"[\w\u0300-\u036f]"

    def _normalize(value):
        return unicodedata.normalize("NFC", value).lower()

    haystack = _normalize(text)
    for keyword in keywords:
        needle = _normalize(keyword)
        if not needle:
            continue
        pattern = re.escape(needle)
        # Only anchor an edge when the keyword has a word character there;
        # otherwise (e.g. "?") a boundary assertion could never succeed.
        if re.match(word_char, needle[0]):
            pattern = r"(?<!" + word_char + r")" + pattern
        if re.match(word_char, needle[-1]):
            pattern = pattern + r"(?!" + word_char + r")"
        if re.search(pattern, haystack):
            return True
    return False
40
+
41
def contains_greeting_yo(text):
    """Report whether ``text`` contains one of the Yoruba greeting keywords."""
    found = contains_exact_word_or_phrase(text, greeting_keywords_yo)
    return found
43
+
44
def contains_greeting_en(text):
    """Report whether ``text`` contains one of the English greeting keywords."""
    found = contains_exact_word_or_phrase(text, greeting_keywords_en)
    return found
46
+
47
def contains_acknowledgment_yo(text):
    """Report whether ``text`` contains a Yoruba acknowledgment keyword."""
    found = contains_exact_word_or_phrase(text, acknowledgment_keywords_yo)
    return found
49
+
50
def contains_acknowledgment_en(text):
    """Report whether ``text`` contains an English acknowledgment keyword."""
    found = contains_exact_word_or_phrase(text, acknowledgment_keywords_en)
    return found
52
+
53
def contains_follow_up(text):
    """Report whether ``text`` contains any follow-up/question keyword."""
    found = contains_exact_word_or_phrase(text, follow_up_keywords)
    return found
55
+
56
# Lazily-built Lingua detector, shared across calls so the detector is not
# rebuilt for every incoming message.
_LANGUAGE_DETECTOR = None


def _get_language_detector():
    """Build the English/Yoruba Lingua detector on first use and reuse it."""
    global _LANGUAGE_DETECTOR
    if _LANGUAGE_DETECTOR is None:
        _LANGUAGE_DETECTOR = LanguageDetectorBuilder.from_languages(
            Language.ENGLISH, Language.YORUBA
        ).build()
    return _LANGUAGE_DETECTOR


def detect_language(text):
    """Detect whether ``text`` is Yoruba or English using Lingua.

    Args:
        text: The message to classify.

    Returns:
        str: "yo" when Lingua reports Yoruba, "en" for any other detected
        language, and "unknown" when the text is empty/blank or Lingua
        returns no language.
    """
    # Nothing to classify; skip building or calling the detector.
    if not text or not text.strip():
        return "unknown"

    detected_language = _get_language_detector().detect_language_of(text)
    # Logged at debug level instead of printed to stdout on every request.
    logging.debug("Detected language: %s", detected_language)
    if detected_language is None:
        return "unknown"
    return "yo" if detected_language == Language.YORUBA else "en"
65
+
66
+
67
+ # Define Gradio function
68
def _format_source(source):
    """Render one retrieved node as the text shown in a "Source N" box."""
    metadata = source.metadata or {}
    # .get() + str() keep a missing key or a non-string value (e.g. a numeric
    # page label) from raising while the display text is built.
    # NOTE(review): "Source Test" is probably a typo for "Source Text"; the
    # label is left unchanged here because it is user-facing output.
    return (
        "File Name: "
        + str(metadata.get("source", "unknown"))
        + "\nPage Number: "
        + str(metadata.get("page_label", "unknown"))
        + "\n Source Test: "
        + source.text
    )


def _chat(messages):
    """Send ``messages`` to the gpt-4o chat model and return the reply text."""
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
    )
    return completion.choices[0].message.content


def idahun(question, conversation_history: list[dict[str, str]]):
    """Answer a user message, using retrieved sources for real questions.

    Greetings and acknowledgments that contain no follow-up keyword are
    answered directly by the chat model without retrieval. Any other message
    is language-detected, translated to English when detected as Yoruba, used
    to retrieve sources, and answered by the chat model from the top three
    sources plus the prior conversation; the reply is translated to Yoruba
    when the question was detected as Yoruba.

    Args:
        question: The user's message.
        conversation_history: Prior turns as dicts with "user" and "chatbot"
            keys. The list is appended to in place; None is treated as empty.

    Returns:
        tuple: ``(reply, source1, source2, source3, conversation_history)``.
        The three source strings are empty for greetings/acknowledgments and
        for any slot with no retrieved node.
    """
    if conversation_history is None:
        conversation_history = []

    # Process greetings and acknowledgments (only when no follow-up question
    # is present in the same message)
    if not contains_follow_up(question):
        handlers = (
            ("en", contains_greeting_en, contains_acknowledgment_en),
            ("yo", contains_greeting_yo, contains_acknowledgment_yo),
        )
        for lang, contains_greeting, contains_acknowledgment in handlers:
            if contains_greeting(question):
                prompt = f"The user said: {question}. Respond accordingly in {lang}."
            elif contains_acknowledgment(question):
                prompt = f"The user acknowledged: {question}. Respond accordingly in {lang}."
            else:
                continue
            reply_to_user = _chat([{"role": "user", "content": prompt}])
            conversation_history.append({"user": question, "chatbot": reply_to_user})
            return reply_to_user, "", "", "", conversation_history

    # Detect language and translate if needed
    lang_question = detect_language(question)
    if lang_question == "yo":
        translated = GoogleTranslator(source='yo', target='en').translate(question)
        # Keep the original text if the translator returns nothing.
        question = translated or question

    # Retrieve relevant sources; only the first three are shown and used
    top_sources = retriever.retrieve(question)[:3]
    retrieved_text = "\n\n".join(
        [f"Source {i+1}: {source.text}" for i, source in enumerate(top_sources)]
    )

    # Always produce exactly three source strings, padding with "" when fewer
    # than three nodes were retrieved (indexing directly would raise).
    source_texts = [_format_source(source) for source in top_sources]
    source_texts += [""] * (3 - len(source_texts))
    source_1, source_2, source_3 = source_texts

    # Combine the (English) question and retrieved sources into the final
    # user message
    question_final = (
        f"The user asked the following question: \"{question}\"\n\n"
        f"Use only the content below to answer the question:\n\n{retrieved_text}\n\n"
        "Guidelines:\n"
        "- Only answer the question that was asked.\n"
        "- Do not change the subject or include unrelated information.\n"
        "- Only discuss topics related to HIV and associated infections. If the question is not relevant, say that you can only answer relevant questions.\n"
    )

    # Set LLM instructions
    system_prompt = (
        "You are a helpful assistant who only answers questions about Nigeria's HIV guidelines and about using the LAMIS Plus EMR.\n"
        "- Do not answer questions about other topics.\n"
        "- If a question is unrelated to HIV or LAMIS Plus, politely respond that you can only answer HIV- or LAMIS Plus-related questions.\n\n"
    )

    # Start with the system prompt, replay the conversation history, then add
    # the current question
    messages = [{"role": "system", "content": system_prompt}]
    for turn in conversation_history:
        messages.append({"role": "user", "content": turn["user"]})
        messages.append({"role": "assistant", "content": turn["chatbot"]})
    messages.append({"role": "user", "content": question_final})

    # Generate and collect the response
    reply_to_user = _chat(messages)

    # Record the turn before translating the reply, so the stored history
    # holds the English question and the model's untranslated reply
    conversation_history.append({"user": question, "chatbot": reply_to_user})

    # If the initial question was in Yoruba, translate the response to Yoruba
    if lang_question == "yo":
        translated_reply = GoogleTranslator(source='auto', target='yo').translate(reply_to_user)
        reply_to_user = translated_reply or reply_to_user

    return reply_to_user, source_1, source_2, source_3, conversation_history
169
+
170
# Gradio UI: one text input plus per-session conversation state in; the reply,
# three source panels, and the updated state out.
demo = gr.Interface(
    fn=idahun,
    title="Idahun Chatbot Demo",
    inputs=["text", gr.State(value=[])],
    outputs=[
        gr.Textbox(label="Idahun Response", type="text"),
        *(
            gr.Textbox(label=f"Source {n}", max_lines=10, autoscroll=False, type="text")
            for n in (1, 2, 3)
        ),
        gr.State(),
    ],
)

# Start the web app
demo.launch()
184
+
185
+