YoniFriedman committed on
Commit
6ada490
·
verified ·
1 Parent(s): 579d3f3

Adding intention detection

Browse files
Files changed (1) hide show
  1. app.py +88 -5
app.py CHANGED
@@ -26,24 +26,107 @@ retriever = index.as_retriever(similarity_top_k = 3)
26
 
27
  import gradio as gr
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def nishauri(question: str, conversation_history: list[str]):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
 
31
  context = " ".join([item["user"] + " " + item["chatbot"] for item in conversation_history])
32
 
 
33
  # Split the string into words
34
  words = question.split()
35
 
36
  # Count the number of words
37
  num_words = len(words)
38
 
 
39
  lang_question = "en"
40
-
 
41
  if num_words > 4:
42
  lang_question = detect(question)
43
-
 
44
  if lang_question=="sw":
45
  question = GoogleTranslator(source='sw', target='en').translate(question)
46
-
 
47
  sources = retriever.retrieve(question)
48
  source0 = sources[0].text
49
  source1 = sources[1].text
@@ -69,9 +152,9 @@ def nishauri(question: str, conversation_history: list[str]):
69
  f" Please use the following content to generate a response: {source0} {source1} {source2}."
70
  f" Please consider the following background information when generating a response: {background}."
71
  " Keep answers brief and limited to the question that was asked."
72
- " Do not provide information the user did not ask about. If they start with a greeting, just greet them in return and don't share anything else."
73
  " Do not change the subject or address anything the user didn't directly ask about."
74
- " If they respond with an acknowledgement such as 'ok' or 'thanks', simply thank them ask if there is anything else that you can help with.")
75
 
76
  completion = client.chat.completions.create(
77
  model="gpt-4o",
 
26
 
27
  import gradio as gr
28
 
29
+ import re
30
+
31
+ acknowledgment_keywords_sw = ["sawa", "ndiyo", "naam", "hakika", "asante", "nimeelewa", "nimekupata", "ni kweli",
32
+ "kwa hakika", "nimesikia"]
33
+ acknowledgment_keywords_en = ["thanks", "thank you", "thx", "ok", "okay", "great", "got it", "appreciate", "good", "makes sense"]
34
+ follow_up_keywords = ["but", "also", "and", "what", "how", "why", "when",
35
+ "lakini", "pia", "na", "nini", "vipi", "kwanini", "wakati"]
36
+ greeting_keywords_sw = ["sasa", "niaje", "habari", "mambo", "jambo", "shikamoo", "marahaba", "hujambo", "hamjambo", "salama", "vipi"]
37
+ greeting_keywords_en = ["hi", "hello", "hey", "how's it", "what's up", "yo", "howdy"]
38
+
39
+ def contains_exact_word_or_phrase(text, keywords):
40
+ text = text.lower()
41
+ for keyword in keywords:
42
+ if re.search(r'\b' + re.escape(keyword) + r'\b', text):
43
+ return True
44
+ return False
45
+
46
+ def contains_greeting_sw(question):
47
+ # Check if the question contains Swahili greeting keywords
48
+ # words = question.lower().split()
49
+ # return any(keyword in words for keyword in greeting_keywords_sw)
50
+ return contains_exact_word_or_phrase(question, greeting_keywords_sw)
51
+
52
+ def contains_greeting_en(question):
53
+ # Check if the question contains English greeting keywords
54
+ # words = question.lower().split()
55
+ # return any(keyword in words for keyword in greeting_keywords_en)
56
+ return contains_exact_word_or_phrase(question, greeting_keywords_en)
57
+
58
+ def contains_acknowledgment_sw(question):
59
+ # Check if the question contains acknowledgment keywords
60
+ # words = question.lower().split()
61
+ # return any(keyword in words for keyword in acknowledgment_keywords_sw)
62
+ return contains_exact_word_or_phrase(question, acknowledgment_keywords_sw)
63
+
64
+ def contains_acknowledgment_en(question):
65
+ # Check if the question contains acknowledgment keywords
66
+ # words = question.lower().split()
67
+ # return any(keyword in words for keyword in acknowledgment_keywords_en)
68
+ return contains_exact_word_or_phrase(question, acknowledgment_keywords_en)
69
+
70
+ def contains_follow_up(question):
71
+ # Check if the question contains follow-up indicators
72
+ return contains_exact_word_or_phrase(question, follow_up_keywords)
73
+
74
+ def process_acknowledgment_response(question):
75
+ # Handle simple acknowledgment
76
+ if contains_acknowledgment_en(question) and not contains_follow_up(question):
77
+ return "You're welcome! Is there anything else I can help with?"
78
+ elif contains_acknowledgment_sw(question) and not contains_follow_up(question):
79
+ return "Karibu! Kuna kitu kingine chochote ninachoweza kusaidia?"
80
+ return None
81
+
82
+ def process_greeting_response(question):
83
+ # Handle simple greeting
84
+ if contains_greeting_en(question):
85
+ return "Hi! Can I assist with any question related to HIV?"
86
+ elif contains_greeting_sw(question):
87
+ return "Habari! Je, ninaweza kusaidia kwa swali lolote linalohusiana na virusi vya ukimwe?"
88
+ return None
89
+
90
  def nishauri(question: str, conversation_history: list[str]):
91
+
92
+ ## If a greeting, then respond accordingly and do not proceed with RAG
93
+ # Process greeting
94
+ greet_response = process_greeting_response(question)
95
+ if greet_response:
96
+ conversation_history.append({"user": question, "chatbot": greet_response})
97
+ return greet_response, conversation_history
98
+
99
+ ## If user is acknowledging chatbot's response and not asking a follow up, then respond accordingly
100
+ # Process acknowledgment
101
+ ack_response = process_acknowledgment_response(question)
102
+ if ack_response:
103
+ conversation_history.append({"user": question, "chatbot": ack_response})
104
+ return ack_response, conversation_history
105
+
106
+ ## Otherwise, proceed with RAG
107
 
108
+ # Create user history
109
  context = " ".join([item["user"] + " " + item["chatbot"] for item in conversation_history])
110
 
111
+ ## Language detection - we want to run the pipeline in English since our sources are in English
112
  # Split the string into words
113
  words = question.split()
114
 
115
  # Count the number of words
116
  num_words = len(words)
117
 
118
+ # By default, let's assume the language is English
119
  lang_question = "en"
120
+
121
+ # Language detection is unreliable with fewer than five words, so only check if at least five words
122
  if num_words > 4:
123
  lang_question = detect(question)
124
+
125
+ # If language is swahili, then translate question to english
126
  if lang_question=="sw":
127
  question = GoogleTranslator(source='sw', target='en').translate(question)
128
+
129
+ # Now, retrieve relevant sources
130
  sources = retriever.retrieve(question)
131
  source0 = sources[0].text
132
  source1 = sources[1].text
 
152
  f" Please use the following content to generate a response: {source0} {source1} {source2}."
153
  f" Please consider the following background information when generating a response: {background}."
154
  " Keep answers brief and limited to the question that was asked."
155
+ " If they start with a greeting, just greet them in return and don't share anything else."
156
  " Do not change the subject or address anything the user didn't directly ask about."
157
+ " If they respond with an acknowledgement, simply thank them ask if there is anything else that you can help with."
158
 
159
  completion = client.chat.completions.create(
160
  model="gpt-4o",