Update app.py
Browse files
app.py
CHANGED
|
@@ -50,7 +50,8 @@ class EnhancedContextDrivenChatbot:
|
|
| 50 |
self.entity_tracker = {}
|
| 51 |
self.conversation_context = ""
|
| 52 |
self.model = model
|
| 53 |
-
|
|
|
|
| 54 |
def add_to_history(self, text):
|
| 55 |
self.history.append(text)
|
| 56 |
if len(self.history) > self.history_size:
|
|
@@ -79,6 +80,20 @@ class EnhancedContextDrivenChatbot:
|
|
| 79 |
doc = nlp(text)
|
| 80 |
return [chunk.text for chunk in doc.noun_chunks]
|
| 81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
def get_most_relevant_context(self, question):
|
| 83 |
if not self.history:
|
| 84 |
return question
|
|
@@ -100,16 +115,19 @@ class EnhancedContextDrivenChatbot:
|
|
| 100 |
# Otherwise, it might be a new topic
|
| 101 |
return question
|
| 102 |
|
| 103 |
-
|
|
|
|
| 104 |
if not self.model:
|
| 105 |
return question # Return original question if no model is available
|
| 106 |
|
|
|
|
|
|
|
| 107 |
prompt = f"""
|
| 108 |
-
Given the conversation context
|
| 109 |
|
| 110 |
Conversation context: {self.get_context()}
|
| 111 |
Current question: {question}
|
| 112 |
-
|
| 113 |
Rephrased question:
|
| 114 |
"""
|
| 115 |
|
|
@@ -118,16 +136,21 @@ class EnhancedContextDrivenChatbot:
|
|
| 118 |
return rephrased_question.strip()
|
| 119 |
|
| 120 |
def process_question(self, question):
|
| 121 |
-
|
| 122 |
|
| 123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
|
| 125 |
-
|
| 126 |
-
contextualized_question = self.rephrase_query(contextualized_question)
|
| 127 |
|
| 128 |
self.add_to_history(question)
|
| 129 |
|
| 130 |
-
return contextualized_question, topics, self.entity_tracker
|
| 131 |
|
| 132 |
# Initialize LlamaParse
|
| 133 |
llama_parser = LlamaParse(
|
|
@@ -324,7 +347,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
| 324 |
context_reduction_factor = 0.7
|
| 325 |
|
| 326 |
if web_search:
|
| 327 |
-
contextualized_question, topics, entity_tracker = chatbot.process_question(question)
|
| 328 |
serializable_entity_tracker = {k: list(v) for k, v in entity_tracker.items()}
|
| 329 |
search_results = google_search(contextualized_question)
|
| 330 |
all_answers = []
|
|
@@ -342,14 +365,17 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
| 342 |
|
| 343 |
context_str = "\n".join([f"Source: {doc.metadata['source']}\nContent: {doc.page_content}" for doc in web_docs])
|
| 344 |
|
| 345 |
-
|
| 346 |
-
|
|
|
|
|
|
|
| 347 |
Web Search Results:
|
| 348 |
-
{context}
|
| 349 |
-
Conversation Context: {conv_context}
|
| 350 |
-
Current Question: {question}
|
| 351 |
-
Topics: {topics}
|
| 352 |
-
Entity Information: {entities}
|
|
|
|
| 353 |
If the web search results don't contain relevant information, state that the information is not available in the search results.
|
| 354 |
Provide a summarized and direct answer to the question without mentioning the web search or these instructions.
|
| 355 |
Do not include any source information in your answer.
|
|
|
|
| 50 |
self.entity_tracker = {}
|
| 51 |
self.conversation_context = ""
|
| 52 |
self.model = model
|
| 53 |
+
self.last_instructions = None
|
| 54 |
+
|
| 55 |
def add_to_history(self, text):
|
| 56 |
self.history.append(text)
|
| 57 |
if len(self.history) > self.history_size:
|
|
|
|
| 80 |
doc = nlp(text)
|
| 81 |
return [chunk.text for chunk in doc.noun_chunks]
|
| 82 |
|
| 83 |
+
def extract_instructions(self, text):
    """Split explicit response instructions off of *text*.

    Tries a few regex shapes such as "please <instruction> in your
    response"; on the first match, returns the captured instruction
    body (stripped) together with True.  When nothing matches, the
    text is returned unchanged together with False.
    """
    # NOTE(review): because every prefix group is optional, patterns 2
    # and 3 overlap heavily with pattern 1 — confirm all three are needed.
    patterns = (
        r"^(?:please\s+)?(?:can\s+you\s+)?(?:could\s+you\s+)?(.*?)\s*(?:for\s+me|for\s+this\s+response|in\s+your\s+response|in\s+your\s+answer)(?:\s*\?)?$",
        r"^(?:I\s+want\s+you\s+to\s+)?(.*?)\s*(?:for\s+me|for\s+this\s+response|in\s+your\s+response|in\s+your\s+answer)(?:\s*\?)?$",
        r"^(?:make\s+sure\s+to\s+)?(.*?)\s*(?:for\s+me|for\s+this\s+response|in\s+your\s+response|in\s+your\s+answer)(?:\s*\?)?$",
    )

    for candidate in patterns:
        hit = re.match(candidate, text, re.IGNORECASE)
        if hit:
            return hit.group(1).strip(), True

    return text, False
|
| 96 |
+
|
| 97 |
def get_most_relevant_context(self, question):
|
| 98 |
if not self.history:
|
| 99 |
return question
|
|
|
|
| 115 |
# Otherwise, it might be a new topic
|
| 116 |
return question
|
| 117 |
|
| 118 |
+
|
| 119 |
+
def rephrase_query(self, question, instructions=None):
|
| 120 |
if not self.model:
|
| 121 |
return question # Return original question if no model is available
|
| 122 |
|
| 123 |
+
instruction_prompt = f"Instructions: {instructions}\n" if instructions else ""
|
| 124 |
+
|
| 125 |
prompt = f"""
|
| 126 |
+
Given the conversation context, the current question, and any provided instructions, rephrase the question to include relevant context:
|
| 127 |
|
| 128 |
Conversation context: {self.get_context()}
|
| 129 |
Current question: {question}
|
| 130 |
+
{instruction_prompt}
|
| 131 |
Rephrased question:
|
| 132 |
"""
|
| 133 |
|
|
|
|
| 136 |
return rephrased_question.strip()
|
| 137 |
|
| 138 |
def process_question(self, question):
    """Prepare *question* for answering.

    Separates any embedded response instructions, resolves the question
    against the conversation context, extracts topics, records the turn
    in history, and returns a 4-tuple of
    (contextualized_question, topics, entity_tracker, last_instructions).
    """
    question, has_instructions = self.extract_instructions(question)

    # An instruction-bearing turn is remembered so that later follow-ups
    # can be rephrased with the same instructions.
    if has_instructions:
        self.last_instructions = question

    contextualized_question = self.get_most_relevant_context(question)

    # Only plain follow-up questions get rephrased with the stored
    # instructions; an instruction turn is used as-is.
    if not has_instructions and self.is_follow_up_question(question):
        contextualized_question = self.rephrase_query(
            contextualized_question, self.last_instructions
        )

    topics = self.extract_topics(contextualized_question)

    self.add_to_history(question)

    return contextualized_question, topics, self.entity_tracker, self.last_instructions
|
| 154 |
|
| 155 |
# Initialize LlamaParse
|
| 156 |
llama_parser = LlamaParse(
|
|
|
|
| 347 |
context_reduction_factor = 0.7
|
| 348 |
|
| 349 |
if web_search:
|
| 350 |
+
contextualized_question, topics, entity_tracker, instructions = chatbot.process_question(question)
|
| 351 |
serializable_entity_tracker = {k: list(v) for k, v in entity_tracker.items()}
|
| 352 |
search_results = google_search(contextualized_question)
|
| 353 |
all_answers = []
|
|
|
|
| 365 |
|
| 366 |
context_str = "\n".join([f"Source: {doc.metadata['source']}\nContent: {doc.page_content}" for doc in web_docs])
|
| 367 |
|
| 368 |
+
instruction_prompt = f"Instructions: {instructions}\n" if instructions else ""
|
| 369 |
+
|
| 370 |
+
prompt_template = f"""
|
| 371 |
+
Answer the question based on the following web search results, conversation context, entity information, and any provided instructions:
|
| 372 |
Web Search Results:
|
| 373 |
+
{{context}}
|
| 374 |
+
Conversation Context: {{conv_context}}
|
| 375 |
+
Current Question: {{question}}
|
| 376 |
+
Topics: {{topics}}
|
| 377 |
+
Entity Information: {{entities}}
|
| 378 |
+
{instruction_prompt}
|
| 379 |
If the web search results don't contain relevant information, state that the information is not available in the search results.
|
| 380 |
Provide a summarized and direct answer to the question without mentioning the web search or these instructions.
|
| 381 |
Do not include any source information in your answer.
|