Update app.py
Browse files
app.py
CHANGED
|
@@ -9,6 +9,10 @@ import PyPDF2
|
|
| 9 |
log_file_path = "/tmp/support_bot_log.txt"
|
| 10 |
logging.basicConfig(filename=log_file_path, level=logging.INFO, format='%(asctime)s - %(message)s')
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
class SupportBotAgent:
|
| 13 |
def __init__(self, document_path):
|
| 14 |
# Load a pre-trained question-answering model
|
|
@@ -18,8 +22,14 @@ class SupportBotAgent:
|
|
| 18 |
# Load the document text and split it into sections (by paragraphs)
|
| 19 |
self.document_text = self.load_document(document_path)
|
| 20 |
self.sections = self.document_text.split('\n\n')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
self.section_embeddings = self.embedder.encode(self.sections, convert_to_tensor=True)
|
| 22 |
logging.info(f"Loaded document: {document_path}")
|
|
|
|
| 23 |
|
| 24 |
def load_document(self, path):
|
| 25 |
"""Loads and extracts text from a TXT or PDF file."""
|
|
@@ -39,8 +49,10 @@ class SupportBotAgent:
|
|
| 39 |
else:
|
| 40 |
file_type = "Unsupported Format"
|
| 41 |
logging.error(f"Unsupported file format: {path}")
|
|
|
|
| 42 |
raise ValueError("Unsupported file format. Please provide a TXT or PDF file.")
|
| 43 |
logging.info(f"Loaded {file_type}: {path}")
|
|
|
|
| 44 |
return text
|
| 45 |
|
| 46 |
def find_relevant_section(self, query):
|
|
@@ -53,22 +65,26 @@ class SupportBotAgent:
|
|
| 53 |
best_idx = similarities.argmax().item()
|
| 54 |
best_section = self.sections[best_idx]
|
| 55 |
similarity_score = similarities[best_idx].item()
|
| 56 |
-
SIMILARITY_THRESHOLD = 0.4
|
| 57 |
|
| 58 |
if similarity_score >= SIMILARITY_THRESHOLD:
|
| 59 |
-
logging.info(f"Found relevant section using embeddings for query: {query}")
|
|
|
|
| 60 |
return best_section
|
| 61 |
|
| 62 |
-
logging.info(f"Low similarity ({similarity_score}). Falling back to keyword search.")
|
|
|
|
| 63 |
query_words = {word for word in query.lower().split() if word not in stopwords}
|
| 64 |
for section in self.sections:
|
| 65 |
section_words = {word for word in section.lower().split() if word not in stopwords}
|
| 66 |
common_words = query_words.intersection(section_words)
|
| 67 |
if len(common_words) >= 2:
|
| 68 |
logging.info(f"Keyword match for query: {query} with common words: {common_words}")
|
|
|
|
| 69 |
return section
|
| 70 |
|
| 71 |
logging.info("No good keyword match found. Returning default response.")
|
|
|
|
| 72 |
return "I don’t have enough information to answer that."
|
| 73 |
|
| 74 |
def answer_query(self, query):
|
|
@@ -79,6 +95,7 @@ class SupportBotAgent:
|
|
| 79 |
result = self.qa_model(question=query, context=context, max_answer_len=50)
|
| 80 |
answer = result["answer"]
|
| 81 |
logging.info(f"Answer for query '{query}': {answer}")
|
|
|
|
| 82 |
return answer
|
| 83 |
|
| 84 |
def adjust_response(self, query, response, feedback):
|
|
@@ -91,6 +108,7 @@ class SupportBotAgent:
|
|
| 91 |
else:
|
| 92 |
adjusted_response = response
|
| 93 |
logging.info(f"Adjusted answer for query '{query}': {adjusted_response}")
|
|
|
|
| 94 |
return adjusted_response
|
| 95 |
|
| 96 |
# --- Gradio Functions and App Workflow ---
|
|
@@ -98,8 +116,10 @@ class SupportBotAgent:
|
|
| 98 |
def process_file(file, state):
|
| 99 |
"""Handles file upload and initializes the SupportBotAgent."""
|
| 100 |
logging.info("Received file upload request")
|
|
|
|
| 101 |
if file is None:
|
| 102 |
logging.info("No file uploaded")
|
|
|
|
| 103 |
return [("Bot", "Please upload a TXT or PDF file.")], state
|
| 104 |
|
| 105 |
# Save the uploaded file to /tmp. Handle both file objects and NamedString.
|
|
@@ -112,10 +132,14 @@ def process_file(file, state):
|
|
| 112 |
if isinstance(content, str):
|
| 113 |
content = content.encode("utf-8")
|
| 114 |
f.write(content)
|
|
|
|
|
|
|
| 115 |
|
| 116 |
try:
|
| 117 |
state["agent"] = SupportBotAgent(temp_path)
|
| 118 |
except Exception as e:
|
|
|
|
|
|
|
| 119 |
return [("Bot", f"Error processing file: {str(e)}")], state
|
| 120 |
|
| 121 |
state["chat_history"] = [("Bot", "File loaded successfully. Enter your query (or type 'exit' to end):")]
|
|
@@ -136,6 +160,7 @@ def process_input(user_input, state):
|
|
| 136 |
if user_input.lower() == "exit":
|
| 137 |
state["chat_history"].append(("Bot", "Session ended. You may now download the log file."))
|
| 138 |
state["mode"] = "ended"
|
|
|
|
| 139 |
return state["chat_history"], state
|
| 140 |
|
| 141 |
if state["mode"] == "query":
|
|
@@ -157,6 +182,7 @@ def process_input(user_input, state):
|
|
| 157 |
state["last_answer"] = new_answer
|
| 158 |
state["feedback_count"] += 1
|
| 159 |
state["chat_history"].append(("Bot", f"Updated Answer: {new_answer}\nPlease provide feedback (good, too vague, not helpful):"))
|
|
|
|
| 160 |
return state["chat_history"], state
|
| 161 |
|
| 162 |
# --- Gradio UI Setup ---
|
|
|
|
| 9 |
log_file_path = "/tmp/support_bot_log.txt"
|
| 10 |
logging.basicConfig(filename=log_file_path, level=logging.INFO, format='%(asctime)s - %(message)s')
|
| 11 |
|
| 12 |
+
def flush_logs():
    """Force every handler on the root logger to flush its buffer.

    The app logs to a file in /tmp via logging.basicConfig; flushing after
    each significant event makes the log file immediately downloadable and
    up to date, rather than waiting for the interpreter to exit.
    """
    root_logger = logging.getLogger()
    for log_handler in root_logger.handlers:
        log_handler.flush()
|
| 15 |
+
|
| 16 |
class SupportBotAgent:
|
| 17 |
def __init__(self, document_path):
|
| 18 |
# Load a pre-trained question-answering model
|
|
|
|
| 22 |
# Load the document text and split it into sections (by paragraphs)
|
| 23 |
self.document_text = self.load_document(document_path)
|
| 24 |
self.sections = self.document_text.split('\n\n')
|
| 25 |
+
flush_logs()
|
| 26 |
+
# Log document length for debugging
|
| 27 |
+
logging.info(f"Document length: {len(self.document_text)} characters")
|
| 28 |
+
flush_logs()
|
| 29 |
+
# Create embeddings for all sections
|
| 30 |
self.section_embeddings = self.embedder.encode(self.sections, convert_to_tensor=True)
|
| 31 |
logging.info(f"Loaded document: {document_path}")
|
| 32 |
+
flush_logs()
|
| 33 |
|
| 34 |
def load_document(self, path):
|
| 35 |
"""Loads and extracts text from a TXT or PDF file."""
|
|
|
|
| 49 |
else:
|
| 50 |
file_type = "Unsupported Format"
|
| 51 |
logging.error(f"Unsupported file format: {path}")
|
| 52 |
+
flush_logs()
|
| 53 |
raise ValueError("Unsupported file format. Please provide a TXT or PDF file.")
|
| 54 |
logging.info(f"Loaded {file_type}: {path}")
|
| 55 |
+
flush_logs()
|
| 56 |
return text
|
| 57 |
|
| 58 |
def find_relevant_section(self, query):
|
|
|
|
| 65 |
best_idx = similarities.argmax().item()
|
| 66 |
best_section = self.sections[best_idx]
|
| 67 |
similarity_score = similarities[best_idx].item()
|
| 68 |
+
SIMILARITY_THRESHOLD = 0.4 # Adjust if needed
|
| 69 |
|
| 70 |
if similarity_score >= SIMILARITY_THRESHOLD:
|
| 71 |
+
logging.info(f"Found relevant section using embeddings for query: {query} (score: {similarity_score})")
|
| 72 |
+
flush_logs()
|
| 73 |
return best_section
|
| 74 |
|
| 75 |
+
logging.info(f"Low similarity ({similarity_score}) for query: {query}. Falling back to keyword search.")
|
| 76 |
+
flush_logs()
|
| 77 |
query_words = {word for word in query.lower().split() if word not in stopwords}
|
| 78 |
for section in self.sections:
|
| 79 |
section_words = {word for word in section.lower().split() if word not in stopwords}
|
| 80 |
common_words = query_words.intersection(section_words)
|
| 81 |
if len(common_words) >= 2:
|
| 82 |
logging.info(f"Keyword match for query: {query} with common words: {common_words}")
|
| 83 |
+
flush_logs()
|
| 84 |
return section
|
| 85 |
|
| 86 |
logging.info("No good keyword match found. Returning default response.")
|
| 87 |
+
flush_logs()
|
| 88 |
return "I don’t have enough information to answer that."
|
| 89 |
|
| 90 |
def answer_query(self, query):
|
|
|
|
| 95 |
result = self.qa_model(question=query, context=context, max_answer_len=50)
|
| 96 |
answer = result["answer"]
|
| 97 |
logging.info(f"Answer for query '{query}': {answer}")
|
| 98 |
+
flush_logs()
|
| 99 |
return answer
|
| 100 |
|
| 101 |
def adjust_response(self, query, response, feedback):
|
|
|
|
| 108 |
else:
|
| 109 |
adjusted_response = response
|
| 110 |
logging.info(f"Adjusted answer for query '{query}': {adjusted_response}")
|
| 111 |
+
flush_logs()
|
| 112 |
return adjusted_response
|
| 113 |
|
| 114 |
# --- Gradio Functions and App Workflow ---
|
|
|
|
| 116 |
def process_file(file, state):
|
| 117 |
"""Handles file upload and initializes the SupportBotAgent."""
|
| 118 |
logging.info("Received file upload request")
|
| 119 |
+
flush_logs()
|
| 120 |
if file is None:
|
| 121 |
logging.info("No file uploaded")
|
| 122 |
+
flush_logs()
|
| 123 |
return [("Bot", "Please upload a TXT or PDF file.")], state
|
| 124 |
|
| 125 |
# Save the uploaded file to /tmp. Handle both file objects and NamedString.
|
|
|
|
| 132 |
if isinstance(content, str):
|
| 133 |
content = content.encode("utf-8")
|
| 134 |
f.write(content)
|
| 135 |
+
logging.info(f"Saved uploaded file to {temp_path} (size: {os.path.getsize(temp_path)} bytes)")
|
| 136 |
+
flush_logs()
|
| 137 |
|
| 138 |
try:
|
| 139 |
state["agent"] = SupportBotAgent(temp_path)
|
| 140 |
except Exception as e:
|
| 141 |
+
logging.error(f"Error processing file: {str(e)}")
|
| 142 |
+
flush_logs()
|
| 143 |
return [("Bot", f"Error processing file: {str(e)}")], state
|
| 144 |
|
| 145 |
state["chat_history"] = [("Bot", "File loaded successfully. Enter your query (or type 'exit' to end):")]
|
|
|
|
| 160 |
if user_input.lower() == "exit":
|
| 161 |
state["chat_history"].append(("Bot", "Session ended. You may now download the log file."))
|
| 162 |
state["mode"] = "ended"
|
| 163 |
+
flush_logs()
|
| 164 |
return state["chat_history"], state
|
| 165 |
|
| 166 |
if state["mode"] == "query":
|
|
|
|
| 182 |
state["last_answer"] = new_answer
|
| 183 |
state["feedback_count"] += 1
|
| 184 |
state["chat_history"].append(("Bot", f"Updated Answer: {new_answer}\nPlease provide feedback (good, too vague, not helpful):"))
|
| 185 |
+
flush_logs()
|
| 186 |
return state["chat_history"], state
|
| 187 |
|
| 188 |
# --- Gradio UI Setup ---
|