Create app.py
app.py ADDED
@@ -0,0 +1,150 @@
import logging
import gradio as gr
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util
import PyPDF2
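# Dependencies (assumed): gradio, transformers, sentence-transformers (which
# pulls in torch), and PyPDF2. The PdfReader API used below needs a recent
# PyPDF2; older 1.x releases only exposed PdfFileReader.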

# Set up logging
logging.basicConfig(filename='support_bot_log.txt', level=logging.INFO)

# Load models
qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Helper function to extract text from PDF
def extract_text_from_pdf(file_path):
    text = ""
    with open(file_path, "rb") as file:
        pdf_reader = PyPDF2.PdfReader(file)
        for page in pdf_reader.pages:
            # extract_text() can return None for pages with no extractable text
            text += (page.extract_text() or "") + "\n"
    return text

# Find the most relevant section in the document
def find_relevant_section(query, sections, section_embeddings):
    stopwords = {"and", "the", "is", "for", "to", "a", "an", "of", "in", "on", "at", "with", "by", "it", "as", "so", "what"}

    # Semantic search
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    similarities = util.cos_sim(query_embedding, section_embeddings)[0]
    best_idx = similarities.argmax().item()
    best_section = sections[best_idx]
    similarity_score = similarities[best_idx].item()

    SIMILARITY_THRESHOLD = 0.4
    if similarity_score >= SIMILARITY_THRESHOLD:
        logging.info(f"Found relevant section using embeddings for query: {query}")
        return best_section

    logging.info(f"Low similarity ({similarity_score:.2f}). Falling back to keyword search.")

    # Keyword-based fallback search with stopword filtering
    query_words = {word for word in query.lower().split() if word not in stopwords}
    for section in sections:
        section_words = {word for word in section.lower().split() if word not in stopwords}
        common_words = query_words.intersection(section_words)
        if len(common_words) >= 2:
            logging.info(f"Keyword match found for query: {query} with common words: {common_words}")
            return section

    logging.info("No good keyword match found. Returning default fallback response.")
    return "I don’t have enough information to answer that."

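# Illustrative behavior: for a query such as "How do I reset my password?",
# the embedding pass usually selects the right section outright; the keyword
# fallback only fires when the best cosine score drops below the 0.4 threshold.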
# Process the uploaded file
def process_file(file, state):
    if file is None:
        return [("Bot", "Please upload a file.")], state

    file_path = file.name
    if file_path.lower().endswith(".pdf"):
        text = extract_text_from_pdf(file_path)
    elif file_path.lower().endswith(".txt"):
        with open(file_path, 'r', encoding='utf-8') as f:
            text = f.read()
    else:
        return [("Bot", "Unsupported file format. Please upload a PDF or TXT file.")], state

    # Split on blank lines; drop empty sections so they are not embedded
    sections = [s for s in text.split('\n\n') if s.strip()]
    section_embeddings = embedder.encode(sections, convert_to_tensor=True)
    state['document_text'] = text
    state['sections'] = sections
    state['section_embeddings'] = section_embeddings
    state['current_query'] = None
    state['feedback_count'] = 0
    state['mode'] = 'waiting_for_query'
    state['chat_history'] = [("Bot", "File processed. You can now ask questions.")]
    logging.info(f"Processed file: {file_path}")
    return state['chat_history'], state

# Handle user input (queries and feedback)
def handle_input(user_input, state):
    if state['mode'] == 'waiting_for_upload':
        state['chat_history'].append(("Bot", "Please upload a file first."))
    elif state['mode'] == 'waiting_for_query':
        query = user_input
        state['current_query'] = query
        state['feedback_count'] = 0
        context = find_relevant_section(query, state['sections'], state['section_embeddings'])
        if context == "I don’t have enough information to answer that.":
            answer = context
        else:
            result = qa_model(question=query, context=context)
            answer = result["answer"]
        state['last_answer'] = answer
        state['mode'] = 'waiting_for_feedback'
        state['chat_history'].append(("User", query))
        state['chat_history'].append(("Bot", f"Answer: {answer}\nPlease provide feedback: good, too vague, not helpful."))
        logging.info(f"Query: {query}, Answer: {answer}")
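    # Feedback loop: "good" ends the round; "too vague" / "not helpful" refine
    # the answer, and after two refinements the bot returns to query mode.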
    elif state['mode'] == 'waiting_for_feedback':
        feedback = user_input.lower()
        state['chat_history'].append(("User", feedback))
        logging.info(f"Feedback: {feedback}")
        if feedback == "good" or state['feedback_count'] >= 2:
            state['mode'] = 'waiting_for_query'
            if feedback == "good":
                state['chat_history'].append(("Bot", "Thank you for your feedback. You can ask another question."))
            else:
                state['chat_history'].append(("Bot", "Maximum feedback iterations reached. You can ask another question."))
        else:
            query = state['current_query']
            context = find_relevant_section(query, state['sections'], state['section_embeddings'])
            if feedback == "too vague":
                adjusted_answer = f"{state['last_answer']}\n\n(More details:\n{context[:500]}...)"
            elif feedback == "not helpful":
                adjusted_answer = qa_model(question=query + " Please provide more detailed information with examples.", context=context)['answer']
            else:
                state['chat_history'].append(("Bot", "Please provide valid feedback: good, too vague, not helpful."))
                return state['chat_history'], state
            state['last_answer'] = adjusted_answer
            state['feedback_count'] += 1
            state['chat_history'].append(("Bot", f"Updated answer: {adjusted_answer}\nPlease provide feedback: good, too vague, not helpful."))
            logging.info(f"Adjusted answer: {adjusted_answer}")
    return state['chat_history'], state

# Initial state
initial_state = {
    'document_text': None,
    'sections': None,
    'section_embeddings': None,
    'current_query': None,
    'feedback_count': 0,
    'mode': 'waiting_for_upload',
    'chat_history': [("Bot", "Please upload a PDF or TXT file to start.")],
    'last_answer': None
}

# Gradio interface
with gr.Blocks() as demo:
    state = gr.State(initial_state)
    file_upload = gr.File(label="Upload PDF or TXT file")
    chat = gr.Chatbot()
    user_input = gr.Textbox(label="Your query or feedback")
    submit_btn = gr.Button("Submit")

    # Process file upload
    file_upload.upload(process_file, inputs=[file_upload, state], outputs=[chat, state])

    # Handle user input and clear the textbox
    submit_btn.click(handle_input, inputs=[user_input, state], outputs=[chat, state]).then(lambda: "", None, user_input)

demo.launch()
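Since the handlers are plain functions over a state dict, they can be exercised without the UI. A minimal smoke-test sketch, assuming a local sample.txt exists and `demo.launch()` is skipped; `FakeUpload` is a hypothetical stand-in for the `.name`-bearing object Gradio's File component passes in:

import copy

# Hypothetical stand-in for Gradio's uploaded-file object
class FakeUpload:
    name = "sample.txt"  # assumed to exist locally

state = copy.deepcopy(initial_state)
history, state = process_file(FakeUpload(), state)          # "File processed..."
history, state = handle_input("What is this document about?", state)
print(history[-1][1])                                       # answer + feedback prompt
history, state = handle_input("good", state)                # ends the feedback round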