Upload 2 files
- app.py +266 -0
- requirements.txt +14 -0
app.py
ADDED
@@ -0,0 +1,266 @@
import os
import tempfile
import datetime

import gradio as gr
# langchain >= 0.2 moved third-party integrations into langchain-community,
# so loaders, embeddings, vector stores, and LLM wrappers import from there
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceHub
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter

class EnhancedPDFChatbot:
    def __init__(self):
        self.vectorstore = None
        self.qa_chain = None
        self.embeddings = HuggingFaceEmbeddings()
        self.is_ready = False
        self.chat_history = []

    def process_pdf(self, pdf_file):
        """Process an uploaded PDF file with enhanced error handling."""
        try:
            if pdf_file is None:
                return "Please select a PDF file first!"

            # Save the uploaded bytes to a temporary file for PyPDFLoader
            with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
                tmp_file.write(pdf_file)
                tmp_path = tmp_file.name

            try:
                # Load the PDF into one document per page
                loader = PyPDFLoader(tmp_path)
                documents = loader.load()
            finally:
                # Clean up the temp file even if loading fails
                os.unlink(tmp_path)

            if not documents:
                return "No content could be extracted from the PDF."

            # Split text into overlapping chunks for retrieval
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=800,
                chunk_overlap=150,
                length_function=len,
            )
            chunks = text_splitter.split_documents(documents)

            # Build the FAISS vector store and the QA chain on top of it
            self.vectorstore = FAISS.from_documents(chunks, self.embeddings)
            self.setup_qa_chain()

            self.is_ready = True
            self.chat_history = []

            return f"✅ Success! Processed {len(documents)} pages into {len(chunks)} chunks. You can now ask questions!"

        except Exception as e:
            return f"❌ Error: {str(e)}"

    def setup_qa_chain(self):
        """Set up the retrieval QA chain with an enhanced prompt."""
        # HuggingFaceHub calls the hosted Inference API and requires
        # HUGGINGFACEHUB_API_TOKEN to be set in the environment
        llm = HuggingFaceHub(
            repo_id="google/flan-t5-small",
            model_kwargs={"temperature": 0.2, "max_length": 512, "repetition_penalty": 1.1}
        )

        prompt_template = """As an AI assistant, provide accurate answers based on the given context.

CONTEXT:
{context}

QUESTION:
{question}

INSTRUCTIONS:
- Answer clearly and concisely
- Base your answer strictly on the context provided
- If the answer isn't in the context, say "I cannot find this information in the document"
- Use bullet points for lists when appropriate
- Be helpful and professional

ANSWER:
"""

        PROMPT = PromptTemplate(
            template=prompt_template,
            input_variables=["context", "question"]
        )

        self.qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=self.vectorstore.as_retriever(
                search_type="similarity",
                search_kwargs={"k": 4}
            ),
            chain_type_kwargs={"prompt": PROMPT},
            return_source_documents=True
        )
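
    # Sizing note (assumption, not enforced anywhere in this file): the
    # "stuff" chain pastes all k=4 retrieved chunks verbatim into {context},
    # so roughly 4 x 800 characters plus the prompt must fit within
    # flan-t5-small's input window; oversized contexts may be truncated.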
    def ask_question(self, question, history):
        """Ask a question and format the response with sources."""
        if not self.is_ready:
            return "Please upload and process a PDF first!", history

        if not question.strip():
            return "", history

        try:
            # Timestamp each answer
            timestamp = datetime.datetime.now().strftime("%H:%M:%S")

            result = self.qa_chain.invoke({"query": question})
            answer = result["result"]

            # Format the response with source previews
            formatted_response = f"**{timestamp}**\n\n{answer}\n\n---\n**Sources:**"

            for doc in result["source_documents"][:3]:
                # PyPDFLoader pages are 0-indexed; guard against missing
                # metadata (the original 'N/A' default would crash on + 1)
                raw_page = doc.metadata.get('page')
                page_num = raw_page + 1 if isinstance(raw_page, int) else 'N/A'
                content = doc.page_content.replace('\n', ' ').strip()
                preview = content[:120] + "..." if len(content) > 120 else content
                formatted_response += f"\n• Page {page_num}: {preview}"

            # Update history
            history.append((question, formatted_response))
            self.chat_history = history

            return "", history

        except Exception as e:
            error_msg = f"Error processing your question: {str(e)}"
            history.append((question, error_msg))
            return "", history

    def clear_chat(self):
        """Clear the chat history."""
        self.chat_history = []
        return []

# Create the shared chatbot instance
enhanced_chatbot = EnhancedPDFChatbot()
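# Because this instance lives at module level, every browser session shares
# the same vector store and chat history; per-session isolation would need
# gr.State instead of a shared object.
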
# Create the enhanced Gradio interface
with gr.Blocks(title="Enhanced PDF Chatbot", theme=gr.themes.Default()) as enhanced_demo:
    gr.Markdown("""
    # 🚀 Enhanced PDF Chatbot Agent
    **Upload a PDF document and have a conversation with AI about its content!**
    """)

    with gr.Row():
        with gr.Column(scale=1):
            with gr.Group():
                gr.Markdown("### 📄 Document Upload")
                pdf_input = gr.File(
                    label="Upload PDF File",
                    file_types=[".pdf"],
                    type="binary"
                )
                upload_btn = gr.Button("Process Document", variant="primary")
                status_output = gr.Textbox(label="Status", interactive=False)

            with gr.Group():
                gr.Markdown("### ⚙️ Settings")
                # Note: these sliders are display-only placeholders; they are
                # not wired into process_pdf or the LLM settings
                chunk_size = gr.Slider(
                    minimum=500,
                    maximum=2000,
                    value=800,
                    step=100,
                    label="Chunk Size"
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.2,
                    step=0.1,
                    label="Temperature"
                )

        with gr.Column(scale=2):
            gr.Markdown("### 💬 Chat Interface")
            # (question, answer) tuple history; Gradio 5 marks this format
            # deprecated in favor of type="messages" but still accepts it
            chatbot = gr.Chatbot(height=450, show_copy_button=True)

            with gr.Row():
                question_box = gr.Textbox(
                    placeholder="Ask a question about the PDF...",
                    label="Your Question",
                    scale=4
                )
                ask_btn = gr.Button("Ask", scale=1)

            with gr.Row():
                clear_btn = gr.Button("Clear Chat", variant="secondary")
                export_btn = gr.Button("Export Chat", variant="secondary")

    # Example questions users can click to fill the question box
    gr.Examples(
        examples=[
            "What is the main purpose of this document?",
            "Summarize the key points in bullet form",
            "What are the main findings or conclusions?",
            "List any recommendations mentioned"
        ],
        inputs=question_box,
        label="Example Questions"
    )

    # Event handlers
    upload_btn.click(
        fn=enhanced_chatbot.process_pdf,
        inputs=pdf_input,
        outputs=status_output
    )

    def ask_question_wrapper(question, history):
        return enhanced_chatbot.ask_question(question, history)

    ask_btn.click(
        fn=ask_question_wrapper,
        inputs=[question_box, chatbot],
        outputs=[question_box, chatbot]
    )

    question_box.submit(
        fn=ask_question_wrapper,
        inputs=[question_box, chatbot],
        outputs=[question_box, chatbot]
    )

    clear_btn.click(
        fn=enhanced_chatbot.clear_chat,
        inputs=[],
        outputs=chatbot
    )

    # Export functionality
    def export_chat():
        if not enhanced_chatbot.chat_history:
            return "No chat history to export!"

        export_text = "PDF Chatbot Conversation Export\n"
        export_text += "=" * 40 + "\n\n"

        for i, (question, answer) in enumerate(enhanced_chatbot.chat_history, 1):
            export_text += f"Q{i}: {question}\n"
            export_text += f"A{i}: {answer}\n"
            export_text += "-" * 30 + "\n"

        return export_text

    # The export textbox is created here, so it renders at the bottom of
    # the page rather than inside the chat column
    export_output = gr.Textbox(label="Exported Chat", lines=20)
    export_btn.click(
        fn=export_chat,
        inputs=[],
        outputs=export_output
    )

if __name__ == "__main__":
    enhanced_demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True  # note: share links are ignored when running on HF Spaces
    )
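
Aside: a minimal headless sketch of how the class above can be exercised without the UI, for review purposes. sample.pdf is a hypothetical local file, and HUGGINGFACEHUB_API_TOKEN must be set in the environment for the HuggingFaceHub LLM:

from app import EnhancedPDFChatbot

bot = EnhancedPDFChatbot()
with open("sample.pdf", "rb") as f:   # hypothetical sample document
    print(bot.process_pdf(f.read()))  # expected: "✅ Success! Processed ..."

_, history = bot.ask_question("What is the main purpose of this document?", [])
print(history[-1][1])                 # timestamped answer plus source previews
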
requirements.txt
ADDED
@@ -0,0 +1,14 @@
gradio==5.36.2
transformers==4.53.3
sentence-transformers==3.0.1
langchain==0.3.27
langchain-community==0.3.27
faiss-cpu==1.8.0
pypdf  # required at runtime by PyPDFLoader
numpy<2
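
Note on the pins: pypdf is the backend PyPDFLoader imports at runtime, and numpy<2 is likely there because faiss-cpu 1.8.0 wheels were built against the NumPy 1.x ABI and can fail to import under NumPy 2.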