Spaces:
Sleeping
Sleeping
| """ | |
| PDFPal - A lightweight, chat-based RAG application | |
| Built with free, local models and deployable via Gradio | |
| """ | |
| import os | |
| import tempfile | |
| import gradio as gr | |
| import time | |
| from typing import List, Dict, Any | |
| from pathlib import Path | |
| # Import our custom modules | |
| from modules.pdf_processor import PDFProcessor | |
| from modules.embedding_manager import EmbeddingManager | |
| from modules.llm_manager import LLMManager | |
| from modules.rag_pipeline import RAGPipeline | |
| from modules.chat_manager import ChatManager | |
| from config import Config | |
class PDFPalApp:
    """Gradio front end for PDFPal: upload PDFs, index them, chat over them.

    The instance owns the chat history (``chat_manager``), the PDF and
    embedding helpers and, once documents have been processed, the
    ``rag_pipeline`` that answers questions.

    NOTE(review): several UI strings below contain what looks like
    mis-encoded emoji. They are reproduced exactly as found; confirm the
    intended glyphs against the original file before changing them.
    """

    def __init__(self):
        """Create the helper components and build the Gradio interface."""
        self.chat_manager = ChatManager()
        self.rag_pipeline = None  # set by _process_documents
        self.uploaded_files = []
        self.current_model = Config.DEFAULT_LLM_MODEL

        # Initialize components
        self.pdf_processor = PDFProcessor()
        self.embedding_manager = EmbeddingManager()
        self.llm_manager = None  # set by _change_model / _process_documents

        # Create Gradio interface
        self.interface = self._create_interface()

    def _create_interface(self):
        """Build the Blocks layout, wire the event handlers, and return it.

        Layout: a narrow configuration/upload column next to a wider chat
        column. All callbacks are bound methods of this instance.
        """
        # Custom CSS for better styling
        css = """
        .gradio-container {
            max-width: 1200px !important;
            margin: auto !important;
        }
        .chat-container {
            height: 600px;
            overflow-y: auto;
            border: 1px solid #e0e0e0;
            border-radius: 8px;
            padding: 20px;
            background: #fafafa;
        }
        .file-upload {
            border: 2px dashed #007bff;
            border-radius: 8px;
            padding: 20px;
            text-align: center;
            background: #f8f9fa;
        }
        """

        with gr.Blocks(css=css, title="PDFPal - AI Chatbot", theme=gr.themes.Soft()) as interface:
            # Header (written line by line so code indentation never leaks
            # into the Markdown source)
            gr.Markdown(
                "\n"
                "# π PDFPal - AI Chatbot\n"
                "**Chat with your PDF documents using local AI models!**\n"
                "Upload one or more PDF files and start asking questions in natural language.\n"
            )

            with gr.Row():
                with gr.Column(scale=1):
                    # Sidebar for configuration
                    gr.Markdown("### βοΈ Configuration")

                    # Model selection
                    model_dropdown = gr.Dropdown(
                        choices=Config.get_model_names(),
                        value=Config.get_recommended_model(),
                        label="π€ Language Model",
                        info="Choose a lightweight local model",
                    )

                    # Advanced settings
                    with gr.Accordion("π§ Advanced Settings", open=False):
                        chunk_size = gr.Slider(
                            minimum=500, maximum=2000, value=800, step=100,
                            label="Chunk Size", info="Size of text chunks (smaller = faster)",
                        )
                        chunk_overlap = gr.Slider(
                            minimum=50, maximum=500, value=100, step=50,
                            label="Chunk Overlap", info="Overlap between chunks",
                        )
                        max_tokens = gr.Slider(
                            minimum=100, maximum=1000, value=300, step=50,
                            label="Max Response Tokens", info="Maximum response length (smaller = faster)",
                        )
                        temperature = gr.Slider(
                            minimum=0.0, maximum=1.0, value=0.7, step=0.1,
                            label="Temperature", info="Creativity level",
                        )

                    # File upload section
                    gr.Markdown("### π Upload Documents")
                    file_upload = gr.File(
                        file_count="multiple",
                        file_types=[".pdf"],
                        label="Choose PDF files",
                    )
                    process_btn = gr.Button("π Process Documents", variant="primary")
                    process_status = gr.Textbox(label="Status", interactive=False)

                    # Model info
                    model_info = gr.JSON(label="Model Information", visible=False)

                with gr.Column(scale=2):
                    # Chat interface
                    gr.Markdown("### π¬ Chat Interface")

                    # Chat history display
                    chat_history = gr.Chatbot(
                        label="Conversation",
                        height=500,
                        show_label=False,
                        container=True,
                        bubble_full_width=False,
                    )

                    # Chat input
                    with gr.Row():
                        chat_input = gr.Textbox(
                            placeholder="Ask a question about your documents...",
                            label="Your Question",
                            scale=4,
                        )
                        send_btn = gr.Button("Send", variant="primary", scale=1)

                    # Clear chat button
                    clear_btn = gr.Button("ποΈ Clear Chat", variant="secondary")

                    # Export options
                    with gr.Row():
                        export_json_btn = gr.Button("π Export JSON")
                        export_txt_btn = gr.Button("π Export Text")

                    # Statistics
                    stats_display = gr.JSON(label="Chat Statistics", visible=False)

            # Event handlers
            model_dropdown.change(
                fn=self._change_model,
                inputs=[model_dropdown],
                outputs=[model_info, process_status],
            )
            process_btn.click(
                fn=self._process_documents,
                inputs=[file_upload, chunk_size, chunk_overlap, model_dropdown],
                outputs=[process_status, model_info],
            )
            # The Send button and pressing Enter in the textbox are wired
            # identically, button first.
            for register in (send_btn.click, chat_input.submit):
                register(
                    fn=self._send_message,
                    inputs=[chat_input, max_tokens, temperature],
                    outputs=[chat_history, chat_input, stats_display],
                    show_progress=True,
                )
            clear_btn.click(
                fn=self._clear_chat,
                outputs=[chat_history, stats_display],
            )
            export_json_btn.click(
                fn=self._export_conversation_json,
                outputs=[gr.File()],
            )
            export_txt_btn.click(
                fn=self._export_conversation_text,
                outputs=[gr.File()],
            )

        return interface

    def _change_model(self, model_name):
        """Load ``model_name`` and make it the active language model.

        Returns:
            ``(model_info, status_message)``. On failure ``model_info`` is
            ``{}`` and the previously active model and manager are kept.
        """
        try:
            # Build and query the new manager first so a failed load cannot
            # leave ``current_model`` naming a model that was never loaded.
            new_manager = LLMManager(model_name=model_name)
            model_info = new_manager.get_model_info()
        except Exception as e:
            return {}, f"β Error changing model: {e}"

        self.llm_manager = new_manager
        self.current_model = model_name
        # NOTE(review): an existing ``rag_pipeline`` was constructed with the
        # previous manager and is not rebuilt here; presumably documents must
        # be re-processed for the new model to answer -- confirm.
        return model_info, f"β Model changed to {model_name}"

    @staticmethod
    def _read_upload(file, index):
        """Return ``(content_bytes, display_name)`` for one uploaded item.

        Handles the three shapes the upload value is treated as here: a
        file-like object, a filesystem path string, or any other object
        (converted via ``bytes()`` or its ``str()`` form). ``index`` is used
        only to build a fallback name when the item does not carry one.
        """
        fallback_name = f'file_{index}.pdf'

        if hasattr(file, 'read'):
            # File-like object; its ``name`` may be a full temp path, so
            # reduce it to a basename like the path-string branch does.
            raw_name = getattr(file, 'name', None)
            if isinstance(raw_name, str) and raw_name:
                return file.read(), os.path.basename(raw_name)
            return file.read(), fallback_name

        if isinstance(file, str):
            # File path string
            with open(file, 'rb') as source:
                return source.read(), os.path.basename(file)

        # Last resort: try to get content as bytes
        content = bytes(file) if hasattr(file, '__bytes__') else str(file).encode()
        return content, fallback_name

    def _extract_chunks(self, file_content):
        """Write ``file_content`` to a temporary PDF and return its chunks.

        The temporary file is removed whether or not writing or processing
        succeeds.
        """
        tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.pdf')
        tmp_path = tmp_file.name
        try:
            with tmp_file:  # closes the handle before the processor opens the path
                tmp_file.write(file_content)
            return self.pdf_processor.process_pdf(tmp_path)
        finally:
            os.unlink(tmp_path)

    def _process_documents(self, files, chunk_size, chunk_overlap, model_name):
        """Chunk the uploaded PDFs, build the knowledge base and RAG pipeline.

        Args:
            files: Upload value from the file component (may be empty/None).
            chunk_size: Chunk size passed to ``PDFProcessor``.
            chunk_overlap: Chunk overlap passed to ``PDFProcessor``.
            model_name: Model used to create the ``LLMManager``.

        Returns:
            ``(status_message, model_info)``; ``model_info`` is ``{}`` unless
            processing succeeded.
        """
        if not files:
            return "β οΈ Please upload PDF files first", {}

        try:
            # Update processor settings
            self.pdf_processor = PDFProcessor(chunk_size=chunk_size, chunk_overlap=chunk_overlap)

            # Initialize LLM manager and keep the recorded model name in sync
            self.llm_manager = LLMManager(model_name=model_name)
            self.current_model = model_name

            # Process all files
            all_chunks = []
            self.uploaded_files = []
            for index, file in enumerate(files):
                file_content, file_name = self._read_upload(file, index)
                all_chunks.extend(self._extract_chunks(file_content))
                self.uploaded_files.append(file_name)

            if not all_chunks:
                return "β No text could be extracted from the uploaded files", {}

            # Create knowledge base
            knowledge_base = self.embedding_manager.create_knowledge_base(all_chunks)

            # Initialize RAG pipeline
            self.rag_pipeline = RAGPipeline(
                knowledge_base=knowledge_base,
                llm_manager=self.llm_manager,
            )

            model_info = self.llm_manager.get_model_info()
            status = f"β Processed {len(all_chunks)} text chunks from {len(files)} file(s)"
            return status, model_info
        except Exception as e:
            # UI boundary: report the failure in the status box instead of raising.
            return f"β Error processing files: {e}", {}

    def _send_message(self, message, max_tokens, temperature):
        """Record the user's message, answer it, and return the UI updates.

        Returns:
            ``(chat_history, "", stats)`` -- the empty string clears the input
            box. ``stats`` is ``{}`` for a blank message; after a successful
            answer it also carries timing fields.
        """
        start_time = time.time()
        chat = self.chat_manager

        # A missing (None) message is treated like a blank one; neither is
        # recorded or sent to the pipeline.
        if not message or not message.strip():
            return chat.get_gradio_chat_history(), "", {}

        if not self.rag_pipeline:
            # No documents processed yet: record the question plus a hint.
            chat.add_message("user", message)
            chat.add_message("assistant", "β οΈ Please upload and process documents first!")
            return chat.get_gradio_chat_history(), "", chat.get_statistics()

        try:
            # Add user message
            chat.add_message("user", message)

            # Get AI response with timing
            response_start = time.time()
            response = self.rag_pipeline.get_response(
                message,
                max_tokens=max_tokens,
                temperature=temperature,
            )
            response_time = time.time() - response_start

            # Add AI response
            chat.add_message("assistant", response)

            # Add performance info to statistics
            total_time = time.time() - start_time
            stats = chat.get_statistics()
            stats.update({
                "response_time_seconds": round(response_time, 2),
                "total_time_seconds": round(total_time, 2),
                "performance_note": f"Response generated in {round(response_time, 2)}s",
            })
            return chat.get_gradio_chat_history(), "", stats
        except Exception as e:
            # UI boundary: surface the failure as an assistant message.
            chat.add_message("assistant", f"β Error: {e}")
            return chat.get_gradio_chat_history(), "", chat.get_statistics()

    def _clear_chat(self):
        """Clear the stored history and return empty chat/statistics values."""
        self.chat_manager.clear_history()
        return [], {}

    def _export_to_temp_file(self, suffix, writer):
        """Create a temp file ending in ``suffix`` and let ``writer(path)`` fill it.

        Returns the file path, or ``None`` if anything fails. On failure the
        error is logged and the partially created file is removed.
        """
        import logging  # local import: not part of the module's import header

        export_path = None
        try:
            # Only the path is needed; close the handle right away so it is
            # not leaked and the writer can reopen the file by name.
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as handle:
                export_path = handle.name
            writer(export_path)
            return export_path
        except Exception:
            logging.getLogger(__name__).exception("Conversation export failed")
            if export_path is not None:
                try:
                    os.unlink(export_path)
                except OSError:
                    pass  # best-effort cleanup
            return None

    def _export_conversation_json(self):
        """Export the conversation as JSON; return the file path or ``None``."""
        return self._export_to_temp_file('.json', self.chat_manager.save_conversation)

    def _export_conversation_text(self):
        """Export the conversation as text; return the file path or ``None``."""
        return self._export_to_temp_file('.txt', self.chat_manager.export_conversation_text)

    def launch(self, **kwargs):
        """Launch the Gradio interface, forwarding ``kwargs`` to ``launch``."""
        return self.interface.launch(**kwargs)
def main():
    """Build the PDFPal app and serve it on 0.0.0.0:7860 with debug enabled."""
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": True,
    }
    PDFPalApp().launch(**launch_options)


if __name__ == "__main__":
    main()