Spaces:
Sleeping
Sleeping
| """ | |
| Gradio app for Multi-Document RAG Assistant | |
| (Auto-loads documents from data/ directory) | |
| """ | |
| import gradio as gr | |
| from backend.processing import process_documents_from_directory, get_available_files | |
| from backend.rag import RAGEngine | |
| from backend.llm import LLMClient | |
| # ------------------------------- | |
| # Global state | |
| # ------------------------------- | |
# Module-wide singletons shared by every handler below: ``rag_engine`` holds
# the chunk index that is searched, ``llm_client`` turns search results into
# an answer.
rag_engine, llm_client = RAGEngine(), LLMClient()
| # ------------------------------- | |
| # Auto-initialize on startup | |
| # ------------------------------- | |
def initialize_system():
    """Load and index the documents under ``data/`` for first use.

    Returns:
        A ``(status_message, files)`` tuple. ``files`` is the listing returned
        by ``get_available_files("data")``; it is an empty list when no files
        were found or when an exception was caught.
    """
    try:
        found = get_available_files("data")
        if not found:
            return "β οΈ No documents found in data/ directory. Please add PDF, TXT, or MD files to the data folder.", []

        print(f"π Found {len(found)} files: {found}")

        # A non-empty index is reused as-is; the files are not processed again.
        if rag_engine.get_chunk_count() > 0:
            reuse_msg = f"β Using existing index with {rag_engine.get_chunk_count()} chunks"
            return reuse_msg, found

        # Nothing indexed yet: extract chunks from the directory and add them.
        doc_chunks = process_documents_from_directory("data")
        if not doc_chunks:
            return "β οΈ No valid content extracted from documents", found

        rag_engine.add_documents(doc_chunks)
        ready_msg = f"β Ready! Indexed {len(doc_chunks)} chunks from {len(found)} documents."
        return ready_msg, found
    except Exception as exc:
        # Any failure is reported as a status string instead of propagating.
        return f"β Error initializing system: {str(exc)}", []
# Run the initial load once at import time. ``loaded_files`` is read later
# when the welcome message is built.
system_status, loaded_files = initialize_system()
print("System Status: {}".format(system_status))
| # ------------------------------- | |
| # Rebuild index function | |
| # ------------------------------- | |
def rebuild_index():
    """Rebuild the index from the ``data/`` directory.

    Returns:
        A ``(status_message, chunk_count, files)`` tuple. When an exception
        is caught the tuple is ``(error_message, 0, [])``.
    """
    try:
        total_chunks = rag_engine.rebuild_from_data("data")
        files_on_disk = get_available_files("data")
        message = (
            f"β Rebuilt index with {total_chunks} chunks from {len(files_on_disk)} files"
            if total_chunks > 0
            else "β οΈ No documents found to index"
        )
        return message, total_chunks, files_on_disk
    except Exception as exc:
        # Failures are turned into a status string rather than raised.
        return f"β Error rebuilding index: {str(exc)}", 0, []
| # ------------------------------- | |
| # Search & generate answer | |
| # ------------------------------- | |
| def search_and_answer(question, top_k, history): | |
| if not question.strip(): | |
| return history, "" | |
| if rag_engine.get_chunk_count() == 0: | |
| error_msg = "β οΈ No documents loaded. Please add PDF, TXT, or MD files to the 'data/' directory and click 'Rebuild Index'." | |
| history.append({"role": "user", "content": question}) | |
| history.append({"role": "assistant", "content": error_msg}) | |
| return history, "" | |
| try: | |
| # Search for relevant chunks | |
| results = rag_engine.search(question, top_k=top_k) | |
| if not results: | |
| no_results_msg = "β οΈ No relevant information found in the documents for this question." | |
| history.append({"role": "user", "content": question}) | |
| history.append({"role": "assistant", "content": no_results_msg}) | |
| return history, "" | |
| # Generate answer | |
| answer = llm_client.generate_answer(question, results) | |
| # Add to chat history | |
| history.append({"role": "user", "content": question}) | |
| history.append({"role": "assistant", "content": answer}) | |
| return history, "" | |
| except Exception as e: | |
| error_msg = f"β Error processing question: {str(e)}" | |
| history.append({"role": "user", "content": question}) | |
| history.append({"role": "assistant", "content": error_msg}) | |
| return history, "" | |
def get_system_info():
    """Build the Markdown text shown in the system-information panel.

    Reads the current file listing of ``data/``, the number of indexed
    chunks, and whether the LLM client reports a token, and returns them
    formatted as one Markdown string.
    """
    data_files = get_available_files("data")
    indexed_chunks = rag_engine.get_chunk_count()

    # One bullet per file, or a single placeholder bullet when there are none.
    if data_files:
        file_listing = "\n".join(f"β’ {name}" for name in data_files)
    else:
        file_listing = "β’ None"

    if llm_client.has_token():
        llm_status = "β Azure OpenAI configured"
    else:
        llm_status = "β οΈ No Azure OpenAI token (using extractive fallback)"

    return f"""
**π System Status**
**π Documents in data/ folder:** {len(data_files)}
{file_listing}
**π§ Chunks Indexed:** {indexed_chunks}
**π€ LLM Status:** {llm_status}
**π‘ Usage:** Ask questions about the content in your documents. The system searches through all indexed chunks to provide relevant answers.
"""
| # ------------------------------- | |
| # UI - Clean Chat Interface | |
| # ------------------------------- | |
# Build the whole UI: a collapsible status/controls panel, the chat window,
# the question input row, advanced settings, and the event wiring.
with gr.Blocks(
    title="AI Document Assistant",
    theme=gr.themes.Soft(),
    css="""
.gradio-container {
max-width: 1200px !important;
margin: auto;
}
""",
) as demo:
    # Header
    gr.Markdown("""
# π€ AI Document Assistant
Ask questions about your documents. The system automatically loads all documents from the `data/` directory.
""")

    # System info and controls
    with gr.Accordion("π System Information & Controls", open=False):
        system_info = gr.Markdown(get_system_info())
        with gr.Row():
            refresh_info_btn = gr.Button("π Refresh Info", variant="secondary")
            rebuild_btn = gr.Button("π¨ Rebuild Index", variant="secondary")
        rebuild_status = gr.Markdown()

    # Main chat interface
    chatbot = gr.Chatbot(
        type="messages",
        height=500,
        show_label=False,
        container=True,
        show_copy_button=True,
    )

    # Input area
    with gr.Row():
        question = gr.Textbox(
            placeholder="Ask a question about your documents...",
            label="Your Question",
            scale=4,
            lines=1,
            max_lines=3,
        )
        submit_btn = gr.Button("π¬ Send", variant="primary", scale=1)

    # Advanced options
    with gr.Accordion("βοΈ Advanced Settings", open=False):
        top_k = gr.Slider(
            minimum=1,
            maximum=10,
            value=5,
            step=1,
            label="Number of document chunks to retrieve",
            info="Higher values provide more context but may include less relevant information",
        )
        clear_btn = gr.Button("ποΈ Clear Chat History", variant="secondary")

    # -------------------------------
    # Event handlers
    # -------------------------------
    # The Send button and the Enter key run the same handler with the same
    # inputs/outputs; the second output clears the textbox.
    for register_submit in (submit_btn.click, question.submit):
        register_submit(
            search_and_answer,
            inputs=[question, top_k, chatbot],
            outputs=[chatbot, question],
        )

    # Clear chat history
    clear_btn.click(lambda: [], outputs=[chatbot])

    # Refresh system info
    refresh_info_btn.click(get_system_info, outputs=[system_info])

    def _rebuild_and_refresh():
        """Rebuild the index, then return (status text, refreshed info text).

        ``rebuild_index`` returns ``(status, chunk_count, files)``; only the
        status is displayed directly. The info panel is regenerated from
        ``get_system_info`` so it receives Markdown text, not the raw chunk
        count or file list.
        """
        status, _chunk_count, _files = rebuild_index()
        return status, get_system_info()

    # Rebuild index: update the status line and the info panel, one value each.
    rebuild_btn.click(
        _rebuild_and_refresh,
        outputs=[rebuild_status, system_info],
    )

    # Seed the chat on page load. The branch is chosen once, when the UI is
    # built, from the chunk count at that moment; ``loaded_files`` is the
    # listing captured at startup.
    if rag_engine.get_chunk_count() > 0:
        demo.load(
            lambda: [{
                "role": "assistant",
                "content": f"π **Welcome to AI Document Assistant!**\n\nI'm ready to help you with questions about your documents. I have access to **{rag_engine.get_chunk_count()} chunks** of information from **{len(loaded_files)} documents**:\n\n" +
                "\n".join([f"π {file}" for file in loaded_files]) +
                "\n\nπ‘ **What would you like to know?** You can ask about specific topics, request summaries, or explore relationships between different documents."
            }],
            outputs=[chatbot],
        )
    else:
        demo.load(
            lambda: [{
                "role": "assistant",
                "content": "β οΈ **No documents loaded.**\n\nTo get started:\n1. Create a `data/` folder in your project directory\n2. Add PDF, TXT, or MD files to the folder\n3. Click 'π¨ Rebuild Index' or restart the application\n\nI'll automatically load and index all your documents for instant searching!"
            }],
            outputs=[chatbot],
        )
| # ------------------------------- | |
| # Launch | |
| # ------------------------------- | |
if __name__ == "__main__":
    # Start the Gradio server only when run as a script, with debug enabled.
    demo.launch(debug=True)