"""Gradio front-end for chatting with uploaded documents via Gemma 3.

Wires a DocumentProcessor (chunking, entity extraction, retrieval) and a
GemmaInferenceClient (LLM generation) into a two-column Gradio Blocks UI:
upload/configure on the left, streaming chat on the right.
"""

import gradio as gr
import tempfile
import os
import logging
from typing import List, Dict, Any, Optional, Tuple
import time
from datetime import datetime
import json

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Import our modules
from document_processor_hf import DocumentProcessor
from inference_client import GemmaInferenceClient

# Global variables for maintaining state
processor = DocumentProcessor()
inference_client = GemmaInferenceClient()
current_document = None   # dict describing the last successfully processed document
chat_history = []         # NOTE(review): never read — Gradio tracks history via the Chatbot component
processing_stats = {}     # summary statistics from the last processing run


def process_document(file, use_smart_processing, use_prf, use_variants, use_reranking, progress=gr.Progress()):
    """Process an uploaded document and prepare the info panels.

    Args:
        file: Gradio file object (or None if nothing was uploaded).
        use_smart_processing: enable entity extraction / doc-type detection.
        use_prf, use_variants, use_reranking: retrieval enhancement flags,
            echoed back into the info panel.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        (status_markdown, doc_info_markdown, suggestions_markdown); the last
        two are empty strings on failure.
    """
    global current_document, processing_stats

    if file is None:
        return "❌ Please upload a document first.", "", ""

    try:
        progress(0.1, desc="📄 Reading document...")

        # Process the document
        result = processor.process_document(
            file.name,
            use_smart_processing=use_smart_processing
        )

        progress(0.7, desc="🧠 Extracting entities...")

        if result['success']:
            current_document = {
                'name': os.path.basename(file.name),
                'type': result.get('document_type', 'general'),
                'chunks': len(result['chunks']),
                'entities': len(result['entities']),
                'suggestions': result.get('suggestions', [])
            }

            processing_stats = {
                'document_type': result.get('document_type', 'general'),
                'entities_found': len(result['entities']),
                'chunks_created': len(result['chunks']),
                'text_length': result.get('text_length', 0)
            }

            progress(1.0, desc="✅ Processing complete!")

            # Create document info display; the enhancement lines only render
            # when smart processing is on.
            doc_info = f"""
## 📊 Document Information

**📄 File:** {current_document['name']}
**📝 Type:** {current_document['type'].title()}
**📚 Chunks:** {current_document['chunks']}
**🏷️ Entities:** {current_document['entities']}

### 🎯 Active Enhancements:
{f"🔍 **Pseudo Relevance Feedback**: {'✅ Enabled' if use_prf else '❌ Disabled'}" if use_smart_processing else ""}
{f"📝 **Query Variants**: {'✅ Enabled' if use_variants else '❌ Disabled'}" if use_smart_processing else ""}
{f"🎯 **Cross-Encoder Reranking**: {'✅ Enabled' if use_reranking else '❌ Disabled'}" if use_smart_processing else ""}
"""

            # Create suggested questions (cap at five)
            suggestions_html = ""
            if current_document['suggestions']:
                suggestions_html = "### 💡 Suggested Questions:\n"
                for i, suggestion in enumerate(current_document['suggestions'][:5]):
                    suggestions_html += f"{i+1}. {suggestion}\n"

            success_msg = "✅ **Document processed successfully!** Ready for questions."
            return success_msg, doc_info, suggestions_html
        else:
            error_msg = f"❌ **Processing failed:** {result.get('error', 'Unknown error')}"
            return error_msg, "", ""

    except Exception as e:
        logger.error(f"Document processing error: {e}")
        error_msg = f"❌ **Error:** {str(e)}"
        return error_msg, "", ""


def chat_with_document(message, history, temperature, max_tokens, top_k, use_prf, use_variants, use_reranking):
    """Answer a user question against the processed document.

    Generator: yields (history, cleared_textbox) pairs so Gradio can show the
    interim "Thinking..." message before the final answer replaces it.
    """
    global current_document

    if not current_document:
        history.append([message, "❌ Please upload and process a document first."])
        # BUG FIX: this function is a generator (it contains `yield` below), so
        # `return history, ""` would silently discard the update — the error
        # message was never shown. Yield it, then stop.
        yield history, ""
        return

    if not message.strip():
        yield history, ""
        return

    try:
        # Add thinking message
        history.append([message, "🤔 Thinking..."])
        yield history, ""

        # Query the document
        start_time = time.time()
        context_result = processor.query_document(
            message,
            top_k=min(top_k, 3),  # Limit context for memory
            use_smart_retrieval=True,
            use_prf=use_prf,
            use_variants=use_variants,
            use_reranking=use_reranking
        )

        # Generate response with memory-efficient settings
        response_result = inference_client.generate_response(
            query=message,
            context=context_result['context'][:2000],  # Limit context length
            temperature=temperature,
            max_tokens=min(max_tokens, 256)  # Limit response length for memory
        )

        query_time = time.time() - start_time

        # Format response with enhancements info
        response = response_result['response']

        # Add enhancement information
        enhancements = []
        if use_prf:
            enhancements.append("🔍 PRF")
        if use_variants:
            enhancements.append("📝 Variants")
        if use_reranking:
            enhancements.append("🎯 Reranking")

        if enhancements:
            response += f"\n\n*Enhanced with: {' | '.join(enhancements)} | ⚡ {query_time:.2f}s*"

        # Clear memory after each response
        inference_client.clear_cache()

        # Update history with final response
        history[-1] = [message, response]
        yield history, ""

    except Exception as e:
        logger.error(f"Chat error: {e}")
        error_response = f"❌ Sorry, I encountered an error: {str(e)}"
        history[-1] = [message, error_response]
        yield history, ""


def use_suggested_question(question_text, history):
    """Copy a suggested question into the input box (no-op without a document)."""
    if question_text and current_document:
        return question_text, history
    return "", history


def clear_chat():
    """Clear chat history."""
    return []


def get_example_files():
    """Return markdown describing the document types users can try."""
    examples = """
### 📋 Try these document types:

**📄 Resumes/CVs**: Upload a resume to ask "Whose resume is this?" or "What are their skills?"

**📊 Reports**: Upload a business report to ask "What are the key findings?" or "What methodology was used?"

**📋 Contracts**: Upload a contract to ask "What are the main terms?" or "Who are the parties involved?"

**📚 Academic Papers**: Upload a research paper to ask "What is the research question?" or "What are the results?"

**🖼️ Images**: Upload screenshots or scanned documents with text for OCR processing.
"""
    return examples


# Create the Gradio interface
with gr.Blocks(
    title="Document Chat with Gemma 3",
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="purple",
        neutral_hue="slate"
    ),
    css="""
    .gradio-container { max-width: 1200px !important; }
    .main-header {
        text-align: center;
        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
        color: white;
        padding: 2rem;
        border-radius: 10px;
        margin-bottom: 2rem;
    }
    .enhancement-badge {
        display: inline-block;
        padding: 0.25rem 0.5rem;
        margin: 0.1rem;
        border-radius: 15px;
        font-size: 0.8rem;
        font-weight: bold;
        background-color: #e3f2fd;
        color: #1976d2;
    }
    .document-info {
        background-color: #f8f9fa;
        padding: 1rem;
        border-radius: 8px;
        border-left: 4px solid #667eea;
    }
    """
) as demo:
    # Header (NOTE(review): HTML tags reconstructed — the original markup was
    # lost in extraction; the .main-header class matches the CSS defined above)
    gr.HTML("""
    <div class="main-header">
        <h1>📄 Document Chat with Gemma 3</h1>
        <p>Upload documents and chat with advanced RAG capabilities powered by Gemma 3</p>
        <p>🚀 Features: Smart Entity Extraction | Document Type Detection | Query Enhancement | Context-Aware Responses</p>
    </div>
    """)

    with gr.Row():
        # Left column - Document Upload and Configuration
        with gr.Column(scale=1):
            gr.Markdown("## 📤 Upload & Configure")

            # File upload
            file_input = gr.File(
                label="📄 Upload Document",
                file_types=[".pdf", ".txt", ".md", ".csv", ".docx", ".jpg", ".jpeg", ".png"],
                type="filepath"
            )

            # Processing options
            with gr.Group():
                gr.Markdown("### 🛠️ Processing Options")
                use_smart_processing = gr.Checkbox(
                    label="🧠 Smart Processing",
                    value=True,
                    info="Enable entity extraction and document type detection"
                )
                use_prf = gr.Checkbox(
                    label="🔍 Pseudo Relevance Feedback",
                    value=True,
                    info="Expand queries using relevant document terms"
                )
                use_variants = gr.Checkbox(
                    label="📝 Query Variants",
                    value=True,
                    info="Generate multiple query reformulations"
                )
                use_reranking = gr.Checkbox(
                    label="🎯 Cross-Encoder Reranking",
                    value=True,
                    info="Rerank results using advanced models"
                )

            # Model parameters
            with gr.Group():
                gr.Markdown("### ⚙️ Model Parameters")
                temperature = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.3,
                    step=0.1,
                    label="🌡️ Temperature",
                    info="Controls response creativity"
                )
                max_tokens = gr.Slider(
                    minimum=64,
                    maximum=256,
                    value=128,
                    step=32,
                    label="📏 Max Tokens",
                    info="Maximum response length (limited for memory)"
                )
                top_k = gr.Slider(
                    minimum=3,
                    maximum=10,
                    value=5,
                    step=1,
                    label="📚 Context Chunks",
                    info="Number of document chunks to retrieve"
                )

            # Process button
            process_btn = gr.Button(
                "🚀 Process Document",
                variant="primary",
                size="lg"
            )

            # Processing status
            process_status = gr.Markdown("")

        # Right column - Chat Interface
        with gr.Column(scale=2):
            gr.Markdown("## 💬 Chat with Your Document")

            # Document info display
            doc_info_display = gr.Markdown("", visible=False)

            # Suggested questions
            suggested_questions_display = gr.Markdown("", visible=False)

            # Chat interface
            chatbot = gr.Chatbot(
                height=400,
                show_label=False,
                container=True,
                show_copy_button=True
            )

            with gr.Row():
                msg_input = gr.Textbox(
                    placeholder="Ask a question about your document...",
                    show_label=False,
                    scale=4,
                    container=False
                )
                send_btn = gr.Button("Send", variant="primary", scale=1)
                clear_btn = gr.Button("Clear", variant="secondary", scale=1)

    # Bottom section - Examples and tips
    with gr.Row():
        with gr.Column():
            gr.Markdown(get_example_files())
        with gr.Column():
            gr.Markdown("""
### 🎯 Pro Tips:

**📋 For Resumes**: Ask "Whose resume is this?" or "What are their technical skills?"

**📊 For Reports**: Ask "What are the key findings?" or "What methodology was used?"

**📜 For Contracts**: Ask "What are the main terms?" or "Who are the parties involved?"

**🔍 Advanced Queries**: The system understands context and can answer complex questions about relationships, timelines, and document structure.

**⚡ Performance**: First query may take longer as models load. Subsequent queries are faster.
""")

    # Event handlers
    def process_and_update(file, smart, prf, variants, rerank):
        """Process document and toggle the visibility of the info panels."""
        status, doc_info, suggestions = process_document(file, smart, prf, variants, rerank)

        # Show/hide info panels based on success
        doc_info_visible = "✅" in status
        suggestions_visible = bool(suggestions.strip()) if suggestions else False

        return (
            status,
            gr.update(value=doc_info, visible=doc_info_visible),
            gr.update(value=suggestions, visible=suggestions_visible)
        )

    # Connect event handlers
    process_btn.click(
        fn=process_and_update,
        inputs=[file_input, use_smart_processing, use_prf, use_variants, use_reranking],
        outputs=[process_status, doc_info_display, suggested_questions_display]
    )

    # Chat functionality (submit via Enter or the Send button)
    msg_input.submit(
        fn=chat_with_document,
        inputs=[msg_input, chatbot, temperature, max_tokens, top_k, use_prf, use_variants, use_reranking],
        outputs=[chatbot, msg_input]
    )
    send_btn.click(
        fn=chat_with_document,
        inputs=[msg_input, chatbot, temperature, max_tokens, top_k, use_prf, use_variants, use_reranking],
        outputs=[chatbot, msg_input]
    )
    clear_btn.click(
        fn=clear_chat,
        outputs=[chatbot]
    )

# Launch configuration
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_api=False,
        show_error=True
    )