📄 Document Chat with Gemma 3
Upload documents and chat with advanced RAG capabilities powered by Gemma 3
🚀 Features: Smart Entity Extraction | Document Type Detection | Query Enhancement | Context-Aware Responses
import gradio as gr import tempfile import os import logging from typing import List, Dict, Any, Optional, Tuple import time from datetime import datetime import json # Configure logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) # Import our modules from document_processor_hf import DocumentProcessor from inference_client import GemmaInferenceClient # Global variables for maintaining state processor = DocumentProcessor() inference_client = GemmaInferenceClient() current_document = None chat_history = [] processing_stats = {} def process_document(file, use_smart_processing, use_prf, use_variants, use_reranking, progress=gr.Progress()): """Process uploaded document""" global current_document, processing_stats if file is None: return "❌ Please upload a document first.", "", "" try: progress(0.1, desc="📄 Reading document...") # Process the document result = processor.process_document( file.name, use_smart_processing=use_smart_processing ) progress(0.7, desc="🧠 Extracting entities...") if result['success']: current_document = { 'name': os.path.basename(file.name), 'type': result.get('document_type', 'general'), 'chunks': len(result['chunks']), 'entities': len(result['entities']), 'suggestions': result.get('suggestions', []) } processing_stats = { 'document_type': result.get('document_type', 'general'), 'entities_found': len(result['entities']), 'chunks_created': len(result['chunks']), 'text_length': result.get('text_length', 0) } progress(1.0, desc="✅ Processing complete!") # Create document info display doc_info = f""" ## 📊 Document Information **📄 File:** {current_document['name']} **📝 Type:** {current_document['type'].title()} **📚 Chunks:** {current_document['chunks']} **🏷️ Entities:** {current_document['entities']} ### 🎯 Active Enhancements: {f"🔍 **Pseudo Relevance Feedback**: {'✅ Enabled' if use_prf else '❌ Disabled'}" if use_smart_processing else ""} {f"📝 **Query Variants**: {'✅ Enabled' if use_variants else '❌ Disabled'}" if use_smart_processing else ""} {f"🎯 **Cross-Encoder Reranking**: {'✅ Enabled' if use_reranking else '❌ Disabled'}" if use_smart_processing else ""} """ # Create suggested questions suggestions_html = "" if current_document['suggestions']: suggestions_html = "### 💡 Suggested Questions:\n" for i, suggestion in enumerate(current_document['suggestions'][:5]): suggestions_html += f"{i+1}. {suggestion}\n" success_msg = f"✅ **Document processed successfully!** Ready for questions." return success_msg, doc_info, suggestions_html else: error_msg = f"❌ **Processing failed:** {result.get('error', 'Unknown error')}" return error_msg, "", "" except Exception as e: logger.error(f"Document processing error: {e}") error_msg = f"❌ **Error:** {str(e)}" return error_msg, "", "" def chat_with_document(message, history, temperature, max_tokens, top_k, use_prf, use_variants, use_reranking): """Chat with the processed document""" global current_document if not current_document: history.append([message, "❌ Please upload and process a document first."]) return history, "" if not message.strip(): return history, "" try: # Add thinking message history.append([message, "🤔 Thinking..."]) yield history, "" # Query the document start_time = time.time() context_result = processor.query_document( message, top_k=min(top_k, 3), # Limit context for memory use_smart_retrieval=True, use_prf=use_prf, use_variants=use_variants, use_reranking=use_reranking ) # Generate response with memory-efficient settings response_result = inference_client.generate_response( query=message, context=context_result['context'][:2000], # Limit context length temperature=temperature, max_tokens=min(max_tokens, 256) # Limit response length for memory ) query_time = time.time() - start_time # Format response with enhancements info response = response_result['response'] # Add enhancement information enhancements = [] if use_prf: enhancements.append("🔍 PRF") if use_variants: enhancements.append("📝 Variants") if use_reranking: enhancements.append("🎯 Reranking") if enhancements: response += f"\n\n*Enhanced with: {' | '.join(enhancements)} | ⚡ {query_time:.2f}s*" # Clear memory after each response inference_client.clear_cache() # Update history with final response history[-1] = [message, response] yield history, "" except Exception as e: logger.error(f"Chat error: {e}") error_response = f"❌ Sorry, I encountered an error: {str(e)}" history[-1] = [message, error_response] yield history, "" def use_suggested_question(question_text, history): """Use a suggested question""" if question_text and current_document: return question_text, history return "", history def clear_chat(): """Clear chat history""" return [] def get_example_files(): """Get example file information""" examples = """ ### 📋 Try these document types: **📄 Resumes/CVs**: Upload a resume to ask "Whose resume is this?" or "What are their skills?" **📊 Reports**: Upload a business report to ask "What are the key findings?" or "What methodology was used?" **📋 Contracts**: Upload a contract to ask "What are the main terms?" or "Who are the parties involved?" **📚 Academic Papers**: Upload a research paper to ask "What is the research question?" or "What are the results?" **🖼️ Images**: Upload screenshots or scanned documents with text for OCR processing. """ return examples # Create the Gradio interface with gr.Blocks( title="Document Chat with Gemma 3", theme=gr.themes.Soft( primary_hue="blue", secondary_hue="purple", neutral_hue="slate" ), css=""" .gradio-container { max-width: 1200px !important; } .main-header { text-align: center; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); color: white; padding: 2rem; border-radius: 10px; margin-bottom: 2rem; } .enhancement-badge { display: inline-block; padding: 0.25rem 0.5rem; margin: 0.1rem; border-radius: 15px; font-size: 0.8rem; font-weight: bold; background-color: #e3f2fd; color: #1976d2; } .document-info { background-color: #f8f9fa; padding: 1rem; border-radius: 8px; border-left: 4px solid #667eea; } """ ) as demo: # Header gr.HTML("""
Upload documents and chat with advanced RAG capabilities powered by Gemma 3
🚀 Features: Smart Entity Extraction | Document Type Detection | Query Enhancement | Context-Aware Responses