Spaces:

Omartificial-Intelligence-Space
/

context-caching-gemini-pdf-qa

Sleeping

App Files Files Community

Omartificial-Intelligence-Space committed on Jul 13

Commit

548d3b3

verified ·

1 Parent(s): 0a6b1c6

Update app.py

Browse files

Files changed (1) hide show

app.py +101 -24

app.py CHANGED Viewed

@@ -21,8 +21,8 @@ if GOOGLE_API_KEY is None:
 app = Flask(__name__)
 CORS(app)
-# Configure Flask for larger file uploads
-app.config['MAX_CONTENT_LENGTH'] = 50 * 1024 * 1024  # 50MB max file size
 # Initialize Gemini client with correct API key
 client = genai.Client(api_key=GOOGLE_API_KEY)
@@ -330,7 +330,8 @@ HTML_TEMPLATE = """
     <div class="container">
         <div class="header">
             <h1>📚 Smart Document Analysis Platform</h1>
-            <p>Upload PDF documents once, ask questions forever with Gemini API caching</p>
         </div>
         <div class="main-content">
@@ -344,7 +345,8 @@ HTML_TEMPLATE = """
                     <div class="upload-area" id="uploadArea">
                         <div class="upload-icon">📄</div>
                         <p>Drag and drop your PDF file here, or click to select</p>
-                        <p style="font-size: 0.9em; color: #666; margin-top: 10px;">Maximum file size: 50MB</p>
                         <input type="file" id="fileInput" class="file-input" accept=".pdf">
                         <button class="upload-btn" onclick="document.getElementById('fileInput').click()">
                             Choose PDF File
@@ -375,9 +377,12 @@ HTML_TEMPLATE = """
                 <div id="cacheInfo" class="cache-info" style="display: none;">
                     <h3>✅ Document Cached Successfully!</h3>
-                    <p>Your PDF has been cached using Gemini API. You can now ask multiple questions without re-uploading.</p>
                     <p><strong>Cache ID:</strong> <span id="cacheId"></span></p>
                     <p><strong>Tokens Cached:</strong> <span id="tokenCount"></span></p>
                 </div>
                 <div class="chat-container" id="chatContainer">
@@ -431,13 +436,20 @@ HTML_TEMPLATE = """
                 return;
             }
-            // Check file size on client side (50MB limit)
-            if (file.size > 50 * 1024 * 1024) {
-                showError('File too large. Maximum size is 50MB.');
                 return;
             }
-            showLoading('Uploading PDF...');
             const formData = new FormData();
             formData.append('file', file);
@@ -454,6 +466,8 @@ HTML_TEMPLATE = """
                     currentCacheId = result.cache_id;
                     document.getElementById('cacheId').textContent = result.cache_id;
                     document.getElementById('tokenCount').textContent = result.token_count;
                     document.getElementById('cacheInfo').style.display = 'block';
                     showSuccess('PDF uploaded and cached successfully!');
@@ -461,6 +475,9 @@ HTML_TEMPLATE = """
                     addMessage("I've analyzed your PDF document. What would you like to know about it?", 'ai');
                 } else {
                     showError(result.error);
                 }
             } catch (error) {
                 showError('Error uploading file: ' + error.message);
@@ -493,6 +510,8 @@ HTML_TEMPLATE = """
                     currentCacheId = result.cache_id;
                     document.getElementById('cacheId').textContent = result.cache_id;
                     document.getElementById('tokenCount').textContent = result.token_count;
                     document.getElementById('cacheInfo').style.display = 'block';
                     showSuccess('PDF uploaded and cached successfully!');
@@ -500,6 +519,9 @@ HTML_TEMPLATE = """
                     addMessage("I've analyzed your PDF document. What would you like to know about it?", 'ai');
                 } else {
                     showError(result.error);
                 }
             } catch (error) {
                 showError('Error uploading from URL: ' + error.message);
@@ -616,13 +638,20 @@ def upload_file():
         if file.filename == '':
             return jsonify({'success': False, 'error': 'No file selected'})
-        # Check file size (limit to 50MB for PDFs)
         file.seek(0, 2)  # Seek to end
         file_size = file.tell()
         file.seek(0)  # Reset to beginning
-        if file_size > 50 * 1024 * 1024:  # 50MB limit
-            return jsonify({'success': False, 'error': 'File too large. Maximum size is 50MB.'})
         # Read file content
         file_content = file.read()
@@ -649,8 +678,8 @@ def upload_file():
         try:
             system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."
-            # Use the correct model name (without 'models/' prefix)
-            model = 'gemini-2.0-flash-001'
             cache = client.caches.create(
                 model=model,
@@ -690,13 +719,55 @@ def upload_file():
         except Exception as cache_error:
             print(f"Cache error: {cache_error}")
-            # If caching fails due to small content, provide alternative approach
-            if "too small" in str(cache_error).lower():
                 return jsonify({
                     'success': False,
-                    'error': 'PDF content is too small for caching. Please upload a larger document with more text content.',
-                    'suggestion': 'Try uploading a longer document or combine multiple documents.'
                 })
             else:
                 return jsonify({'success': False, 'error': f'Failed to create cache: {str(cache_error)}'})
@@ -726,8 +797,14 @@ def upload_from_url():
                 # Check file size
                 content_length = len(response.content)
-                if content_length > 50 * 1024 * 1024:  # 50MB limit
-                    return jsonify({'success': False, 'error': 'File too large. Maximum size is 50MB.'})
                 file_io = io.BytesIO(response.content)
@@ -759,8 +836,8 @@ def upload_from_url():
         try:
             system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."
-            # Use the correct model name (without 'models/' prefix)
-            model = 'gemini-2.0-flash-001'
             cache = client.caches.create(
                 model=model,
@@ -833,7 +910,7 @@ def ask_question():
         # Generate response using cached content with correct model format
         try:
             response = client.models.generate_content(
-                model='gemini-2.0-flash-001',  # No 'models/' prefix here
                 contents=question,
                 config=types.GenerateContentConfig(
                     cached_content=cache_info['cache_name']
@@ -906,7 +983,7 @@ def health_check():
 # Error handlers
 @app.errorhandler(413)
 def too_large(e):
-    return jsonify({'success': False, 'error': 'File too large. Maximum size is 50MB.'}), 413
 @app.errorhandler(500)
 def internal_error(e):

 app = Flask(__name__)
 CORS(app)
+# Configure Flask for large file uploads (200MB for substantial documents)
+app.config['MAX_CONTENT_LENGTH'] = 200 * 1024 * 1024  # 200MB max file size
 # Initialize Gemini client with correct API key
 client = genai.Client(api_key=GOOGLE_API_KEY)
     <div class="container">
         <div class="header">
             <h1>📚 Smart Document Analysis Platform</h1>
+            <p>Upload substantial PDF documents (5MB+ recommended) for efficient context caching with Gemini API</p>
+            <p style="font-size: 0.9em; opacity: 0.8; margin-top: 5px;">💡 Context caching requires minimum token thresholds - larger documents work better</p>
         </div>
         <div class="main-content">
                     <div class="upload-area" id="uploadArea">
                         <div class="upload-icon">📄</div>
                         <p>Drag and drop your PDF file here, or click to select</p>
+                        <p style="font-size: 0.9em; color: #666; margin-top: 5px;">For context caching to work: Upload substantial documents (5MB+ recommended)</p>
+                        <p style="font-size: 0.8em; color: #888; margin-top: 5px;">Maximum file size: 200MB</p>
                         <input type="file" id="fileInput" class="file-input" accept=".pdf">
                         <button class="upload-btn" onclick="document.getElementById('fileInput').click()">
                             Choose PDF File
                 <div id="cacheInfo" class="cache-info" style="display: none;">
                     <h3>✅ Document Cached Successfully!</h3>
+                    <p>Your PDF has been cached using Gemini API context caching. You can now ask multiple questions efficiently without re-uploading.</p>
+                    <p><strong>Document:</strong> <span id="documentName"></span></p>
                     <p><strong>Cache ID:</strong> <span id="cacheId"></span></p>
                     <p><strong>Tokens Cached:</strong> <span id="tokenCount"></span></p>
+                    <p><strong>Model:</strong> <span id="modelUsed"></span></p>
+                    <p style="font-size: 0.9em; margin-top: 10px; opacity: 0.8;">💡 Cache valid for 1 hour. Subsequent questions will use cached content for faster responses.</p>
                 </div>
                 <div class="chat-container" id="chatContainer">
                 return;
             }
+            // Check file size on client side (200MB limit)
+            const fileSizeMB = file.size / (1024 * 1024);
+            if (file.size > 200 * 1024 * 1024) {
+                showError(`File too large (${fileSizeMB.toFixed(1)}MB). Maximum size is 200MB.`);
                 return;
             }
+            // Warn about small files that might not cache
+            if (file.size < 1024 * 1024) {
+                showError(`File might be too small (${fileSizeMB.toFixed(1)}MB) for context caching. For best results, upload documents with substantial text content (>5MB recommended).`);
+                return;
+            }
+            showLoading(`Uploading PDF (${fileSizeMB.toFixed(1)}MB)...`);
             const formData = new FormData();
             formData.append('file', file);
                     currentCacheId = result.cache_id;
                     document.getElementById('cacheId').textContent = result.cache_id;
                     document.getElementById('tokenCount').textContent = result.token_count;
+                    document.getElementById('documentName').textContent = result.document_name;
+                    document.getElementById('modelUsed').textContent = result.model_used || 'gemini-2.5-flash-001';
                     document.getElementById('cacheInfo').style.display = 'block';
                     showSuccess('PDF uploaded and cached successfully!');
                     addMessage("I've analyzed your PDF document. What would you like to know about it?", 'ai');
                 } else {
                     showError(result.error);
+                    if (result.suggestion) {
+                        showError(result.suggestion);
+                    }
                 }
             } catch (error) {
                 showError('Error uploading file: ' + error.message);
                     currentCacheId = result.cache_id;
                     document.getElementById('cacheId').textContent = result.cache_id;
                     document.getElementById('tokenCount').textContent = result.token_count;
+                    document.getElementById('documentName').textContent = result.document_name;
+                    document.getElementById('modelUsed').textContent = result.model_used || 'gemini-2.5-flash-001';
                     document.getElementById('cacheInfo').style.display = 'block';
                     showSuccess('PDF uploaded and cached successfully!');
                     addMessage("I've analyzed your PDF document. What would you like to know about it?", 'ai');
                 } else {
                     showError(result.error);
+                    if (result.suggestion) {
+                        showError(result.suggestion);
+                    }
                 }
             } catch (error) {
                 showError('Error uploading from URL: ' + error.message);
         if file.filename == '':
             return jsonify({'success': False, 'error': 'No file selected'})
+        # Check file size (limit to 200MB for large documents needed for caching)
         file.seek(0, 2)  # Seek to end
         file_size = file.tell()
         file.seek(0)  # Reset to beginning
+        # Convert to MB for display
+        file_size_mb = file_size / (1024 * 1024)
+        if file_size > 200 * 1024 * 1024:  # 200MB limit
+            return jsonify({'success': False, 'error': f'File too large ({file_size_mb:.1f}MB). Maximum size is 200MB.'})
+        # Warn about small files that might not cache
+        if file_size < 1024 * 1024:  # Less than 1MB
+            print(f"Warning: Small file uploaded ({file_size_mb:.1f}MB). May not meet minimum token requirements for caching.")
         # Read file content
         file_content = file.read()
         try:
             system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."
+            # Use the correct model name - try 2.5 Flash first (lower token requirement)
+            model = 'gemini-2.5-flash-001'
             cache = client.caches.create(
                 model=model,
         except Exception as cache_error:
             print(f"Cache error: {cache_error}")
+            # Provide more specific error handling for token requirements
+            error_msg = str(cache_error).lower()
+            if "too small" in error_msg or "minimum" in error_msg:
                 return jsonify({
                     'success': False,
+                    'error': f'Document content is insufficient for caching. Gemini 2.5 Flash requires minimum 1,024 tokens (~2-3 pages of text). Your document: {file.filename} ({file_size_mb:.1f}MB)',
+                    'suggestion': 'Upload a longer document with more text content (recommended: 5MB+ with substantial text).',
+                    'fallback': 'You can still use the document without caching by implementing direct file processing.'
                 })
+            elif "invalid" in error_msg or "model" in error_msg:
+                # Try fallback to 2.0 Flash
+                try:
+                    cache_fallback = client.caches.create(
+                        model='gemini-2.0-flash-001',
+                        config=types.CreateCachedContentConfig(
+                            display_name=f'PDF document cache - {file.filename}',
+                            system_instruction=system_instruction,
+                            contents=[document],
+                            ttl="3600s",
+                        )
+                    )
+                    print(f"Fallback cache created with 2.0 Flash: {cache_fallback.name}")
+                    # Store with fallback model info
+                    cache_id = str(uuid.uuid4())
+                    document_caches[cache_id] = {
+                        'cache_name': cache_fallback.name,
+                        'document_name': file.filename,
+                        'document_file_name': document.name,
+                        'model': 'gemini-2.0-flash-001',
+                        'created_at': datetime.now().isoformat()
+                    }
+                    token_count = 'Unknown'
+                    if hasattr(cache_fallback, 'usage_metadata') and cache_fallback.usage_metadata:
+                        if hasattr(cache_fallback.usage_metadata, 'total_token_count'):
+                            token_count = cache_fallback.usage_metadata.total_token_count
+                    return jsonify({
+                        'success': True,
+                        'cache_id': cache_id,
+                        'token_count': token_count,
+                        'document_name': file.filename,
+                        'model_used': 'gemini-2.0-flash-001'
+                    })
+                except Exception as fallback_error:
+                    print(f"Fallback cache error: {fallback_error}")
+                    return jsonify({'success': False, 'error': f'Failed to create cache with both models: {str(fallback_error)}'})
             else:
                 return jsonify({'success': False, 'error': f'Failed to create cache: {str(cache_error)}'})
                 # Check file size
                 content_length = len(response.content)
+                content_length_mb = content_length / (1024 * 1024)
+                if content_length > 200 * 1024 * 1024:  # 200MB limit
+                    return jsonify({'success': False, 'error': f'File too large ({content_length_mb:.1f}MB). Maximum size is 200MB.'})
+                # Warn about small files
+                if content_length < 1024 * 1024:  # Less than 1MB
+                    print(f"Warning: Small file from URL ({content_length_mb:.1f}MB). May not meet minimum token requirements for caching.")
                 file_io = io.BytesIO(response.content)
         try:
             system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."
+            # Use the correct model name - try 2.5 Flash first (lower token requirement)
+            model = 'gemini-2.5-flash-001'
             cache = client.caches.create(
                 model=model,
         # Generate response using cached content with correct model format
         try:
             response = client.models.generate_content(
+                model='gemini-2.5-flash-001',  # Use 2.5 Flash for consistency
                 contents=question,
                 config=types.GenerateContentConfig(
                     cached_content=cache_info['cache_name']
 # Error handlers
 @app.errorhandler(413)
 def too_large(e):
+    return jsonify({'success': False, 'error': 'File too large. Maximum size is 200MB for substantial documents needed for context caching.'}), 413
 @app.errorhandler(500)
 def internal_error(e):