Update app.py
Browse files
app.py
CHANGED
|
@@ -21,8 +21,8 @@ if GOOGLE_API_KEY is None:
|
|
| 21 |
app = Flask(__name__)
|
| 22 |
CORS(app)
|
| 23 |
|
| 24 |
-
# Configure Flask for
|
| 25 |
-
app.config['MAX_CONTENT_LENGTH'] =
|
| 26 |
|
| 27 |
# Initialize Gemini client with correct API key
|
| 28 |
client = genai.Client(api_key=GOOGLE_API_KEY)
|
|
@@ -330,7 +330,8 @@ HTML_TEMPLATE = """
|
|
| 330 |
<div class="container">
|
| 331 |
<div class="header">
|
| 332 |
<h1>π Smart Document Analysis Platform</h1>
|
| 333 |
-
<p>Upload PDF documents
|
|
|
|
| 334 |
</div>
|
| 335 |
|
| 336 |
<div class="main-content">
|
|
@@ -344,7 +345,8 @@ HTML_TEMPLATE = """
|
|
| 344 |
<div class="upload-area" id="uploadArea">
|
| 345 |
<div class="upload-icon">π</div>
|
| 346 |
<p>Drag and drop your PDF file here, or click to select</p>
|
| 347 |
-
<p style="font-size: 0.9em; color: #666; margin-top:
|
|
|
|
| 348 |
<input type="file" id="fileInput" class="file-input" accept=".pdf">
|
| 349 |
<button class="upload-btn" onclick="document.getElementById('fileInput').click()">
|
| 350 |
Choose PDF File
|
|
@@ -375,9 +377,12 @@ HTML_TEMPLATE = """
|
|
| 375 |
|
| 376 |
<div id="cacheInfo" class="cache-info" style="display: none;">
|
| 377 |
<h3>✅ Document Cached Successfully!</h3>
|
| 378 |
-
<p>Your PDF has been cached using Gemini API. You can now ask multiple questions without re-uploading.</p>
|
|
|
|
| 379 |
<p><strong>Cache ID:</strong> <span id="cacheId"></span></p>
|
| 380 |
<p><strong>Tokens Cached:</strong> <span id="tokenCount"></span></p>
|
|
|
|
|
|
|
| 381 |
</div>
|
| 382 |
|
| 383 |
<div class="chat-container" id="chatContainer">
|
|
@@ -431,13 +436,20 @@ HTML_TEMPLATE = """
|
|
| 431 |
return;
|
| 432 |
}
|
| 433 |
|
| 434 |
-
// Check file size on client side (
|
| 435 |
-
|
| 436 |
-
|
|
|
|
| 437 |
return;
|
| 438 |
}
|
| 439 |
|
| 440 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 441 |
|
| 442 |
const formData = new FormData();
|
| 443 |
formData.append('file', file);
|
|
@@ -454,6 +466,8 @@ HTML_TEMPLATE = """
|
|
| 454 |
currentCacheId = result.cache_id;
|
| 455 |
document.getElementById('cacheId').textContent = result.cache_id;
|
| 456 |
document.getElementById('tokenCount').textContent = result.token_count;
|
|
|
|
|
|
|
| 457 |
document.getElementById('cacheInfo').style.display = 'block';
|
| 458 |
showSuccess('PDF uploaded and cached successfully!');
|
| 459 |
|
|
@@ -461,6 +475,9 @@ HTML_TEMPLATE = """
|
|
| 461 |
addMessage("I've analyzed your PDF document. What would you like to know about it?", 'ai');
|
| 462 |
} else {
|
| 463 |
showError(result.error);
|
|
|
|
|
|
|
|
|
|
| 464 |
}
|
| 465 |
} catch (error) {
|
| 466 |
showError('Error uploading file: ' + error.message);
|
|
@@ -493,6 +510,8 @@ HTML_TEMPLATE = """
|
|
| 493 |
currentCacheId = result.cache_id;
|
| 494 |
document.getElementById('cacheId').textContent = result.cache_id;
|
| 495 |
document.getElementById('tokenCount').textContent = result.token_count;
|
|
|
|
|
|
|
| 496 |
document.getElementById('cacheInfo').style.display = 'block';
|
| 497 |
showSuccess('PDF uploaded and cached successfully!');
|
| 498 |
|
|
@@ -500,6 +519,9 @@ HTML_TEMPLATE = """
|
|
| 500 |
addMessage("I've analyzed your PDF document. What would you like to know about it?", 'ai');
|
| 501 |
} else {
|
| 502 |
showError(result.error);
|
|
|
|
|
|
|
|
|
|
| 503 |
}
|
| 504 |
} catch (error) {
|
| 505 |
showError('Error uploading from URL: ' + error.message);
|
|
@@ -616,13 +638,20 @@ def upload_file():
|
|
| 616 |
if file.filename == '':
|
| 617 |
return jsonify({'success': False, 'error': 'No file selected'})
|
| 618 |
|
| 619 |
-
# Check file size (limit to
|
| 620 |
file.seek(0, 2) # Seek to end
|
| 621 |
file_size = file.tell()
|
| 622 |
file.seek(0) # Reset to beginning
|
| 623 |
|
| 624 |
-
|
| 625 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 626 |
|
| 627 |
# Read file content
|
| 628 |
file_content = file.read()
|
|
@@ -649,8 +678,8 @@ def upload_file():
|
|
| 649 |
try:
|
| 650 |
system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."
|
| 651 |
|
| 652 |
-
# Use the correct model name (
|
| 653 |
-
model = 'gemini-2.
|
| 654 |
|
| 655 |
cache = client.caches.create(
|
| 656 |
model=model,
|
|
@@ -690,13 +719,55 @@ def upload_file():
|
|
| 690 |
|
| 691 |
except Exception as cache_error:
|
| 692 |
print(f"Cache error: {cache_error}")
|
| 693 |
-
#
|
| 694 |
-
|
|
|
|
| 695 |
return jsonify({
|
| 696 |
'success': False,
|
| 697 |
-
'error': '
|
| 698 |
-
'suggestion': '
|
|
|
|
| 699 |
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 700 |
else:
|
| 701 |
return jsonify({'success': False, 'error': f'Failed to create cache: {str(cache_error)}'})
|
| 702 |
|
|
@@ -726,8 +797,14 @@ def upload_from_url():
|
|
| 726 |
|
| 727 |
# Check file size
|
| 728 |
content_length = len(response.content)
|
| 729 |
-
|
| 730 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 731 |
|
| 732 |
file_io = io.BytesIO(response.content)
|
| 733 |
|
|
@@ -759,8 +836,8 @@ def upload_from_url():
|
|
| 759 |
try:
|
| 760 |
system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."
|
| 761 |
|
| 762 |
-
# Use the correct model name (
|
| 763 |
-
model = 'gemini-2.
|
| 764 |
|
| 765 |
cache = client.caches.create(
|
| 766 |
model=model,
|
|
@@ -833,7 +910,7 @@ def ask_question():
|
|
| 833 |
# Generate response using cached content with correct model format
|
| 834 |
try:
|
| 835 |
response = client.models.generate_content(
|
| 836 |
-
model='gemini-2.
|
| 837 |
contents=question,
|
| 838 |
config=types.GenerateContentConfig(
|
| 839 |
cached_content=cache_info['cache_name']
|
|
@@ -906,7 +983,7 @@ def health_check():
|
|
| 906 |
# Error handlers
|
| 907 |
@app.errorhandler(413)
|
| 908 |
def too_large(e):
|
| 909 |
-
return jsonify({'success': False, 'error': 'File too large. Maximum size is
|
| 910 |
|
| 911 |
@app.errorhandler(500)
|
| 912 |
def internal_error(e):
|
|
|
|
| 21 |
app = Flask(__name__)
|
| 22 |
CORS(app)
|
| 23 |
|
| 24 |
+
# Configure Flask for large file uploads (200MB for substantial documents)
|
| 25 |
+
app.config['MAX_CONTENT_LENGTH'] = 200 * 1024 * 1024 # 200MB max file size
|
| 26 |
|
| 27 |
# Initialize Gemini client with correct API key
|
| 28 |
client = genai.Client(api_key=GOOGLE_API_KEY)
|
|
|
|
| 330 |
<div class="container">
|
| 331 |
<div class="header">
|
| 332 |
<h1>π Smart Document Analysis Platform</h1>
|
| 333 |
+
<p>Upload substantial PDF documents (5MB+ recommended) for efficient context caching with Gemini API</p>
|
| 334 |
+
<p style="font-size: 0.9em; opacity: 0.8; margin-top: 5px;">💡 Context caching requires minimum token thresholds - larger documents work better</p>
|
| 335 |
</div>
|
| 336 |
|
| 337 |
<div class="main-content">
|
|
|
|
| 345 |
<div class="upload-area" id="uploadArea">
|
| 346 |
<div class="upload-icon">π</div>
|
| 347 |
<p>Drag and drop your PDF file here, or click to select</p>
|
| 348 |
+
<p style="font-size: 0.9em; color: #666; margin-top: 5px;">For context caching to work: Upload substantial documents (5MB+ recommended)</p>
|
| 349 |
+
<p style="font-size: 0.8em; color: #888; margin-top: 5px;">Maximum file size: 200MB</p>
|
| 350 |
<input type="file" id="fileInput" class="file-input" accept=".pdf">
|
| 351 |
<button class="upload-btn" onclick="document.getElementById('fileInput').click()">
|
| 352 |
Choose PDF File
|
|
|
|
| 377 |
|
| 378 |
<div id="cacheInfo" class="cache-info" style="display: none;">
|
| 379 |
<h3>✅ Document Cached Successfully!</h3>
|
| 380 |
+
<p>Your PDF has been cached using Gemini API context caching. You can now ask multiple questions efficiently without re-uploading.</p>
|
| 381 |
+
<p><strong>Document:</strong> <span id="documentName"></span></p>
|
| 382 |
<p><strong>Cache ID:</strong> <span id="cacheId"></span></p>
|
| 383 |
<p><strong>Tokens Cached:</strong> <span id="tokenCount"></span></p>
|
| 384 |
+
<p><strong>Model:</strong> <span id="modelUsed"></span></p>
|
| 385 |
+
<p style="font-size: 0.9em; margin-top: 10px; opacity: 0.8;">💡 Cache valid for 1 hour. Subsequent questions will use cached content for faster responses.</p>
|
| 386 |
</div>
|
| 387 |
|
| 388 |
<div class="chat-container" id="chatContainer">
|
|
|
|
| 436 |
return;
|
| 437 |
}
|
| 438 |
|
| 439 |
+
// Check file size on client side (200MB limit)
|
| 440 |
+
const fileSizeMB = file.size / (1024 * 1024);
|
| 441 |
+
if (file.size > 200 * 1024 * 1024) {
|
| 442 |
+
showError(`File too large (${fileSizeMB.toFixed(1)}MB). Maximum size is 200MB.`);
|
| 443 |
return;
|
| 444 |
}
|
| 445 |
|
| 446 |
+
// Reject files under 1MB before uploading: they are unlikely to meet the minimum token count for context caching
|
| 447 |
+
if (file.size < 1024 * 1024) {
|
| 448 |
+
showError(`File might be too small (${fileSizeMB.toFixed(1)}MB) for context caching. For best results, upload documents with substantial text content (>5MB recommended).`);
|
| 449 |
+
return;
|
| 450 |
+
}
|
| 451 |
+
|
| 452 |
+
showLoading(`Uploading PDF (${fileSizeMB.toFixed(1)}MB)...`);
|
| 453 |
|
| 454 |
const formData = new FormData();
|
| 455 |
formData.append('file', file);
|
|
|
|
| 466 |
currentCacheId = result.cache_id;
|
| 467 |
document.getElementById('cacheId').textContent = result.cache_id;
|
| 468 |
document.getElementById('tokenCount').textContent = result.token_count;
|
| 469 |
+
document.getElementById('documentName').textContent = result.document_name;
|
| 470 |
+
document.getElementById('modelUsed').textContent = result.model_used || 'gemini-2.5-flash-001';
|
| 471 |
document.getElementById('cacheInfo').style.display = 'block';
|
| 472 |
showSuccess('PDF uploaded and cached successfully!');
|
| 473 |
|
|
|
|
| 475 |
addMessage("I've analyzed your PDF document. What would you like to know about it?", 'ai');
|
| 476 |
} else {
|
| 477 |
showError(result.error);
|
| 478 |
+
if (result.suggestion) {
|
| 479 |
+
showError(result.suggestion);
|
| 480 |
+
}
|
| 481 |
}
|
| 482 |
} catch (error) {
|
| 483 |
showError('Error uploading file: ' + error.message);
|
|
|
|
| 510 |
currentCacheId = result.cache_id;
|
| 511 |
document.getElementById('cacheId').textContent = result.cache_id;
|
| 512 |
document.getElementById('tokenCount').textContent = result.token_count;
|
| 513 |
+
document.getElementById('documentName').textContent = result.document_name;
|
| 514 |
+
document.getElementById('modelUsed').textContent = result.model_used || 'gemini-2.5-flash-001';
|
| 515 |
document.getElementById('cacheInfo').style.display = 'block';
|
| 516 |
showSuccess('PDF uploaded and cached successfully!');
|
| 517 |
|
|
|
|
| 519 |
addMessage("I've analyzed your PDF document. What would you like to know about it?", 'ai');
|
| 520 |
} else {
|
| 521 |
showError(result.error);
|
| 522 |
+
if (result.suggestion) {
|
| 523 |
+
showError(result.suggestion);
|
| 524 |
+
}
|
| 525 |
}
|
| 526 |
} catch (error) {
|
| 527 |
showError('Error uploading from URL: ' + error.message);
|
|
|
|
| 638 |
if file.filename == '':
|
| 639 |
return jsonify({'success': False, 'error': 'No file selected'})
|
| 640 |
|
| 641 |
+
# Check file size (limit to 200MB for large documents needed for caching)
|
| 642 |
file.seek(0, 2) # Seek to end
|
| 643 |
file_size = file.tell()
|
| 644 |
file.seek(0) # Reset to beginning
|
| 645 |
|
| 646 |
+
# Convert to MB for display
|
| 647 |
+
file_size_mb = file_size / (1024 * 1024)
|
| 648 |
+
|
| 649 |
+
if file_size > 200 * 1024 * 1024: # 200MB limit
|
| 650 |
+
return jsonify({'success': False, 'error': f'File too large ({file_size_mb:.1f}MB). Maximum size is 200MB.'})
|
| 651 |
+
|
| 652 |
+
# Warn about small files that might not cache
|
| 653 |
+
if file_size < 1024 * 1024: # Less than 1MB
|
| 654 |
+
print(f"Warning: Small file uploaded ({file_size_mb:.1f}MB). May not meet minimum token requirements for caching.")
|
| 655 |
|
| 656 |
# Read file content
|
| 657 |
file_content = file.read()
|
|
|
|
| 678 |
try:
|
| 679 |
system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."
|
| 680 |
|
| 681 |
+
# Use the correct model name - try 2.5 Flash first (lower token requirement)
|
| 682 |
+
model = 'gemini-2.5-flash-001'
|
| 683 |
|
| 684 |
cache = client.caches.create(
|
| 685 |
model=model,
|
|
|
|
| 719 |
|
| 720 |
except Exception as cache_error:
|
| 721 |
print(f"Cache error: {cache_error}")
|
| 722 |
+
# Provide more specific error handling for token requirements
|
| 723 |
+
error_msg = str(cache_error).lower()
|
| 724 |
+
if "too small" in error_msg or "minimum" in error_msg:
|
| 725 |
return jsonify({
|
| 726 |
'success': False,
|
| 727 |
+
'error': f'Document content is insufficient for caching. Gemini 2.5 Flash requires minimum 1,024 tokens (~2-3 pages of text). Your document: {file.filename} ({file_size_mb:.1f}MB)',
|
| 728 |
+
'suggestion': 'Upload a longer document with more text content (recommended: 5MB+ with substantial text).',
|
| 729 |
+
'fallback': 'You can still use the document without caching by implementing direct file processing.'
|
| 730 |
})
|
| 731 |
+
elif "invalid" in error_msg or "model" in error_msg:
|
| 732 |
+
# Try fallback to 2.0 Flash
|
| 733 |
+
try:
|
| 734 |
+
cache_fallback = client.caches.create(
|
| 735 |
+
model='gemini-2.0-flash-001',
|
| 736 |
+
config=types.CreateCachedContentConfig(
|
| 737 |
+
display_name=f'PDF document cache - {file.filename}',
|
| 738 |
+
system_instruction=system_instruction,
|
| 739 |
+
contents=[document],
|
| 740 |
+
ttl="3600s",
|
| 741 |
+
)
|
| 742 |
+
)
|
| 743 |
+
print(f"Fallback cache created with 2.0 Flash: {cache_fallback.name}")
|
| 744 |
+
|
| 745 |
+
# Store with fallback model info
|
| 746 |
+
cache_id = str(uuid.uuid4())
|
| 747 |
+
document_caches[cache_id] = {
|
| 748 |
+
'cache_name': cache_fallback.name,
|
| 749 |
+
'document_name': file.filename,
|
| 750 |
+
'document_file_name': document.name,
|
| 751 |
+
'model': 'gemini-2.0-flash-001',
|
| 752 |
+
'created_at': datetime.now().isoformat()
|
| 753 |
+
}
|
| 754 |
+
|
| 755 |
+
token_count = 'Unknown'
|
| 756 |
+
if hasattr(cache_fallback, 'usage_metadata') and cache_fallback.usage_metadata:
|
| 757 |
+
if hasattr(cache_fallback.usage_metadata, 'total_token_count'):
|
| 758 |
+
token_count = cache_fallback.usage_metadata.total_token_count
|
| 759 |
+
|
| 760 |
+
return jsonify({
|
| 761 |
+
'success': True,
|
| 762 |
+
'cache_id': cache_id,
|
| 763 |
+
'token_count': token_count,
|
| 764 |
+
'document_name': file.filename,
|
| 765 |
+
'model_used': 'gemini-2.0-flash-001'
|
| 766 |
+
})
|
| 767 |
+
|
| 768 |
+
except Exception as fallback_error:
|
| 769 |
+
print(f"Fallback cache error: {fallback_error}")
|
| 770 |
+
return jsonify({'success': False, 'error': f'Failed to create cache with both models: {str(fallback_error)}'})
|
| 771 |
else:
|
| 772 |
return jsonify({'success': False, 'error': f'Failed to create cache: {str(cache_error)}'})
|
| 773 |
|
|
|
|
| 797 |
|
| 798 |
# Check file size
|
| 799 |
content_length = len(response.content)
|
| 800 |
+
content_length_mb = content_length / (1024 * 1024)
|
| 801 |
+
|
| 802 |
+
if content_length > 200 * 1024 * 1024: # 200MB limit
|
| 803 |
+
return jsonify({'success': False, 'error': f'File too large ({content_length_mb:.1f}MB). Maximum size is 200MB.'})
|
| 804 |
+
|
| 805 |
+
# Warn about small files
|
| 806 |
+
if content_length < 1024 * 1024: # Less than 1MB
|
| 807 |
+
print(f"Warning: Small file from URL ({content_length_mb:.1f}MB). May not meet minimum token requirements for caching.")
|
| 808 |
|
| 809 |
file_io = io.BytesIO(response.content)
|
| 810 |
|
|
|
|
| 836 |
try:
|
| 837 |
system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."
|
| 838 |
|
| 839 |
+
# Use the correct model name - try 2.5 Flash first (lower token requirement)
|
| 840 |
+
model = 'gemini-2.5-flash-001'
|
| 841 |
|
| 842 |
cache = client.caches.create(
|
| 843 |
model=model,
|
|
|
|
| 910 |
# Generate response using cached content with correct model format
|
| 911 |
try:
|
| 912 |
response = client.models.generate_content(
|
| 913 |
+
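model='gemini-2.5-flash-001', # NOTE(review): hardcoded to 2.5 Flash, but the upload fallback path creates caches with 'gemini-2.0-flash-001' and records that under the 'model' key - confirm this should not use the model stored with the cache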
|
| 914 |
contents=question,
|
| 915 |
config=types.GenerateContentConfig(
|
| 916 |
cached_content=cache_info['cache_name']
|
|
|
|
| 983 |
# Error handlers
|
| 984 |
@app.errorhandler(413)
|
| 985 |
def too_large(e):
|
| 986 |
+
return jsonify({'success': False, 'error': 'File too large. Maximum size is 200MB for substantial documents needed for context caching.'}), 413
|
| 987 |
|
| 988 |
@app.errorhandler(500)
|
| 989 |
def internal_error(e):
|