Update app.py
app.py CHANGED
@@ -6,7 +6,6 @@ import os
 import io
 import httpx
 import uuid
-import tempfile
 from datetime import datetime, timezone, timedelta
 from dotenv import load_dotenv
 import json
@@ -14,19 +13,11 @@ import json
 # Load environment variables
 load_dotenv()

-# Get Google API key from environment
-GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
-if GOOGLE_API_KEY is None:
-    raise ValueError("GOOGLE_API_KEY environment variable is not set. Please set it before running the script.")
-
 app = Flask(__name__)
 CORS(app)

-#
-
-
-# Initialize Gemini client with correct API key
-client = genai.Client(api_key=GOOGLE_API_KEY)
+# Initialize Gemini client
+client = genai.Client(api_key=os.getenv('GOOGLE_API_KEY'))

 # In-memory storage for demo (in production, use a database)
 document_caches = {}
@@ -331,8 +322,7 @@ HTML_TEMPLATE = """
         <div class="container">
             <div class="header">
                 <h1>📄 Smart Document Analysis Platform</h1>
-                <p>Upload
-                <p style="font-size: 0.9em; opacity: 0.8; margin-top: 5px;">💡 Context caching requires minimum token thresholds - larger documents work better</p>
+                <p>Upload PDF documents once, ask questions forever with Gemini API caching</p>
             </div>

             <div class="main-content">
@@ -346,8 +336,6 @@ HTML_TEMPLATE = """
                 <div class="upload-area" id="uploadArea">
                     <div class="upload-icon">📄</div>
                     <p>Drag and drop your PDF file here, or click to select</p>
-                    <p style="font-size: 0.9em; color: #666; margin-top: 5px;">For context caching to work: Upload substantial documents (5MB+ recommended)</p>
-                    <p style="font-size: 0.8em; color: #888; margin-top: 5px;">Maximum file size: 200MB</p>
                     <input type="file" id="fileInput" class="file-input" accept=".pdf">
                     <button class="upload-btn" onclick="document.getElementById('fileInput').click()">
                         Choose PDF File
@@ -378,12 +366,9 @@ HTML_TEMPLATE = """

                 <div id="cacheInfo" class="cache-info" style="display: none;">
                     <h3>✅ Document Cached Successfully!</h3>
-                    <p>Your PDF has been cached using Gemini API
-                    <p><strong>Document:</strong> <span id="documentName"></span></p>
+                    <p>Your PDF has been cached using Gemini API. You can now ask multiple questions without re-uploading.</p>
                     <p><strong>Cache ID:</strong> <span id="cacheId"></span></p>
                     <p><strong>Tokens Cached:</strong> <span id="tokenCount"></span></p>
-                    <p><strong>Model:</strong> <span id="modelUsed"></span></p>
-                    <p style="font-size: 0.9em; margin-top: 10px; opacity: 0.8;">💡 Cache valid for 1 hour. Subsequent questions will use cached content for faster responses.</p>
                 </div>

                 <div class="chat-container" id="chatContainer">
@@ -437,20 +422,7 @@ HTML_TEMPLATE = """
                 return;
             }

-
-            const fileSizeMB = file.size / (1024 * 1024);
-            if (file.size > 200 * 1024 * 1024) {
-                showError(`File too large (${fileSizeMB.toFixed(1)}MB). Maximum size is 200MB.`);
-                return;
-            }
-
-            // Warn about small files that might not cache
-            if (file.size < 1024 * 1024) {
-                showError(`File might be too small (${fileSizeMB.toFixed(1)}MB) for context caching. For best results, upload documents with substantial text content (>5MB recommended).`);
-                return;
-            }
-
-            showLoading(`Uploading PDF (${fileSizeMB.toFixed(1)}MB)...`);
+            showLoading('Uploading PDF...');

             const formData = new FormData();
             formData.append('file', file);
@@ -467,8 +439,6 @@ HTML_TEMPLATE = """
                     currentCacheId = result.cache_id;
                     document.getElementById('cacheId').textContent = result.cache_id;
                     document.getElementById('tokenCount').textContent = result.token_count;
-                    document.getElementById('documentName').textContent = result.document_name;
-                    document.getElementById('modelUsed').textContent = result.model_used || 'gemini-2.0-flash-001';
                     document.getElementById('cacheInfo').style.display = 'block';
                     showSuccess('PDF uploaded and cached successfully!');

@@ -476,9 +446,6 @@ HTML_TEMPLATE = """
                    addMessage("I've analyzed your PDF document. What would you like to know about it?", 'ai');
                 } else {
                     showError(result.error);
-                    if (result.suggestion) {
-                        showError(result.suggestion);
-                    }
                 }
             } catch (error) {
                 showError('Error uploading file: ' + error.message);
@@ -511,8 +478,6 @@ HTML_TEMPLATE = """
                     currentCacheId = result.cache_id;
                     document.getElementById('cacheId').textContent = result.cache_id;
                     document.getElementById('tokenCount').textContent = result.token_count;
-                    document.getElementById('documentName').textContent = result.document_name;
-                    document.getElementById('modelUsed').textContent = result.model_used || 'gemini-2.0-flash-001';
                     document.getElementById('cacheInfo').style.display = 'block';
                     showSuccess('PDF uploaded and cached successfully!');

@@ -520,9 +485,6 @@ HTML_TEMPLATE = """
                    addMessage("I've analyzed your PDF document. What would you like to know about it?", 'ai');
                 } else {
                     showError(result.error);
-                    if (result.suggestion) {
-                        showError(result.suggestion);
-                    }
                 }
             } catch (error) {
                 showError('Error uploading from URL: ' + error.message);
@@ -639,98 +601,60 @@ def upload_file():
         if file.filename == '':
             return jsonify({'success': False, 'error': 'No file selected'})

-        # Check file size (limit to 200MB for large documents needed for caching)
-        file.seek(0, 2)  # Seek to end
-        file_size = file.tell()
-        file.seek(0)  # Reset to beginning
-
-        # Convert to MB for display
-        file_size_mb = file_size / (1024 * 1024)
-
-        if file_size > 200 * 1024 * 1024:  # 200MB limit
-            return jsonify({'success': False, 'error': f'File too large ({file_size_mb:.1f}MB). Maximum size is 200MB.'})
-
-        # Warn about small files that might not cache
-        if file_size < 1024 * 1024:  # Less than 1MB
-            print(f"Warning: Small file uploaded ({file_size_mb:.1f}MB). May not meet minimum token requirements for caching.")
-
         # Read file content
         file_content = file.read()
-
-            return jsonify({'success': False, 'error': 'File is empty'})
+        file_io = io.BytesIO(file_content)

-        #
-
+        # Upload to Gemini File API
+        document = client.files.upload(
+            file=file_io,
+            config=dict(mime_type='application/pdf')
+        )

-        #
-        try:
-            document = client.files.upload(
-                file=doc_io,
-                config=dict(mime_type='application/pdf')
-            )
-            print(f"Document uploaded successfully: {document.name}")
-        except Exception as upload_error:
-            print(f"Upload error: {upload_error}")
-            return jsonify({'success': False, 'error': f'Failed to upload file to Gemini: {str(upload_error)}'})
-
-        # Create cache with system instruction using exact pattern from documentation
+        # Create cache with system instruction
         try:
             system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."

-            # Use the model
-
+            # Use the correct model format as per documentation
+            model = 'models/gemini-2.0-flash-001'

-            # Create cached content object exactly as shown in documentation
             cache = client.caches.create(
-                model=
+                model=model,
                 config=types.CreateCachedContentConfig(
+                    display_name='pdf document cache',
                     system_instruction=system_instruction,
-                    contents=[document],
+                    contents=[document],
+                    ttl="3600s",  # 1 hour TTL
                 )
             )

-            print(f"Cache created successfully: {cache.name}")
-
             # Store cache info
             cache_id = str(uuid.uuid4())
             document_caches[cache_id] = {
                 'cache_name': cache.name,
                 'document_name': file.filename,
-                'document_file_name': document.name,
                 'created_at': datetime.now().isoformat()
             }

-            # Get token count safely
-            token_count = 'Unknown'
-            if hasattr(cache, 'usage_metadata') and cache.usage_metadata:
-                if hasattr(cache.usage_metadata, 'total_token_count'):
-                    token_count = cache.usage_metadata.total_token_count
-                elif hasattr(cache.usage_metadata, 'cached_token_count'):
-                    token_count = cache.usage_metadata.cached_token_count
-
             return jsonify({
                 'success': True,
                 'cache_id': cache_id,
-                'token_count':
-                'document_name': file.filename
+                'token_count': getattr(cache.usage_metadata, 'cached_token_count', 'Unknown')
             })

         except Exception as cache_error:
-
-
-            error_msg = str(cache_error).lower()
-            if "too small" in error_msg or "minimum" in error_msg:
+            # If caching fails due to small content, provide alternative approach
+            if "Cached content is too small" in str(cache_error):
                 return jsonify({
                     'success': False,
-                    'error':
-                    'suggestion': '
+                    'error': 'PDF is too small for caching. Please upload a larger document (minimum 4,096 tokens required).',
+                    'suggestion': 'Try uploading a longer document or combine multiple documents.'
                 })
             else:
-
+                raise cache_error

     except Exception as e:
-
-        return jsonify({'success': False, 'error': f'Server error: {str(e)}'})
+        return jsonify({'success': False, 'error': str(e)})

 @app.route('/upload-url', methods=['POST'])
 def upload_from_url():
@@ -741,110 +665,62 @@ def upload_from_url():
         if not url:
             return jsonify({'success': False, 'error': 'No URL provided'})

-        # Download file from URL
-
-
-            response = client_http.get(url)
-            response.raise_for_status()
-
-            # Check content type
-            content_type = response.headers.get('content-type', '').lower()
-            if 'pdf' not in content_type and not url.lower().endswith('.pdf'):
-                return jsonify({'success': False, 'error': 'URL does not point to a PDF file'})
-
-            # Check file size
-            content_length = len(response.content)
-            content_length_mb = content_length / (1024 * 1024)
-
-            if content_length > 200 * 1024 * 1024:  # 200MB limit
-                return jsonify({'success': False, 'error': f'File too large ({content_length_mb:.1f}MB). Maximum size is 200MB.'})
-
-            # Warn about small files
-            if content_length < 1024 * 1024:  # Less than 1MB
-                print(f"Warning: Small file from URL ({content_length_mb:.1f}MB). May not meet minimum token requirements for caching.")
-
-        except httpx.TimeoutException:
-            return jsonify({'success': False, 'error': 'Request timeout. Please try a different URL.'})
-        except httpx.HTTPError as e:
-            return jsonify({'success': False, 'error': f'Failed to download file: {str(e)}'})
-
-        # Extract filename from URL
-        filename = url.split('/')[-1]
-        if not filename.endswith('.pdf'):
-            filename += '.pdf'
+        # Download file from URL
+        response = httpx.get(url)
+        response.raise_for_status()

-
-        doc_io = io.BytesIO(response.content)
+        file_io = io.BytesIO(response.content)

-        # Upload to Gemini File API
-
-
-
-
-        )
-        print(f"Document uploaded successfully: {document.name}")
-        except Exception as upload_error:
-            print(f"Upload error: {upload_error}")
-            return jsonify({'success': False, 'error': f'Failed to upload file to Gemini: {str(upload_error)}'})
+        # Upload to Gemini File API
+        document = client.files.upload(
+            file=file_io,
+            config=dict(mime_type='application/pdf')
+        )

-        # Create cache with system instruction
+        # Create cache with system instruction
         try:
             system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."

-            # Use the model
-
+            # Use the correct model format as per documentation
+            model = 'models/gemini-2.0-flash-001'

-            # Create cached content object exactly as shown in documentation
             cache = client.caches.create(
-                model=
+                model=model,
                 config=types.CreateCachedContentConfig(
+                    display_name='pdf document cache',
                     system_instruction=system_instruction,
-                    contents=[document],
+                    contents=[document],
+                    ttl="3600s",  # 1 hour TTL
                 )
             )

-            print(f"Cache created successfully: {cache.name}")
-
             # Store cache info
             cache_id = str(uuid.uuid4())
             document_caches[cache_id] = {
                 'cache_name': cache.name,
-                'document_name':
-                'source_url': url,
+                'document_name': url,
                 'created_at': datetime.now().isoformat()
             }

-            # Get token count safely
-            token_count = 'Unknown'
-            if hasattr(cache, 'usage_metadata') and cache.usage_metadata:
-                if hasattr(cache.usage_metadata, 'total_token_count'):
-                    token_count = cache.usage_metadata.total_token_count
-                elif hasattr(cache.usage_metadata, 'cached_token_count'):
-                    token_count = cache.usage_metadata.cached_token_count
-
             return jsonify({
                 'success': True,
                 'cache_id': cache_id,
-                'token_count':
-                'document_name': filename
+                'token_count': getattr(cache.usage_metadata, 'cached_token_count', 'Unknown')
             })

         except Exception as cache_error:
-
-
-            error_msg = str(cache_error).lower()
-            if "too small" in error_msg or "minimum" in error_msg:
+            # If caching fails due to small content, provide alternative approach
+            if "Cached content is too small" in str(cache_error):
                 return jsonify({
                     'success': False,
-                    'error':
-                    'suggestion': 'Try a longer document
+                    'error': 'PDF is too small for caching. Please upload a larger document (minimum 4,096 tokens required).',
+                    'suggestion': 'Try uploading a longer document or combine multiple documents.'
                 })
             else:
-
+                raise cache_error

     except Exception as e:
-
-        return jsonify({'success': False, 'error': f'Server error: {str(e)}'})
+        return jsonify({'success': False, 'error': str(e)})

 @app.route('/ask', methods=['POST'])
 def ask_question():
@@ -857,38 +733,26 @@ def ask_question():
             return jsonify({'success': False, 'error': 'Missing question or cache_id'})

         if cache_id not in document_caches:
-            return jsonify({'success': False, 'error': 'Cache not found
+            return jsonify({'success': False, 'error': 'Cache not found'})

         cache_info = document_caches[cache_id]

         # Generate response using cached content with correct model format
-
-
-
-
-
-            cached_content=cache_info['cache_name']
-        )
+        response = client.models.generate_content(
+            model='models/gemini-2.0-flash-001',
+            contents=question,
+            config=types.GenerateContentConfig(
+                cached_content=cache_info['cache_name']
             )
-
-
-
-
-
-
-        else:
-            return jsonify({
-                'success': False,
-                'error': 'No response generated from the model'
-            })
-
-        except Exception as gen_error:
-            print(f"Generation error: {gen_error}")
-            return jsonify({'success': False, 'error': f'Failed to generate response: {str(gen_error)}'})
+        )
+
+        return jsonify({
+            'success': True,
+            'answer': response.text
+        })

     except Exception as e:
-
-        return jsonify({'success': False, 'error': f'Server error: {str(e)}'})
+        return jsonify({'success': False, 'error': str(e)})

 @app.route('/caches', methods=['GET'])
 def list_caches():
@@ -915,11 +779,7 @@ def delete_cache(cache_id):
         cache_info = document_caches[cache_id]

         # Delete from Gemini API
-
-            client.caches.delete(cache_info['cache_name'])
-        except Exception as delete_error:
-            print(f"Error deleting cache from Gemini API: {delete_error}")
-            # Continue to remove from local storage even if API deletion fails
+        client.caches.delete(cache_info['cache_name'])

         # Remove from local storage
         del document_caches[cache_id]
@@ -929,23 +789,7 @@ def delete_cache(cache_id):
     except Exception as e:
         return jsonify({'success': False, 'error': str(e)})

-# Health check endpoint
-@app.route('/health', methods=['GET'])
-def health_check():
-    return jsonify({'status': 'healthy', 'service': 'Smart Document Analysis Platform'})
-
-# Error handlers
-@app.errorhandler(413)
-def too_large(e):
-    return jsonify({'success': False, 'error': 'File too large. Maximum size is 200MB for substantial documents needed for context caching.'}), 413
-
-@app.errorhandler(500)
-def internal_error(e):
-    return jsonify({'success': False, 'error': 'Internal server error'}), 500
-
 if __name__ == '__main__':
     import os
     port = int(os.environ.get("PORT", 7860))
-
-    print(f"Google API Key configured: {'Yes' if GOOGLE_API_KEY else 'No'}")
-    app.run(debug=False, host='0.0.0.0', port=port)
+    app.run(debug=True, host='0.0.0.0', port=port)
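
Note: for readers tracing the new flow end to end, below is a minimal standalone sketch of the caching pattern this commit converges on, using the same google-genai calls that appear in the diff. The file path 'sample.pdf', the question string, and running outside Flask are placeholder assumptions for illustration, not part of the commit.

    import io
    import os
    from google import genai
    from google.genai import types

    client = genai.Client(api_key=os.getenv('GOOGLE_API_KEY'))

    # Upload the PDF bytes to the Gemini File API (placeholder path)
    with open('sample.pdf', 'rb') as f:
        document = client.files.upload(
            file=io.BytesIO(f.read()),
            config=dict(mime_type='application/pdf')
        )

    # Cache the document together with a system instruction for one hour
    cache = client.caches.create(
        model='models/gemini-2.0-flash-001',
        config=types.CreateCachedContentConfig(
            display_name='pdf document cache',
            system_instruction='You are an expert document analyzer.',
            contents=[document],
            ttl='3600s'
        )
    )

    # Each question references the cache instead of re-sending the PDF
    response = client.models.generate_content(
        model='models/gemini-2.0-flash-001',
        contents='What is this document about?',  # placeholder question
        config=types.GenerateContentConfig(cached_content=cache.name)
    )
    print(response.text)

    # Remove the cache when finished
    client.caches.delete(name=cache.name)

As the new error handling reflects, caches.create raises when the document falls below the model's minimum cached-token threshold (the commit's error message cites 4,096 tokens), so very small PDFs take the 'too small' branch instead of being cached.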