Update app.py
Browse files
app.py
CHANGED
|
@@ -9,23 +9,20 @@ import uuid
|
|
| 9 |
from datetime import datetime, timezone, timedelta
|
| 10 |
from dotenv import load_dotenv
|
| 11 |
import json
|
| 12 |
-
from huggingface_hub import HfApi
|
| 13 |
|
| 14 |
-
# Load
|
|
|
|
|
|
|
|
|
|
| 15 |
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
|
| 16 |
if GOOGLE_API_KEY is None:
|
| 17 |
raise ValueError("GOOGLE_API_KEY environment variable is not set. Please set it before running the script.")
|
| 18 |
|
| 19 |
-
|
| 20 |
-
hf_api = HfApi(
|
| 21 |
-
token= GOOGLE_API_KEY, # Token is not persisted on the machine.
|
| 22 |
-
)
|
| 23 |
-
|
| 24 |
app = Flask(__name__)
|
| 25 |
CORS(app)
|
| 26 |
|
| 27 |
-
# Initialize Gemini client
|
| 28 |
-
client = genai.Client(api_key=
|
| 29 |
|
| 30 |
# In-memory storage for demo (in production, use a database)
|
| 31 |
document_caches = {}
|
|
@@ -609,60 +606,93 @@ def upload_file():
|
|
| 609 |
if file.filename == '':
|
| 610 |
return jsonify({'success': False, 'error': 'No file selected'})
|
| 611 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 612 |
# Read file content
|
| 613 |
file_content = file.read()
|
|
|
|
|
|
|
|
|
|
| 614 |
file_io = io.BytesIO(file_content)
|
| 615 |
|
| 616 |
# Upload to Gemini File API
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 621 |
|
| 622 |
# Create cache with system instruction
|
| 623 |
try:
|
| 624 |
system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."
|
| 625 |
|
| 626 |
-
# Use the correct model
|
| 627 |
-
model = '
|
| 628 |
|
| 629 |
cache = client.caches.create(
|
| 630 |
model=model,
|
| 631 |
config=types.CreateCachedContentConfig(
|
| 632 |
-
display_name='
|
| 633 |
system_instruction=system_instruction,
|
| 634 |
contents=[document],
|
| 635 |
ttl="3600s", # 1 hour TTL
|
| 636 |
)
|
| 637 |
)
|
| 638 |
|
|
|
|
|
|
|
| 639 |
# Store cache info
|
| 640 |
cache_id = str(uuid.uuid4())
|
| 641 |
document_caches[cache_id] = {
|
| 642 |
'cache_name': cache.name,
|
| 643 |
'document_name': file.filename,
|
|
|
|
| 644 |
'created_at': datetime.now().isoformat()
|
| 645 |
}
|
| 646 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 647 |
return jsonify({
|
| 648 |
'success': True,
|
| 649 |
'cache_id': cache_id,
|
| 650 |
-
'token_count':
|
|
|
|
| 651 |
})
|
| 652 |
|
| 653 |
except Exception as cache_error:
|
|
|
|
| 654 |
# If caching fails due to small content, provide alternative approach
|
| 655 |
-
if "
|
| 656 |
return jsonify({
|
| 657 |
'success': False,
|
| 658 |
-
'error': 'PDF is too small for caching. Please upload a larger document
|
| 659 |
'suggestion': 'Try uploading a longer document or combine multiple documents.'
|
| 660 |
})
|
| 661 |
else:
|
| 662 |
-
|
| 663 |
|
| 664 |
except Exception as e:
|
| 665 |
-
|
|
|
|
| 666 |
|
| 667 |
@app.route('/upload-url', methods=['POST'])
|
| 668 |
def upload_from_url():
|
|
@@ -673,62 +703,107 @@ def upload_from_url():
|
|
| 673 |
if not url:
|
| 674 |
return jsonify({'success': False, 'error': 'No URL provided'})
|
| 675 |
|
| 676 |
-
# Download file from URL
|
| 677 |
-
|
| 678 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 679 |
|
| 680 |
-
|
|
|
|
|
|
|
|
|
|
| 681 |
|
| 682 |
# Upload to Gemini File API
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 687 |
|
| 688 |
# Create cache with system instruction
|
| 689 |
try:
|
| 690 |
system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."
|
| 691 |
|
| 692 |
-
# Use the correct model
|
| 693 |
-
model = '
|
| 694 |
|
| 695 |
cache = client.caches.create(
|
| 696 |
model=model,
|
| 697 |
config=types.CreateCachedContentConfig(
|
| 698 |
-
display_name='
|
| 699 |
system_instruction=system_instruction,
|
| 700 |
contents=[document],
|
| 701 |
ttl="3600s", # 1 hour TTL
|
| 702 |
)
|
| 703 |
)
|
| 704 |
|
|
|
|
|
|
|
| 705 |
# Store cache info
|
| 706 |
cache_id = str(uuid.uuid4())
|
| 707 |
document_caches[cache_id] = {
|
| 708 |
'cache_name': cache.name,
|
| 709 |
-
'document_name':
|
|
|
|
|
|
|
| 710 |
'created_at': datetime.now().isoformat()
|
| 711 |
}
|
| 712 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 713 |
return jsonify({
|
| 714 |
'success': True,
|
| 715 |
'cache_id': cache_id,
|
| 716 |
-
'token_count':
|
|
|
|
| 717 |
})
|
| 718 |
|
| 719 |
except Exception as cache_error:
|
|
|
|
| 720 |
# If caching fails due to small content, provide alternative approach
|
| 721 |
-
if "
|
| 722 |
return jsonify({
|
| 723 |
'success': False,
|
| 724 |
-
'error': 'PDF is too small for caching. Please upload a larger document
|
| 725 |
'suggestion': 'Try uploading a longer document or combine multiple documents.'
|
| 726 |
})
|
| 727 |
else:
|
| 728 |
-
|
| 729 |
|
| 730 |
except Exception as e:
|
| 731 |
-
|
|
|
|
| 732 |
|
| 733 |
@app.route('/ask', methods=['POST'])
|
| 734 |
def ask_question():
|
|
@@ -741,26 +816,38 @@ def ask_question():
|
|
| 741 |
return jsonify({'success': False, 'error': 'Missing question or cache_id'})
|
| 742 |
|
| 743 |
if cache_id not in document_caches:
|
| 744 |
-
return jsonify({'success': False, 'error': 'Cache not found'})
|
| 745 |
|
| 746 |
cache_info = document_caches[cache_id]
|
| 747 |
|
| 748 |
# Generate response using cached content with correct model format
|
| 749 |
-
|
| 750 |
-
|
| 751 |
-
|
| 752 |
-
|
| 753 |
-
|
|
|
|
|
|
|
| 754 |
)
|
| 755 |
-
|
| 756 |
-
|
| 757 |
-
|
| 758 |
-
|
| 759 |
-
|
| 760 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 761 |
|
| 762 |
except Exception as e:
|
| 763 |
-
|
|
|
|
| 764 |
|
| 765 |
@app.route('/caches', methods=['GET'])
|
| 766 |
def list_caches():
|
|
@@ -787,7 +874,11 @@ def delete_cache(cache_id):
|
|
| 787 |
cache_info = document_caches[cache_id]
|
| 788 |
|
| 789 |
# Delete from Gemini API
|
| 790 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 791 |
|
| 792 |
# Remove from local storage
|
| 793 |
del document_caches[cache_id]
|
|
@@ -797,7 +888,23 @@ def delete_cache(cache_id):
|
|
| 797 |
except Exception as e:
|
| 798 |
return jsonify({'success': False, 'error': str(e)})
|
| 799 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 800 |
if __name__ == '__main__':
|
| 801 |
import os
|
| 802 |
port = int(os.environ.get("PORT", 7860))
|
| 803 |
-
|
|
|
|
|
|
|
|
|
| 9 |
from datetime import datetime, timezone, timedelta
|
| 10 |
from dotenv import load_dotenv
|
| 11 |
import json
|
|
|
|
| 12 |
|
| 13 |
+
# Load environment variables
|
| 14 |
+
load_dotenv()
|
| 15 |
+
|
| 16 |
+
# Get Google API key from environment
|
| 17 |
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
|
| 18 |
if GOOGLE_API_KEY is None:
|
| 19 |
raise ValueError("GOOGLE_API_KEY environment variable is not set. Please set it before running the script.")
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
app = Flask(__name__)
|
| 22 |
CORS(app)
|
| 23 |
|
| 24 |
+
# Initialize Gemini client with correct API key
|
| 25 |
+
client = genai.Client(api_key=GOOGLE_API_KEY)
|
| 26 |
|
| 27 |
# In-memory storage for demo (in production, use a database)
|
| 28 |
document_caches = {}
|
|
|
|
| 606 |
if file.filename == '':
|
| 607 |
return jsonify({'success': False, 'error': 'No file selected'})
|
| 608 |
|
| 609 |
+
# Check the file size before reading the content (10MB limit)
|
| 610 |
+
file.seek(0, 2) # Seek to end
|
| 611 |
+
file_size = file.tell()
|
| 612 |
+
file.seek(0) # Reset to beginning
|
| 613 |
+
|
| 614 |
+
if file_size > 10 * 1024 * 1024: # 10MB limit
|
| 615 |
+
return jsonify({'success': False, 'error': 'File too large. Maximum size is 10MB.'})
|
| 616 |
+
|
| 617 |
# Read file content
|
| 618 |
file_content = file.read()
|
| 619 |
+
if not file_content:
|
| 620 |
+
return jsonify({'success': False, 'error': 'File is empty'})
|
| 621 |
+
|
| 622 |
file_io = io.BytesIO(file_content)
|
| 623 |
|
| 624 |
# Upload to Gemini File API
|
| 625 |
+
try:
|
| 626 |
+
document = client.files.upload(
|
| 627 |
+
file=file_io,
|
| 628 |
+
config=types.FileUploadConfig(
|
| 629 |
+
mime_type='application/pdf',
|
| 630 |
+
display_name=file.filename
|
| 631 |
+
)
|
| 632 |
+
)
|
| 633 |
+
print(f"Document uploaded successfully: {document.name}")
|
| 634 |
+
except Exception as upload_error:
|
| 635 |
+
print(f"Upload error: {upload_error}")
|
| 636 |
+
return jsonify({'success': False, 'error': f'Failed to upload file to Gemini: {str(upload_error)}'})
|
| 637 |
|
| 638 |
# Create cache with system instruction
|
| 639 |
try:
|
| 640 |
system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."
|
| 641 |
|
| 642 |
+
# Use the correct model name (without 'models/' prefix)
|
| 643 |
+
model = 'gemini-2.0-flash-001'
|
| 644 |
|
| 645 |
cache = client.caches.create(
|
| 646 |
model=model,
|
| 647 |
config=types.CreateCachedContentConfig(
|
| 648 |
+
display_name=f'PDF document cache - {file.filename}',
|
| 649 |
system_instruction=system_instruction,
|
| 650 |
contents=[document],
|
| 651 |
ttl="3600s", # 1 hour TTL
|
| 652 |
)
|
| 653 |
)
|
| 654 |
|
| 655 |
+
print(f"Cache created successfully: {cache.name}")
|
| 656 |
+
|
| 657 |
# Store cache info
|
| 658 |
cache_id = str(uuid.uuid4())
|
| 659 |
document_caches[cache_id] = {
|
| 660 |
'cache_name': cache.name,
|
| 661 |
'document_name': file.filename,
|
| 662 |
+
'document_file_name': document.name,
|
| 663 |
'created_at': datetime.now().isoformat()
|
| 664 |
}
|
| 665 |
|
| 666 |
+
# Get token count safely
|
| 667 |
+
token_count = 'Unknown'
|
| 668 |
+
if hasattr(cache, 'usage_metadata') and cache.usage_metadata:
|
| 669 |
+
if hasattr(cache.usage_metadata, 'total_token_count'):
|
| 670 |
+
token_count = cache.usage_metadata.total_token_count
|
| 671 |
+
elif hasattr(cache.usage_metadata, 'cached_token_count'):
|
| 672 |
+
token_count = cache.usage_metadata.cached_token_count
|
| 673 |
+
|
| 674 |
return jsonify({
|
| 675 |
'success': True,
|
| 676 |
'cache_id': cache_id,
|
| 677 |
+
'token_count': token_count,
|
| 678 |
+
'document_name': file.filename
|
| 679 |
})
|
| 680 |
|
| 681 |
except Exception as cache_error:
|
| 682 |
+
print(f"Cache error: {cache_error}")
|
| 683 |
# If caching fails because the content is too small, return a specific error with a suggestion
|
| 684 |
+
if "too small" in str(cache_error).lower():
|
| 685 |
return jsonify({
|
| 686 |
'success': False,
|
| 687 |
+
'error': 'PDF content is too small for caching. Please upload a larger document with more text content.',
|
| 688 |
'suggestion': 'Try uploading a longer document or combine multiple documents.'
|
| 689 |
})
|
| 690 |
else:
|
| 691 |
+
return jsonify({'success': False, 'error': f'Failed to create cache: {str(cache_error)}'})
|
| 692 |
|
| 693 |
except Exception as e:
|
| 694 |
+
print(f"General error: {e}")
|
| 695 |
+
return jsonify({'success': False, 'error': f'Server error: {str(e)}'})
|
| 696 |
|
| 697 |
@app.route('/upload-url', methods=['POST'])
|
| 698 |
def upload_from_url():
|
|
|
|
| 703 |
if not url:
|
| 704 |
return jsonify({'success': False, 'error': 'No URL provided'})
|
| 705 |
|
| 706 |
+
# Download file from URL with a 30s timeout; the 10MB size limit is checked after the download completes
|
| 707 |
+
try:
|
| 708 |
+
with httpx.Client(timeout=30.0) as client_http:
|
| 709 |
+
response = client_http.get(url)
|
| 710 |
+
response.raise_for_status()
|
| 711 |
+
|
| 712 |
+
# Check content type
|
| 713 |
+
content_type = response.headers.get('content-type', '').lower()
|
| 714 |
+
if 'pdf' not in content_type and not url.lower().endswith('.pdf'):
|
| 715 |
+
return jsonify({'success': False, 'error': 'URL does not point to a PDF file'})
|
| 716 |
+
|
| 717 |
+
# Check file size
|
| 718 |
+
content_length = len(response.content)
|
| 719 |
+
if content_length > 10 * 1024 * 1024: # 10MB limit
|
| 720 |
+
return jsonify({'success': False, 'error': 'File too large. Maximum size is 10MB.'})
|
| 721 |
+
|
| 722 |
+
file_io = io.BytesIO(response.content)
|
| 723 |
+
|
| 724 |
+
except httpx.TimeoutException:
|
| 725 |
+
return jsonify({'success': False, 'error': 'Request timeout. Please try a different URL.'})
|
| 726 |
+
except httpx.HTTPError as e:
|
| 727 |
+
return jsonify({'success': False, 'error': f'Failed to download file: {str(e)}'})
|
| 728 |
|
| 729 |
+
# Extract filename from URL
|
| 730 |
+
filename = url.split('/')[-1]
|
| 731 |
+
if not filename.endswith('.pdf'):
|
| 732 |
+
filename += '.pdf'
|
| 733 |
|
| 734 |
# Upload to Gemini File API
|
| 735 |
+
try:
|
| 736 |
+
document = client.files.upload(
|
| 737 |
+
file=file_io,
|
| 738 |
+
config=types.FileUploadConfig(
|
| 739 |
+
mime_type='application/pdf',
|
| 740 |
+
display_name=filename
|
| 741 |
+
)
|
| 742 |
+
)
|
| 743 |
+
print(f"Document uploaded successfully: {document.name}")
|
| 744 |
+
except Exception as upload_error:
|
| 745 |
+
print(f"Upload error: {upload_error}")
|
| 746 |
+
return jsonify({'success': False, 'error': f'Failed to upload file to Gemini: {str(upload_error)}'})
|
| 747 |
|
| 748 |
# Create cache with system instruction
|
| 749 |
try:
|
| 750 |
system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."
|
| 751 |
|
| 752 |
+
# Use the correct model name (without 'models/' prefix)
|
| 753 |
+
model = 'gemini-2.0-flash-001'
|
| 754 |
|
| 755 |
cache = client.caches.create(
|
| 756 |
model=model,
|
| 757 |
config=types.CreateCachedContentConfig(
|
| 758 |
+
display_name=f'PDF document cache - {filename}',
|
| 759 |
system_instruction=system_instruction,
|
| 760 |
contents=[document],
|
| 761 |
ttl="3600s", # 1 hour TTL
|
| 762 |
)
|
| 763 |
)
|
| 764 |
|
| 765 |
+
print(f"Cache created successfully: {cache.name}")
|
| 766 |
+
|
| 767 |
# Store cache info
|
| 768 |
cache_id = str(uuid.uuid4())
|
| 769 |
document_caches[cache_id] = {
|
| 770 |
'cache_name': cache.name,
|
| 771 |
+
'document_name': filename,
|
| 772 |
+
'document_file_name': document.name,
|
| 773 |
+
'source_url': url,
|
| 774 |
'created_at': datetime.now().isoformat()
|
| 775 |
}
|
| 776 |
|
| 777 |
+
# Get token count safely
|
| 778 |
+
token_count = 'Unknown'
|
| 779 |
+
if hasattr(cache, 'usage_metadata') and cache.usage_metadata:
|
| 780 |
+
if hasattr(cache.usage_metadata, 'total_token_count'):
|
| 781 |
+
token_count = cache.usage_metadata.total_token_count
|
| 782 |
+
elif hasattr(cache.usage_metadata, 'cached_token_count'):
|
| 783 |
+
token_count = cache.usage_metadata.cached_token_count
|
| 784 |
+
|
| 785 |
return jsonify({
|
| 786 |
'success': True,
|
| 787 |
'cache_id': cache_id,
|
| 788 |
+
'token_count': token_count,
|
| 789 |
+
'document_name': filename
|
| 790 |
})
|
| 791 |
|
| 792 |
except Exception as cache_error:
|
| 793 |
+
print(f"Cache error: {cache_error}")
|
| 794 |
# If caching fails because the content is too small, return a specific error with a suggestion
|
| 795 |
+
if "too small" in str(cache_error).lower():
|
| 796 |
return jsonify({
|
| 797 |
'success': False,
|
| 798 |
+
'error': 'PDF content is too small for caching. Please upload a larger document with more text content.',
|
| 799 |
'suggestion': 'Try uploading a longer document or combine multiple documents.'
|
| 800 |
})
|
| 801 |
else:
|
| 802 |
+
return jsonify({'success': False, 'error': f'Failed to create cache: {str(cache_error)}'})
|
| 803 |
|
| 804 |
except Exception as e:
|
| 805 |
+
print(f"General error: {e}")
|
| 806 |
+
return jsonify({'success': False, 'error': f'Server error: {str(e)}'})
|
| 807 |
|
| 808 |
@app.route('/ask', methods=['POST'])
|
| 809 |
def ask_question():
|
|
|
|
| 816 |
return jsonify({'success': False, 'error': 'Missing question or cache_id'})
|
| 817 |
|
| 818 |
if cache_id not in document_caches:
|
| 819 |
+
return jsonify({'success': False, 'error': 'Cache not found. Please upload a document first.'})
|
| 820 |
|
| 821 |
cache_info = document_caches[cache_id]
|
| 822 |
|
| 823 |
# Generate response using cached content with correct model format
|
| 824 |
+
try:
|
| 825 |
+
response = client.models.generate_content(
|
| 826 |
+
model='gemini-2.0-flash-001', # No 'models/' prefix here
|
| 827 |
+
contents=question,
|
| 828 |
+
config=types.GenerateContentConfig(
|
| 829 |
+
cached_content=cache_info['cache_name']
|
| 830 |
+
)
|
| 831 |
)
|
| 832 |
+
|
| 833 |
+
if response and response.text:
|
| 834 |
+
return jsonify({
|
| 835 |
+
'success': True,
|
| 836 |
+
'answer': response.text
|
| 837 |
+
})
|
| 838 |
+
else:
|
| 839 |
+
return jsonify({
|
| 840 |
+
'success': False,
|
| 841 |
+
'error': 'No response generated from the model'
|
| 842 |
+
})
|
| 843 |
+
|
| 844 |
+
except Exception as gen_error:
|
| 845 |
+
print(f"Generation error: {gen_error}")
|
| 846 |
+
return jsonify({'success': False, 'error': f'Failed to generate response: {str(gen_error)}'})
|
| 847 |
|
| 848 |
except Exception as e:
|
| 849 |
+
print(f"General error in ask_question: {e}")
|
| 850 |
+
return jsonify({'success': False, 'error': f'Server error: {str(e)}'})
|
| 851 |
|
| 852 |
@app.route('/caches', methods=['GET'])
|
| 853 |
def list_caches():
|
|
|
|
| 874 |
cache_info = document_caches[cache_id]
|
| 875 |
|
| 876 |
# Delete from Gemini API
|
| 877 |
+
try:
|
| 878 |
+
client.caches.delete(cache_info['cache_name'])
|
| 879 |
+
except Exception as delete_error:
|
| 880 |
+
print(f"Error deleting cache from Gemini API: {delete_error}")
|
| 881 |
+
# Continue to remove from local storage even if API deletion fails
|
| 882 |
|
| 883 |
# Remove from local storage
|
| 884 |
del document_caches[cache_id]
|
|
|
|
| 888 |
except Exception as e:
|
| 889 |
return jsonify({'success': False, 'error': str(e)})
|
| 890 |
|
| 891 |
+
# Health check endpoint
|
| 892 |
+
@app.route('/health', methods=['GET'])
def health_check():
    """Return a fixed JSON payload marking the service as healthy."""
    payload = {'status': 'healthy', 'service': 'Smart Document Analysis Platform'}
    return jsonify(payload)
|
| 895 |
+
|
| 896 |
+
# Error handlers
|
| 897 |
+
@app.errorhandler(413)
def too_large(e):
    """Answer HTTP 413 errors with a JSON error body and status 413.

    The error object ``e`` supplied by Flask is not used.
    """
    body = jsonify({'success': False, 'error': 'File too large'})
    return body, 413
|
| 900 |
+
|
| 901 |
+
@app.errorhandler(500)
def internal_error(e):
    """Answer HTTP 500 errors with a generic JSON error body and status 500.

    The error object ``e`` supplied by Flask is not used, so no exception
    detail is included in the response.
    """
    body = jsonify({'success': False, 'error': 'Internal server error'})
    return body, 500
|
| 904 |
+
|
| 905 |
if __name__ == '__main__':
    # Script entry point: resolve the listening port, log the startup
    # configuration, then start the Flask server.
    import os

    # PORT is read from the environment; 7860 is used when it is unset.
    port = int(os.environ.get("PORT", 7860))

    print(f"Starting server on port {port}")
    # Only report whether a key is present; the key itself is never printed.
    print(f"Google API Key configured: {'Yes' if GOOGLE_API_KEY else 'No'}")

    # Bind to all interfaces with the debugger disabled.
    app.run(debug=False, host='0.0.0.0', port=port)
|