pos-image-api

Sleeping

App Files Community

rairo committed on May 30, 2025

Commit

5fa6c98

verified ·

1 Parent(s): c26b237

Update main.py

Browse files

Files changed (1) hide show

main.py +317 -338

main.py CHANGED Viewed

@@ -2,372 +2,351 @@ from flask import Flask, request, jsonify
 import os
 import json
 import time
 from flask_cors import CORS
 from google import genai
-from google.genai import types
-from exa_py import Exa
-from linkup import LinkupClient
 app = Flask(__name__)
 CORS(app)
-# Environment variables
-GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
-if not GOOGLE_API_KEY:
-    raise ValueError("GOOGLE_API_KEY environment variable is not set.")
-EXA_API_KEY = os.environ.get("EXA_API_KEY")
-if not EXA_API_KEY:
-    raise ValueError("EXA_API_KEY environment variable is not set.")
-LINKUP_API_KEY = os.environ.get("LINKUP_API_KEY")
-if not LINKUP_API_KEY:
-    raise ValueError("LINKUP_API_KEY environment variable is not set.")
-# Initialize clients
-exa = Exa(api_key=EXA_API_KEY)
-linkup_client = LinkupClient(api_key=LINKUP_API_KEY)
-def get_data(search_term):
-    """
-    Run the Linkup deep search for a given search term.
-    If a rate-limit error occurs, wait 10 seconds and retry.
-    """
-    full_query = f"{search_term} grants funding opportunities"
-    print("\n=== DEBUG: Start get_data() ===")
-    print(f"Search Term: {search_term}")
-    print(f"Full Query: {full_query}\n")
     try:
-        response = linkup_client.search(
-            query=full_query,
-            depth="deep",
-            output_type="sourcedAnswer",
-            include_images=False,
-        )
-        print("\n=== DEBUG: Raw result from linkup search ===")
-        print(response)
-        print("===========================================")
-        # Extract the answer content from Linkup response
-        content = ""
-        if hasattr(response, 'answer'):
-            content = response.answer
-        elif isinstance(response, dict) and 'answer' in response:
-            content = response['answer']
-        else:
-            content = str(response)
-        # Process the content with Gemini AI to extract structured grant data
-        structured_prompt = (
-            f"Based on the following search results about {search_term} grants, "
-            "extract and structure grant information with:\n"
-            "- Grant name/title\n"
-            "- Short summary \n"
-            "- Funding organization\n"
-            "- Grant value (numeric only)\n"
-            "- Application deadline\n"
-            "- Eligible countries\n"
-            "- Sector/field\n"
-            "- Eligibility criteria\n"
-            "- link URL\n"
-            "Return in JSON format with a 'grants' array.\n\n"
-            f"Search results: {content}"
-        )
-        client = genai.Client(api_key=GOOGLE_API_KEY)
-        gemini_response = client.models.generate_content(
-            model="models/gemini-2.0-flash-lite",
-            contents=f"{structured_prompt}, return the json string and nothing else"
         )
-        gemini_text = gemini_response.text
-        print(f"DEBUG: Gemini response: {gemini_text}")
-        # Parse JSON from Gemini response
-        try:
-            # Try to find JSON in the response
-            start_index = gemini_text.find('{')
-            if start_index == -1:
-                start_index = gemini_text.find('[')
-            if start_index != -1:
-                if gemini_text[start_index] == '{':
-                    end_index = gemini_text.rfind('}') + 1
-                else:
-                    end_index = gemini_text.rfind(']') + 1
-                json_string = gemini_text[start_index:end_index]
-                result = json.loads(json_string)
-                # Ensure result has grants array
-                if isinstance(result, list):
-                    result = {"grants": result}
-                elif isinstance(result, dict) and "grants" not in result:
-                    # If it's a dict but no grants key, assume it's a single grant
-                    result = {"grants": [result]}
-            else:
-                result = {"grants": []}
-        except json.JSONDecodeError as je:
-            print(f"ERROR: Failed to parse JSON from Gemini response: {je}")
-            result = {"grants": []}
-        if not result or "grants" not in result or not result["grants"]:
-            print(f"DEBUG: No grants found for '{search_term}'.")
-            return {"error": f"No results returned for '{search_term}'. Please try again with a different search term."}
-        print("DEBUG: Grants found, returning results.")
-        return result
     except Exception as e:
-        err_str = str(e)
-        print(f"ERROR: Exception occurred - {err_str}")
-        # Check for rate limiting or similar errors
-        if "rate" in err_str.lower() or "limit" in err_str.lower():
-            print("DEBUG: Rate limit detected. Retrying in 10 seconds...")
-            time.sleep(10)
-            try:
-                response = linkup_client.search(
-                    query=full_query,
-                    depth="deep",
-                    output_type="sourcedAnswer",
-                    include_images=False,
-                )
-                # Process retry response similar to above
-                content = ""
-                if hasattr(response, 'answer'):
-                    content = response.answer
-                elif isinstance(response, dict) and 'answer' in response:
-                    content = response['answer']
-                else:
-                    content = str(response)
-                structured_prompt = (
-                    f"Based on the following search results about {search_term} grants, "
-                    "extract and structure grant information with:\n"
-                    "- Grant name/title\n"
-                    "- Short summary \n"
-                    "- Funding organization\n"
-                    "- Grant value (numeric only)\n"
-                    "- Application deadline\n"
-                    "- Eligible countries\n"
-                    "- Sector/field\n"
-                    "- Eligibility criteria\n"
-                    "- link URL\n"
-                    "Return in JSON format with a 'grants' array.\n\n"
-                    f"Search results: {content}"
-                )
-                client = genai.Client(api_key=GOOGLE_API_KEY)
-                gemini_response = client.models.generate_content(
-                    model="models/gemini-2.0-flash-lite",
-                    contents=f"{structured_prompt}, return the json string and nothing else"
-                )
-                gemini_text = gemini_response.text
-                try:
-                    start_index = gemini_text.find('{')
-                    if start_index == -1:
-                        start_index = gemini_text.find('[')
-                    if start_index != -1:
-                        if gemini_text[start_index] == '{':
-                            end_index = gemini_text.rfind('}') + 1
-                        else:
-                            end_index = gemini_text.rfind(']') + 1
-                        json_string = gemini_text[start_index:end_index]
-                        result = json.loads(json_string)
-                        if isinstance(result, list):
-                            result = {"grants": result}
-                        elif isinstance(result, dict) and "grants" not in result:
-                            result = {"grants": [result]}
-                    else:
-                        result = {"grants": []}
-                except json.JSONDecodeError:
-                    result = {"grants": []}
-                if not result or "grants" not in result or not result["grants"]:
-                    print(f"DEBUG: No grants found after retry for '{search_term}'.")
-                    return {"error": f"No results returned for '{search_term}' after retry. Please try again with a different search term."}
-                print("DEBUG: Grants found on retry, returning results.")
-                return result
-            except Exception as e2:
-                print(f"ERROR: Retry failed - {str(e2)}")
-                return {"error": f"Retry failed for '{search_term}': {str(e2)}. Please try again later."}
-        else:
-            return {"error": f"An error occurred for '{search_term}': {str(e)}. Please try again."}
-def process_multiple_search_terms(search_terms):
-    """
-    Process multiple search terms and aggregate results.
-    Returns a dictionary with a 'grants' key containing combined results.
-    """
-    all_data = {"grants": []}
-    for term in search_terms:
-        term = term.strip()
-        if not term:
-            continue
-        result = get_data(term)
-        if result and result.get("grants"):
-            all_data["grants"].extend(result["grants"])
-    return all_data
-@app.route("/scrape", methods=["POST"])
-def scrape():
-    """
-    Endpoint to scrape grant opportunities using search terms.
-    Expects a JSON body with the key 'search_terms' (a string with newline-separated search terms
-    or a list of strings). Returns JSON with the aggregated results.
-    """
-    data = request.get_json()
-    if not data or "search_terms" not in data:
-        return jsonify({"error": "Request must include 'search_terms' key."}), 400
-    search_terms = data["search_terms"]
-    if isinstance(search_terms, str):
-        search_terms = [s.strip() for s in search_terms.split("\n") if s.strip()]
-    elif not isinstance(search_terms, list):
-        return jsonify({"error": "'search_terms' must be a string or list of strings."}), 400
-    if not search_terms:
-        return jsonify({"error": "No valid search terms provided."}), 400
-    result = process_multiple_search_terms(search_terms)
-    return jsonify(result), 200
-def get_data_from_url(url):
-    """
-    Scrape the provided URL using Exa API.
-    Extract grant data using Gemini AI.
-    """
-    print(f"\n=== DEBUG: Start get_data_from_url() ===")
-    print(f"URL: {url}")
     try:
-        # Use Exa to get content from URL
-        result = exa.get_contents(
-            [url],
-            text=True
         )
-        print("\n=== DEBUG: Raw result from Exa ===")
-        print(result)
-        print("=====================================")
-        # Extract text content from Exa response
-        page_content = ""
-        if hasattr(result, 'results') and result.results:
-            page_content = result.results[0].text if hasattr(result.results[0], 'text') else str(result.results[0])
-        elif isinstance(result, dict) and 'results' in result and result['results']:
-            page_content = result['results'][0].get('text', str(result['results'][0]))
-        else:
-            page_content = str(result)
-        if not page_content:
-            print("ERROR: No content extracted from URL")
-            return {}
-        print(f"DEBUG: Extracted content length: {len(page_content)}")
-        # Process content with Gemini AI
-        full_prompt = (
-            "Extract the following grant data from the provided web content. "
-            "- Grant name/title\n"
-            "- Short summary\n"
-            "- Funding organization\n"
-            "- Grant value (numeric only)\n"
-            "- Application deadline\n"
-            "- Eligible countries\n"
-            "- Sector/field\n"
-            "- Eligibility criteria\n"
-            "Return in JSON format with a 'grants' array.\n\n"
-            f"Web content: {page_content[:10000]}"  # Limit content to avoid token limits
-        )
-        client = genai.Client(api_key=GOOGLE_API_KEY)
-        gemini_response = client.models.generate_content(
-            model="models/gemini-2.0-flash-lite",
-            contents=f"{full_prompt}, return the json string and nothing else"
         )
-        response_text = gemini_response.text
-        print(f"DEBUG: Gemini response: {response_text}")
-        # Extract JSON output from Gemini
-        try:
-            start_index = response_text.find('[')
-            if start_index == -1:
-                start_index = response_text.find('{')
-            if start_index != -1:
-                if response_text[start_index] == '[':
-                    end_index = response_text.rfind(']') + 1
-                else:
-                    end_index = response_text.rfind('}') + 1
-                json_string = response_text[start_index:end_index]
-                parsed_result = json.loads(json_string)
-                # Ensure JSON is wrapped correctly
-                if isinstance(parsed_result, list):
-                    parsed_result = {"grants": parsed_result}
-                elif isinstance(parsed_result, dict) and "grants" not in parsed_result:
-                    # If it's a dict but no grants key, assume it's a single grant
-                    parsed_result = {"grants": [parsed_result]}
-            else:
-                parsed_result = {"grants": []}
-        except Exception as parse_error:
-            print(f"Error parsing JSON from Gemini model response: {parse_error}")
-            print(f"Response: {response_text}")
-            return {}
-        if not parsed_result.get("grants"):
-            print("No grant opportunities found in the scraped URL.")
-            return {}
-        print(f"DEBUG: Found {len(parsed_result['grants'])} grants")
-        if parsed_result['grants']:
-            print(f"First grant opportunity: {parsed_result['grants'][0]}")
-        return parsed_result
     except Exception as e:
-        print(f"ERROR: Exception in get_data_from_url: {str(e)}")
-        return {}
-@app.route("/scrape_url", methods=["POST"])
-def scrape_url():
-    """
-    Endpoint to scrape a provided URL for grant opportunities.
-    Expects a JSON body with the key 'url'.
-    Returns the scraped and processed grant data in JSON format.
-    """
-    data = request.get_json()
-    if not data or "url" not in data:
-        return jsonify({"error": "Request must include 'url' key."}), 400
-    url = data["url"]
-    result = get_data_from_url(url)
-    if not result:
-        return jsonify({"error": "Failed to scrape URL or no grants found."}), 500
-    return jsonify(result), 200
 if __name__ == "__main__":
     app.run(debug=True, host="0.0.0.0", port=7860)

 import os
 import json
 import time
+import base64
+import uuid
 from flask_cors import CORS
 from google import genai
+from PIL import Image
+import io
+from typing import List, Dict, Any
+import logging
 app = Flask(__name__)
 CORS(app)
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Configure GenAI
+GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
+if not GOOGLE_API_KEY:
+    raise ValueError("GOOGLE_API_KEY environment variable is required")
+client = genai.Client(api_key=GOOGLE_API_KEY)
+# In-memory storage for multi-part receipts (use Redis/database in production)
+receipt_sessions = {}
+RECEIPT_ANALYSIS_PROMPT = """
+Analyze this receipt image and extract the following information in JSON format:
+- items: List of items with their details
+- receipt_date: Date from the receipt (YYYY-MM-DD format)
+- total_amount: Total amount from receipt
+- store_name: Name of the store/merchant
+For each item, provide:
+- name: Item name/description
+- quantity: Quantity purchased (default to 1 if not specified)
+- unit_price: Price per unit
+- total_price: Total price for this item
+- category: Categorize as either "stock" (inventory items, products for resale, raw materials) or "expense" (office supplies, utilities, services, consumables)
+Use your best judgment to categorize items:
+- "stock": Products intended for sale, raw materials, inventory items
+- "expense": Office supplies, utilities, services, maintenance, consumables
+Return only valid JSON without any markdown formatting or code blocks.
+"""
+MULTI_PART_ANALYSIS_PROMPT = """
+Analyze these multiple images of the same receipt and extract all information in JSON format:
+- items: Complete list of all items from all images
+- receipt_date: Date from the receipt (YYYY-MM-DD format)
+- total_amount: Total amount from receipt
+- store_name: Name of the store/merchant
+For each item, provide:
+- name: Item name/description
+- quantity: Quantity purchased (default to 1 if not specified)
+- unit_price: Price per unit
+- total_price: Total price for this item
+- category: Categorize as either "stock" (inventory items, products for resale, raw materials) or "expense" (office supplies, utilities, services, consumables)
+Combine information from all images to create a complete receipt analysis.
+Return only valid JSON without any markdown formatting or code blocks.
+"""
+def encode_image_to_base64(image_data):
+    """Convert image data to base64 string."""
     try:
+        if isinstance(image_data, str):
+            # If it's already base64, return as is
+            return image_data
+        # Convert bytes to base64
+        return base64.b64encode(image_data).decode('utf-8')
+    except Exception as e:
+        logger.error(f"Error encoding image: {str(e)}")
+        raise
+def process_single_receipt(image_data, content_type="image/jpeg"):
+    """Process a single receipt image."""
+    try:
+        base64_image = encode_image_to_base64(image_data)
+        # Create the request with the image
+        response = client.models.generate_content(
+            model='gemini-2.0-flash',
+            contents=[
+                {
+                    'parts': [
+                        {'text': RECEIPT_ANALYSIS_PROMPT},
+                        {
+                            'inline_data': {
+                                'mime_type': content_type,
+                                'data': base64_image
+                            }
+                        }
+                    ]
+                }
+            ]
         )
+        # Extract and parse the response
+        result_text = response.text.strip()
+        # Remove any markdown code block formatting
+        if result_text.startswith('```json'):
+            result_text = result_text[7:]
+        if result_text.endswith('```'):
+            result_text = result_text[:-3]
+        result_json = json.loads(result_text.strip())
+        return result_json
+    except json.JSONDecodeError as e:
+        logger.error(f"JSON parsing error: {str(e)}")
+        raise ValueError(f"Failed to parse AI response as JSON: {str(e)}")
     except Exception as e:
+        logger.error(f"Error processing receipt: {str(e)}")
+        raise
+def process_multi_part_receipt(images_data, content_types):
+    """Process multiple images of the same receipt."""
     try:
+        parts = [{'text': MULTI_PART_ANALYSIS_PROMPT}]
+        # Add each image to the request
+        for i, (image_data, content_type) in enumerate(zip(images_data, content_types)):
+            base64_image = encode_image_to_base64(image_data)
+            parts.append({
+                'inline_data': {
+                    'mime_type': content_type,
+                    'data': base64_image
+                }
+            })
+        response = client.models.generate_content(
+            model='gemini-1.5-flash',
+            contents=[{'parts': parts}]
         )
+        # Extract and parse the response
+        result_text = response.text.strip()
+        # Remove any markdown code block formatting
+        if result_text.startswith('```json'):
+            result_text = result_text[7:]
+        if result_text.endswith('```'):
+            result_text = result_text[:-3]
+        result_json = json.loads(result_text.strip())
+        return result_json
+    except json.JSONDecodeError as e:
+        logger.error(f"JSON parsing error: {str(e)}")
+        raise ValueError(f"Failed to parse AI response as JSON: {str(e)}")
+    except Exception as e:
+        logger.error(f"Error processing multi-part receipt: {str(e)}")
+        raise
+@app.route('/process-receipt', methods=['POST'])
+def process_receipt():
+    """Process a single receipt image."""
+    try:
+        if 'image' not in request.files:
+            return jsonify({'error': 'No image file provided'}), 400
+        file = request.files['image']
+        if file.filename == '':
+            return jsonify({'error': 'No image file selected'}), 400
+        # Read image data
+        image_data = file.read()
+        content_type = file.content_type or 'image/jpeg'
+        # Process the receipt
+        result = process_single_receipt(image_data, content_type)
+        return jsonify({
+            'success': True,
+            'data': result,
+            'message': 'Receipt processed successfully'
+        })
+    except ValueError as e:
+        return jsonify({'error': str(e)}), 400
+    except Exception as e:
+        logger.error(f"Unexpected error: {str(e)}")
+        return jsonify({'error': 'Internal server error'}), 500
+@app.route('/start-receipt-session', methods=['POST'])
+def start_receipt_session():
+    """Start a new multi-part receipt session."""
+    session_id = str(uuid.uuid4())
+    receipt_sessions[session_id] = {
+        'images': [],
+        'content_types': [],
+        'created_at': time.time()
+    }
+    return jsonify({
+        'success': True,
+        'session_id': session_id,
+        'message': 'Receipt session started'
+    })
+@app.route('/add-receipt-part/<session_id>', methods=['POST'])
+def add_receipt_part(session_id):
+    """Add an image part to an existing receipt session."""
+    try:
+        if session_id not in receipt_sessions:
+            return jsonify({'error': 'Invalid session ID'}), 404
+        if 'image' not in request.files:
+            return jsonify({'error': 'No image file provided'}), 400
+        file = request.files['image']
+        if file.filename == '':
+            return jsonify({'error': 'No image file selected'}), 400
+        # Read and store image data
+        image_data = file.read()
+        content_type = file.content_type or 'image/jpeg'
+        receipt_sessions[session_id]['images'].append(image_data)
+        receipt_sessions[session_id]['content_types'].append(content_type)
+        return jsonify({
+            'success': True,
+            'parts_count': len(receipt_sessions[session_id]['images']),
+            'message': 'Receipt part added successfully'
+        })
+    except Exception as e:
+        logger.error(f"Error adding receipt part: {str(e)}")
+        return jsonify({'error': 'Internal server error'}), 500
+@app.route('/process-receipt-session/<session_id>', methods=['POST'])
+def process_receipt_session(session_id):
+    """Process all parts of a multi-part receipt."""
+    try:
+        if session_id not in receipt_sessions:
+            return jsonify({'error': 'Invalid session ID'}), 404
+        session_data = receipt_sessions[session_id]
+        if not session_data['images']:
+            return jsonify({'error': 'No images in session'}), 400
+        # Process the multi-part receipt
+        result = process_multi_part_receipt(
+            session_data['images'],
+            session_data['content_types']
         )
+        # Clean up session
+        del receipt_sessions[session_id]
+        return jsonify({
+            'success': True,
+            'data': result,
+            'message': 'Multi-part receipt processed successfully'
+        })
+    except ValueError as e:
+        return jsonify({'error': str(e)}), 400
+    except Exception as e:
+        logger.error(f"Error processing receipt session: {str(e)}")
+        return jsonify({'error': 'Internal server error'}), 500
+@app.route('/bulk-process-receipts', methods=['POST'])
+def bulk_process_receipts():
+    """Process multiple individual receipts in bulk."""
+    try:
+        if 'images' not in request.files:
+            return jsonify({'error': 'No image files provided'}), 400
+        files = request.files.getlist('images')
+        if not files:
+            return jsonify({'error': 'No image files selected'}), 400
+        results = []
+        errors = []
+        for i, file in enumerate(files):
+            try:
+                if file.filename == '':
+                    errors.append(f"File {i+1}: No filename")
+                    continue
+                # Read image data
+                image_data = file.read()
+                content_type = file.content_type or 'image/jpeg'
+                # Process the receipt
+                result = process_single_receipt(image_data, content_type)
+                results.append({
+                    'file_index': i + 1,
+                    'filename': file.filename,
+                    'data': result
+                })
+            except Exception as e:
+                errors.append(f"File {i+1} ({file.filename}): {str(e)}")
+        return jsonify({
+            'success': True,
+            'processed_count': len(results),
+            'error_count': len(errors),
+            'results': results,
+            'errors': errors,
+            'message': f'Bulk processing completed. {len(results)} successful, {len(errors)} errors.'
+        })
     except Exception as e:
+        logger.error(f"Error in bulk processing: {str(e)}")
+        return jsonify({'error': 'Internal server error'}), 500
+@app.route('/health', methods=['GET'])
+def health_check():
+    """Health check endpoint."""
+    return jsonify({
+        'status': 'healthy',
+        'timestamp': time.time(),
+        'active_sessions': len(receipt_sessions)
+    })
+@app.route('/cleanup-sessions', methods=['POST'])
+def cleanup_old_sessions():
+    """Clean up old receipt sessions (older than 1 hour)."""
+    current_time = time.time()
+    cutoff_time = current_time - 3600  # 1 hour
+    old_sessions = [
+        session_id for session_id, data in receipt_sessions.items()
+        if data['created_at'] < cutoff_time
+    ]
+    for session_id in old_sessions:
+        del receipt_sessions[session_id]
+    return jsonify({
+        'success': True,
+        'cleaned_sessions': len(old_sessions),
+        'remaining_sessions': len(receipt_sessions)
+    })
 if __name__ == "__main__":
     app.run(debug=True, host="0.0.0.0", port=7860)