Spaces:
Sleeping
Sleeping
| """ | |
| Flask API Server for Bill/Invoice OCR Extraction | |
| """ | |
| import os | |
| import tempfile | |
| from flask import Flask, request, jsonify | |
| from flask_cors import CORS | |
| from werkzeug.utils import secure_filename | |
| from ocr_service import process_bill_image | |
# Upload constraints: accepted image extensions and the request-size ceiling.
MAX_CONTENT_LENGTH = 16 * 1024 * 1024  # 16 MB
ALLOWED_EXTENSIONS = {
    'png',
    'jpg',
    'jpeg',
    'gif',
    'bmp',
    'webp',
    'heic',
}

app = Flask(__name__)
app.config.update(MAX_CONTENT_LENGTH=MAX_CONTENT_LENGTH)

# No origin restriction is passed to flask-cors (development setup), and
# credentialed cross-origin requests are enabled.
# NOTE(review): confirm this combination is intended outside development.
CORS(app, supports_credentials=True)
def allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared case-insensitively.
    A name without any dot is rejected.
    """
    _, dot, extension = filename.rpartition('.')
    if not dot:
        return False
    return extension.lower() in ALLOWED_EXTENSIONS
@app.route('/api/health', methods=['GET'])
def health_check():
    """Health check endpoint.

    Registered as ``GET /api/health``, the path advertised by the startup
    banner; without the route registration the view is unreachable.

    Returns:
        A JSON response with the fixed ``status``, ``service`` and
        ``version`` fields.
    """
    return jsonify({
        'status': 'healthy',
        'service': 'Invoice OCR API',
        'version': '1.0.0'
    })
@app.route('/api/extract', methods=['POST'])
def extract_invoice():
    """Extract invoice data from an uploaded image.

    Registered as ``POST /api/extract``. Expects multipart form data with an
    ``image`` file field.

    Returns:
        A ``(response, status)`` tuple. On the processing path the body is
        whatever ``process_bill_image`` returns: 200 when its ``success``
        entry is truthy, 422 otherwise. Error bodies carry ``success=False``,
        an ``error`` message and empty ``header`` / ``items``: 400 for a
        missing or unsupported upload, 500 for an unexpected failure.
    """
    import shutil

    def error_response(message, status):
        # Every failure uses the same envelope so clients parse one shape.
        return jsonify({
            'success': False,
            'error': message,
            'header': {},
            'items': []
        }), status

    # Check if file was uploaded
    if 'image' not in request.files:
        return error_response('No image file provided', 400)

    upload = request.files['image']

    # A missing filename (None) is treated like an empty one instead of
    # crashing in allowed_file().
    if not upload.filename:
        return error_response('No file selected', 400)

    if not allowed_file(upload.filename):
        # Sorted so the message is stable across runs (set order is not).
        supported = ", ".join(sorted(ALLOWED_EXTENSIONS))
        return error_response(
            f'File type not allowed. Supported: {supported}', 400
        )

    try:
        temp_dir = tempfile.mkdtemp()
        try:
            # allowed_file() passed, so a dot and a whitelisted extension exist.
            extension = upload.filename.rsplit('.', 1)[1].lower()
            filename = secure_filename(upload.filename)
            # secure_filename() can strip the stem (e.g. non-ASCII names),
            # leaving '' or a bare 'jpg'; fall back to a generated name that
            # keeps the validated extension.
            if not filename.lower().endswith(f'.{extension}'):
                filename = f'upload.{extension}'
            temp_path = os.path.join(temp_dir, filename)
            upload.save(temp_path)

            # Process the image
            result = process_bill_image(temp_path)
        finally:
            # Best-effort cleanup of the whole directory, also when saving or
            # processing failed part-way.
            shutil.rmtree(temp_dir, ignore_errors=True)

        status = 200 if result['success'] else 422
        return jsonify(result), status
    except Exception as e:
        app.logger.exception('Invoice extraction failed')
        return error_response(f'Processing error: {str(e)}', 500)
@app.route('/api/extract-base64', methods=['POST'])
def extract_from_base64():
    """Extract invoice data from a base64 encoded image.

    Registered as ``POST /api/extract-base64``. Expects a JSON object with an
    ``image`` field containing base64 data, optionally prefixed with a data
    URL header (everything up to the first comma is dropped).

    Returns:
        A ``(response, status)`` tuple. On the processing path the body is
        whatever ``process_bill_image`` returns: 200 when its ``success``
        entry is truthy, 422 otherwise. Error bodies carry ``success=False``,
        an ``error`` message and empty ``header`` / ``items``: 400 for a
        missing, non-string, empty or undecodable image, 500 for an
        unexpected failure.
    """
    import base64
    import shutil

    def error_response(message, status):
        # Every failure uses the same envelope so clients parse one shape.
        return jsonify({
            'success': False,
            'error': message,
            'header': {},
            'items': []
        }), status

    # silent=True turns a wrong content type or malformed JSON into None, so
    # the client gets the JSON 400 below rather than a framework error page.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'image' not in data:
        return error_response('No image data provided', 400)

    image_data = data['image']
    if not isinstance(image_data, str):
        return error_response('No image data provided', 400)

    # Remove data URL prefix if present
    if ',' in image_data:
        image_data = image_data.split(',', 1)[1]

    try:
        image_bytes = base64.b64decode(image_data)
    except ValueError:
        # binascii.Error (bad padding) subclasses ValueError; non-ASCII input
        # raises ValueError directly. Both are client errors, not server ones.
        return error_response('Invalid base64 image data', 400)

    if not image_bytes:
        return error_response('No image data provided', 400)

    try:
        temp_dir = tempfile.mkdtemp()
        try:
            # Save to temp file
            temp_path = os.path.join(temp_dir, 'image.jpg')
            with open(temp_path, 'wb') as f:
                f.write(image_bytes)

            # Process the image
            result = process_bill_image(temp_path)
        finally:
            # Best-effort cleanup of the whole directory, also when writing
            # or processing failed part-way.
            shutil.rmtree(temp_dir, ignore_errors=True)

        status = 200 if result['success'] else 422
        return jsonify(result), status
    except Exception as e:
        app.logger.exception('Base64 invoice extraction failed')
        return error_response(f'Processing error: {str(e)}', 500)
if __name__ == '__main__':
    # Startup banner listing the endpoints this server is meant to expose.
    separator = "=" * 50
    print(separator)
    print("Invoice OCR API Server")
    print(separator)
    print("Endpoints:")
    print(" GET /api/health - Health check")
    print(" POST /api/extract - Extract from file upload")
    print(" POST /api/extract-base64 - Extract from base64")
    print(separator)

    # The Werkzeug debugger allows arbitrary code execution, so on a server
    # bound to all interfaces it is opt-in via FLASK_DEBUG instead of always on.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in (
        '1', 'true', 'yes', 'on'
    )

    # Port 7860 is required for Hugging Face Spaces
    app.run(host='0.0.0.0', port=7860, debug=debug_enabled)