Spaces:
Sleeping
Sleeping
File size: 5,115 Bytes
7de5e88 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 |
"""
Flask API Server for Bill/Invoice OCR Extraction
"""
import os
import tempfile
from flask import Flask, request, jsonify
from flask_cors import CORS
from werkzeug.utils import secure_filename
from ocr_service import process_bill_image
# Application object that every route below registers on.
app = Flask(__name__)

# Development CORS policy: no origin restriction is configured here and
# credentialed requests are permitted.
CORS(app, supports_credentials=True)

# Upload constraints: accepted image extensions and the request-size cap.
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp', 'heic'}
MAX_CONTENT_LENGTH = 16 * 1024 * 1024  # 16MB max
app.config.update(MAX_CONTENT_LENGTH=MAX_CONTENT_LENGTH)
def allowed_file(filename, allowed_extensions=None):
    """Return True when *filename* ends in a permitted extension.

    The extension is whatever follows the last dot, compared
    case-insensitively.

    Args:
        filename: Name supplied by the client. May be ``None`` or empty,
            in which case the result is ``False`` rather than an error.
        allowed_extensions: Optional collection of lower-case extensions
            (without the dot). Defaults to the module-level
            ``ALLOWED_EXTENSIONS``.

    Returns:
        bool: Whether the file's extension is in the allowed collection.
    """
    # A missing name or a name without a dot has no extension to check.
    if not filename or '.' not in filename:
        return False
    if allowed_extensions is None:
        allowed_extensions = ALLOWED_EXTENSIONS
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in allowed_extensions
@app.route('/api/health', methods=['GET'])
def health_check():
    """Liveness probe: respond with the service status, name and version."""
    status_payload = dict(
        status='healthy',
        service='Invoice OCR API',
        version='1.0.0',
    )
    return jsonify(status_payload)
def _upload_error(message, status):
    """Build the JSON error envelope returned by the file-upload endpoint."""
    return jsonify({
        'success': False,
        'error': message,
        'header': {},
        'items': []
    }), status


@app.route('/api/extract', methods=['POST'])
def extract_invoice():
    """
    Extract invoice data from an uploaded image.

    Expects multipart form data with an 'image' file field.

    Returns:
        A ``(response, status)`` pair. The response is JSON:
        - 200 with the result of ``process_bill_image`` when it reports
          success, 422 with that same result when it does not;
        - 400 with an error envelope when the upload is missing, unnamed,
          or has a disallowed extension;
        - 500 with an error envelope when saving or processing raises.
    """
    # Local import, matching the file's existing style for helper modules.
    import shutil

    if 'image' not in request.files:
        return _upload_error('No image file provided', 400)

    upload = request.files['image']
    # `not` also covers a filename of None, not just the empty string.
    if not upload.filename:
        return _upload_error('No file selected', 400)

    if not allowed_file(upload.filename):
        supported = ", ".join(sorted(ALLOWED_EXTENSIONS))
        return _upload_error(
            f'File type not allowed. Supported: {supported}', 400)

    temp_dir = None
    try:
        # Save the upload into a private temp directory.
        temp_dir = tempfile.mkdtemp()
        filename = secure_filename(upload.filename)
        if not allowed_file(filename):
            # secure_filename() can strip a name down to nothing or drop the
            # dot (e.g. non-ASCII stems); rebuild a safe name from the
            # extension that was already validated above.
            extension = upload.filename.rsplit('.', 1)[1].lower()
            filename = f'upload.{extension}'
        temp_path = os.path.join(temp_dir, filename)
        upload.save(temp_path)

        result = process_bill_image(temp_path)
        status = 200 if result['success'] else 422
        return jsonify(result), status
    except Exception as e:
        # Top-level boundary: log the traceback, answer with the envelope.
        app.logger.exception('Invoice extraction failed')
        return _upload_error(f'Processing error: {str(e)}', 500)
    finally:
        # Best-effort cleanup of the whole directory, so a failure to delete
        # the file no longer leaves the directory behind.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
def _base64_error(message, status):
    """Build the JSON error envelope returned by the base64 endpoint."""
    return jsonify({
        'success': False,
        'error': message,
        'header': {},
        'items': []
    }), status


@app.route('/api/extract-base64', methods=['POST'])
def extract_from_base64():
    """
    Extract invoice data from a base64 encoded image.

    Expects a JSON object with an 'image' field containing base64 data,
    optionally preceded by a data-URL prefix (everything up to the first
    comma is discarded).

    Returns:
        A ``(response, status)`` pair. The response is JSON:
        - 200 with the result of ``process_bill_image`` when it reports
          success, 422 with that same result when it does not;
        - 400 with an error envelope when the body is not a JSON object,
          the 'image' field is missing/empty/not a string, or the data is
          not valid base64;
        - 500 with an error envelope when saving or processing raises.
    """
    import base64
    import shutil

    # silent=True yields None for a non-JSON or malformed body instead of
    # aborting, so the client always gets the JSON error envelope.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'image' not in data:
        return _base64_error('No image data provided', 400)

    image_data = data['image']
    if not isinstance(image_data, str) or not image_data.strip():
        return _base64_error('No image data provided', 400)

    # Remove data URL prefix if present
    if ',' in image_data:
        image_data = image_data.split(',', 1)[1]

    try:
        image_bytes = base64.b64decode(image_data)
    except ValueError:
        # binascii.Error is a ValueError subclass; bad input is a client error.
        return _base64_error('Invalid base64 image data', 400)
    if not image_bytes:
        return _base64_error('Invalid base64 image data', 400)

    temp_dir = None
    try:
        # Save to temp file. The name is fixed regardless of the real format.
        temp_dir = tempfile.mkdtemp()
        temp_path = os.path.join(temp_dir, 'image.jpg')
        with open(temp_path, 'wb') as f:
            f.write(image_bytes)

        result = process_bill_image(temp_path)
        status = 200 if result['success'] else 422
        return jsonify(result), status
    except Exception as e:
        # Top-level boundary: log the traceback, answer with the envelope.
        app.logger.exception('Base64 invoice extraction failed')
        return _base64_error(f'Processing error: {str(e)}', 500)
    finally:
        # Best-effort cleanup of the whole directory, so a failure to delete
        # the file no longer leaves the directory behind.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
if __name__ == '__main__':
    # Startup banner listing the available endpoints.
    print("=" * 50)
    print("Invoice OCR API Server")
    print("=" * 50)
    print("Endpoints:")
    print(" GET /api/health - Health check")
    print(" POST /api/extract - Extract from file upload")
    print(" POST /api/extract-base64 - Extract from base64")
    print("=" * 50)

    # The server listens on every interface, so the interactive debugger
    # (which allows arbitrary code execution) must be off unless explicitly
    # requested. Set FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in {
        '1', 'true', 'yes', 'on',
    }

    # Port 7860 is required for Hugging Face Spaces
    app.run(host='0.0.0.0', port=7860, debug=debug_enabled)
|