# invoice_ocr_api / app.py
# Prathamesh10's picture
# Upload 5 files
# 7de5e88 verified
"""
Flask API Server for Bill/Invoice OCR Extraction
"""
import base64
import os
import shutil
import tempfile

from flask import Flask, request, jsonify
from flask_cors import CORS
from werkzeug.utils import secure_filename

from ocr_service import process_bill_image
# Application object that the route decorators below register against.
app = Flask(__name__)
# Allow all origins for development (CORS)
# NOTE(review): no `origins` argument is passed, so flask-cors falls back to
# its default origin policy while credentials are enabled. Confirm this
# combination is intended before exposing the service publicly.
CORS(app, supports_credentials=True)
# Configuration
# Lower-case file extensions accepted by allowed_file().
# NOTE(review): whether the OCR backend can decode every format listed here
# (e.g. 'heic') is not visible from this file -- TODO confirm.
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp', 'heic'}
MAX_CONTENT_LENGTH = 16 * 1024 * 1024 # 16MB max
# Stored under Flask's MAX_CONTENT_LENGTH config key (request body size cap).
app.config['MAX_CONTENT_LENGTH'] = MAX_CONTENT_LENGTH
def allowed_file(filename):
    """Return True when the filename's extension is in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared case-insensitively.
    A name without any dot is rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
@app.route('/api/health', methods=['GET'])
def health_check():
    """Liveness probe: respond with a fixed JSON description of the service."""
    payload = {
        'status': 'healthy',
        'service': 'Invoice OCR API',
        'version': '1.0.0',
    }
    return jsonify(payload)
@app.route('/api/extract', methods=['POST'])
def extract_invoice():
    """
    Extract invoice data from an uploaded image.

    Expects multipart form data with an 'image' file field. The upload is
    written to a private temporary directory, passed to
    ``process_bill_image`` by path, and the directory is removed afterwards
    whether or not processing succeeded.

    Returns:
        A ``(response, status)`` pair where the response is JSON:
        200 - ``process_bill_image`` returned a result with a truthy
              'success' entry (the result is returned as-is)
        422 - the result's 'success' entry was falsy
        400 - the 'image' field is missing, has no filename, or has an
              extension outside ALLOWED_EXTENSIONS
        500 - saving or processing raised an exception
    """
    def _failure(message, status):
        # Every error response shares the same envelope as a successful
        # extraction so clients can always read 'header' and 'items'.
        return jsonify({
            'success': False,
            'error': message,
            'header': {},
            'items': []
        }), status

    # Check if file was uploaded
    if 'image' not in request.files:
        return _failure('No image file provided', 400)

    file = request.files['image']
    # `not` also covers a filename of None, which would otherwise raise
    # TypeError inside allowed_file() before any error handling applies.
    if not file.filename:
        return _failure('No file selected', 400)

    if not allowed_file(file.filename):
        # Sorted so the message is stable across runs (set order is not).
        supported = ', '.join(sorted(ALLOWED_EXTENSIONS))
        return _failure(f'File type not allowed. Supported: {supported}', 400)

    try:
        temp_dir = tempfile.mkdtemp()
        try:
            # secure_filename strips path separators so the upload cannot
            # escape the temporary directory.
            temp_path = os.path.join(temp_dir, secure_filename(file.filename))
            file.save(temp_path)
            result = process_bill_image(temp_path)
        finally:
            # Best-effort cleanup of the directory and anything inside it;
            # a cleanup failure must not replace the real response.
            shutil.rmtree(temp_dir, ignore_errors=True)

        status = 200 if result['success'] else 422
        return jsonify(result), status
    except Exception as e:
        # Top-level boundary for this endpoint: log the traceback, then
        # report the failure in the API's JSON envelope.
        # NOTE(review): the exception text is returned to the client, as in
        # the original contract -- confirm this is acceptable to expose.
        app.logger.exception('Invoice extraction failed')
        return _failure(f'Processing error: {str(e)}', 500)
@app.route('/api/extract-base64', methods=['POST'])
def extract_from_base64():
    """
    Extract invoice data from a base64 encoded image.

    Expects a JSON object with an 'image' field containing base64 data,
    optionally prefixed with a data URL header (everything up to the first
    comma is dropped). The decoded bytes are written to a temporary file
    named 'image.jpg', passed to ``process_bill_image`` by path, and the
    temporary directory is removed afterwards.

    Returns:
        A ``(response, status)`` pair where the response is JSON:
        200 - ``process_bill_image`` returned a result with a truthy
              'success' entry (the result is returned as-is)
        422 - the result's 'success' entry was falsy
        400 - the body is not a JSON object with an 'image' field, or the
              field is not a string of decodable, non-empty base64
        500 - writing the file or processing raised an exception
    """
    def _failure(message, status):
        # Every error response shares the same envelope as a successful
        # extraction so clients can always read 'header' and 'items'.
        return jsonify({
            'success': False,
            'error': message,
            'header': {},
            'items': []
        }), status

    # silent=True yields None for a missing, malformed, or non-JSON body so
    # the caller gets this API's JSON error instead of a framework error page.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'image' not in data:
        return _failure('No image data provided', 400)

    image_data = data['image']
    if not isinstance(image_data, str):
        return _failure('Image data must be a base64 string', 400)

    # Remove data URL prefix if present
    _, separator, payload = image_data.partition(',')
    if separator:
        image_data = payload

    try:
        image_bytes = base64.b64decode(image_data)
    except ValueError:
        # binascii.Error (bad padding) is a ValueError subclass; non-ASCII
        # input raises ValueError directly. Both are client errors.
        return _failure('Invalid base64 image data', 400)
    if not image_bytes:
        return _failure('Invalid base64 image data', 400)

    try:
        temp_dir = tempfile.mkdtemp()
        try:
            # Fixed name: the payload carries no filename of its own.
            temp_path = os.path.join(temp_dir, 'image.jpg')
            with open(temp_path, 'wb') as f:
                f.write(image_bytes)
            result = process_bill_image(temp_path)
        finally:
            # Best-effort cleanup of the directory and anything inside it;
            # a cleanup failure must not replace the real response.
            shutil.rmtree(temp_dir, ignore_errors=True)

        status = 200 if result['success'] else 422
        return jsonify(result), status
    except Exception as e:
        # Top-level boundary for this endpoint: log the traceback, then
        # report the failure in the API's JSON envelope.
        # NOTE(review): the exception text is returned to the client, as in
        # the original contract -- confirm this is acceptable to expose.
        app.logger.exception('Base64 invoice extraction failed')
        return _failure(f'Processing error: {str(e)}', 500)
if __name__ == '__main__':
    # Startup banner listing the routes registered in this module.
    print("=" * 50)
    print("Invoice OCR API Server")
    print("=" * 50)
    print("Endpoints:")
    print(" GET /api/health - Health check")
    print(" POST /api/extract - Extract from file upload")
    print(" POST /api/extract-base64 - Extract from base64")
    print("=" * 50)
    # Debug mode enables Werkzeug's interactive debugger, which lets anyone
    # who can reach the server execute code. Because the server binds to all
    # interfaces, debug is opt-in via FLASK_DEBUG rather than always on.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in {
        '1', 'true', 'yes', 'on'
    }
    # Port 7860 is required for Hugging Face Spaces
    app.run(host='0.0.0.0', port=7860, debug=debug_enabled)