File size: 5,115 Bytes
7de5e88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
"""

Flask API Server for Bill/Invoice OCR Extraction

"""

import os
import tempfile
from flask import Flask, request, jsonify
from flask_cors import CORS
from werkzeug.utils import secure_filename

from ocr_service import process_bill_image

# Upload constraints shared by the endpoints in this module.
MAX_CONTENT_LENGTH = 16 * 1024 * 1024  # 16MB max
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp', 'heic'}

# Application object; the request body size cap is applied through Flask config.
app = Flask(__name__)
app.config['MAX_CONTENT_LENGTH'] = MAX_CONTENT_LENGTH

# Cross-origin access for development, with credentials support enabled.
# NOTE(review): no explicit origin list is configured here - confirm that
# credentialed requests from any origin are acceptable outside development.
CORS(app, supports_credentials=True)


def allowed_file(filename):
    """Return True when *filename* ends in an extension from ALLOWED_EXTENSIONS.

    The extension is the text after the last '.', compared case-insensitively.
    A name without any '.' is rejected.
    """
    if '.' not in filename:
        return False

    extension = filename.rsplit('.', 1)[1].lower()
    return extension in ALLOWED_EXTENSIONS


@app.route('/api/health', methods=['GET'])
def health_check():
    """Return a static JSON payload with status, service name and version."""
    payload = {
        'status': 'healthy',
        'service': 'Invoice OCR API',
        'version': '1.0.0',
    }
    return jsonify(payload)


@app.route('/api/extract', methods=['POST'])
def extract_invoice():
    """Extract invoice data from an uploaded image.

    Expects multipart form data with an 'image' file field whose filename
    ends in one of ALLOWED_EXTENSIONS.

    Returns:
        A (JSON response, HTTP status) pair:
          200 - result of process_bill_image when it reports success
          422 - result of process_bill_image when it does not
          400 - upload missing, unnamed, or of a disallowed type
          500 - saving or processing the image raised an exception
        Error responses carry 'success', 'error', 'header' and 'items' keys.
    """
    def _failure(message, status):
        # All error responses share one envelope so clients can parse them
        # the same way regardless of the failure.
        return jsonify({
            'success': False,
            'error': message,
            'header': {},
            'items': []
        }), status

    # Check if file was uploaded
    if 'image' not in request.files:
        return _failure('No image file provided', 400)

    file = request.files['image']

    # Covers both an empty filename and a missing (None) one.
    if not file.filename:
        return _failure('No file selected', 400)

    if not allowed_file(file.filename):
        # Sorted so the message is stable; set iteration order is not.
        supported = ", ".join(sorted(ALLOWED_EXTENSIONS))
        return _failure(f'File type not allowed. Supported: {supported}', 400)

    try:
        # TemporaryDirectory removes the directory and everything in it on
        # exit, including when saving or processing raises.
        with tempfile.TemporaryDirectory() as temp_dir:
            safe_name = secure_filename(file.filename)
            if '.' not in safe_name:
                # Sanitising can strip the name down to nothing or drop the
                # dot; fall back to a fixed name that keeps the extension
                # already validated by allowed_file().
                extension = file.filename.rsplit('.', 1)[1].lower()
                safe_name = f'upload.{extension}'

            temp_path = os.path.join(temp_dir, safe_name)
            file.save(temp_path)

            # Process the image while the temp file still exists
            result = process_bill_image(temp_path)

        return jsonify(result), (200 if result['success'] else 422)

    except Exception as e:
        # Boundary handler: record the traceback server-side and report the
        # failure to the client in the standard envelope.
        app.logger.exception('Invoice extraction failed')
        return _failure(f'Processing error: {e}', 500)


@app.route('/api/extract-base64', methods=['POST'])
def extract_from_base64():
    """Extract invoice data from a base64 encoded image.

    Expects a JSON object with an 'image' field containing base64 data,
    optionally prefixed with a data URL header (text up to the first comma).

    Returns:
        A (JSON response, HTTP status) pair:
          200 - result of process_bill_image when it reports success
          422 - result of process_bill_image when it does not
          400 - body is not a JSON object, 'image' is missing/empty/not a
                string, or the payload is not valid base64
          500 - writing or processing the image raised an exception
        Error responses carry 'success', 'error', 'header' and 'items' keys.
    """
    import base64

    def _failure(message, status):
        # All error responses share one envelope so clients can parse them
        # the same way regardless of the failure.
        return jsonify({
            'success': False,
            'error': message,
            'header': {},
            'items': []
        }), status

    # silent=True yields None for a non-JSON or malformed body instead of
    # letting Flask abort the request with its own error response.
    data = request.get_json(silent=True)

    # A JSON list or string would pass a bare "'image' in data" test or fail
    # on indexing later, so require an object explicitly.
    if not isinstance(data, dict) or 'image' not in data:
        return _failure('No image data provided', 400)

    image_data = data['image']
    if not isinstance(image_data, str) or not image_data:
        return _failure('No image data provided', 400)

    # Remove data URL prefix if present
    if ',' in image_data:
        image_data = image_data.split(',', 1)[1]

    try:
        image_bytes = base64.b64decode(image_data)
    except ValueError:
        # binascii.Error (bad padding) is a ValueError subclass; non-ASCII
        # input raises ValueError directly. Both are client errors.
        return _failure('Invalid base64 image data', 400)

    if not image_bytes:
        return _failure('No image data provided', 400)

    try:
        # TemporaryDirectory removes the directory and everything in it on
        # exit, including when writing or processing raises.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = os.path.join(temp_dir, 'image.jpg')

            with open(temp_path, 'wb') as f:
                f.write(image_bytes)

            # Process the image while the temp file still exists
            result = process_bill_image(temp_path)

        return jsonify(result), (200 if result['success'] else 422)

    except Exception as e:
        # Boundary handler: record the traceback server-side and report the
        # failure to the client in the standard envelope.
        app.logger.exception('Base64 invoice extraction failed')
        return _failure(f'Processing error: {e}', 500)


if __name__ == '__main__':
    # Startup banner listing the routes registered in this module.
    print("=" * 50)
    print("Invoice OCR API Server")
    print("=" * 50)
    print("Endpoints:")
    print("  GET  /api/health     - Health check")
    print("  POST /api/extract    - Extract from file upload")
    print("  POST /api/extract-base64 - Extract from base64")
    print("=" * 50)

    # Debug mode enables the interactive debugger, which must not be
    # reachable on a server bound to all interfaces. Keep it off unless
    # explicitly requested, e.g. FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in {
        '1', 'true', 'yes', 'on'
    }

    # Port 7860 is required for Hugging Face Spaces
    app.run(host='0.0.0.0', port=7860, debug=debug_enabled)