File size: 2,833 Bytes
c57ea44
2ef3683
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c57ea44
 
 
2ef3683
 
 
 
 
 
 
 
 
 
 
c57ea44
2ef3683
 
c57ea44
2ef3683
 
 
 
 
c57ea44
2ef3683
 
 
 
 
 
 
c57ea44
2ef3683
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c57ea44
2ef3683
 
 
 
 
 
 
 
 
 
 
 
 
 
c57ea44
 
2ef3683
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# gevent's monkey patching has to run before any other module imports the
# standard-library pieces it replaces (socket, ssl, threading, ...), so it is
# done first and the remaining imports deliberately follow it.
from gevent import monkey

monkey.patch_all()

import io  # noqa: E402
import logging  # noqa: E402
import multiprocessing  # noqa: E402
import os  # noqa: E402
from concurrent.futures import ThreadPoolExecutor  # noqa: E402

from flask import Flask, jsonify, request  # noqa: E402
from gevent.pywsgi import WSGIServer  # noqa: E402

from ocr import OcrPriceExtractor  # noqa: E402

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)

# Worker pool for OCR processing, sized to the reported CPU count.
# NOTE(review): threading is monkey-patched above, so these workers are
# presumably greenlets rather than OS threads and CPU-bound OCR may block the
# gevent loop -- confirm, and consider gevent's native thread pool if so.
ocr_pool = ThreadPoolExecutor(max_workers=multiprocessing.cpu_count())
# Single extractor instance shared by every request.
ocr = OcrPriceExtractor()

# ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'bmp'}

# def allowed_file(filename):
#     return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS

def process_ocr_task(bytes_io):
    """Run the shared OCR extractor on an in-memory image.

    Meant to be submitted to ``ocr_pool`` so the request handler can bound
    the wait with a timeout.

    Args:
        bytes_io: File-like object holding the image bytes; it is passed
            unchanged to ``ocr.extract_text``.

    Returns:
        The ``(text, prices)`` pair produced by ``ocr.extract_text``, or
        ``(None, None)`` if extraction raised.
    """
    try:
        # Unpack explicitly so an unexpected return shape is treated as a
        # failure here rather than surfacing later in the request handler.
        text, prices = ocr.extract_text(bytes_io)
        return text, prices
    except Exception as e:
        # Worker boundary: report failure through the return value, but keep
        # the full traceback in the log so the cause can be diagnosed.
        logger.exception("OCR processing error: %s", e)
        return None, None

# Upper bound, in seconds, on how long a request waits for its OCR result.
OCR_TIMEOUT_SECONDS = 30


@app.route('/ocr', methods=['POST'])
def process_image():
    """Run OCR on an uploaded image and return the result as JSON.

    Expects a multipart POST with the file in the ``image`` form field.

    Returns:
        A Flask JSON response, paired with a status code on failure:

        * 200 -- ``{'success': True, 'data': {'text': ..., 'prices': ...}}``
        * 400 -- the ``image`` part is missing or has no filename
        * 500 -- the OCR task reported failure, or an unexpected error
        * 504 -- OCR did not finish within ``OCR_TIMEOUT_SECONDS``

        Every failure body has the shape
        ``{'success': False, 'error': <message>}``.
    """
    # Imported here so the handler is self-contained; before Python 3.11 this
    # class is distinct from the built-in TimeoutError.
    from concurrent.futures import TimeoutError as FutureTimeoutError

    try:
        if 'image' not in request.files:
            return jsonify({
                'success': False,
                'error': 'No image file provided'
            }), 400

        upload = request.files['image']

        # A falsy filename (empty string or None) means no file was chosen.
        if not upload.filename:
            return jsonify({
                'success': False,
                'error': 'No selected file'
            }), 400

        # Read file to BytesIO
        bytes_io = io.BytesIO(upload.read())

        # Submit OCR task to the pool and wait for it with a deadline
        future = ocr_pool.submit(process_ocr_task, bytes_io)
        try:
            text, prices = future.result(timeout=OCR_TIMEOUT_SECONDS)
        except FutureTimeoutError:
            # cancel() only succeeds if the task has not started yet; a task
            # that is already running is left to finish in the background.
            future.cancel()
            logger.error(
                "OCR processing timed out after %s seconds",
                OCR_TIMEOUT_SECONDS,
            )
            return jsonify({
                'success': False,
                'error': 'OCR processing timed out'
            }), 504

        if text is None:
            return jsonify({
                'success': False,
                'error': 'Failed to process image'
            }), 500

        return jsonify({
            'success': True,
            'data': {
                'text': text,
                'prices': prices
            }
        })

    except Exception as e:
        # Keep the details (with traceback) in the server log and send the
        # client a generic message so internal state is not disclosed.
        logger.exception("API error: %s", e)
        return jsonify({
            'success': False,
            'error': 'Internal server error'
        }), 500

@app.route('/health', methods=['GET'])
def health_check():
    """Return a JSON status document for the service.

    The body carries a fixed ``status`` and ``service`` name plus two figures
    read from the OCR executor: its configured worker limit and the size of
    its internal thread collection.
    """
    # Both figures come from private attributes of the executor.
    worker_limit = ocr_pool._max_workers
    thread_count = len(ocr_pool._threads)

    payload = {
        'status': 'healthy',
        'service': 'ocr-api',
        'workers': worker_limit,
        'active_threads': thread_count,
    }
    return jsonify(payload)

if __name__ == '__main__':
    # FLASK_ENV=development selects Flask's own server; anything else is
    # treated as production.
    dev_mode = os.environ.get('FLASK_ENV') == 'development'

    if not dev_mode:
        # Production mode: serve the app through gevent's WSGI server
        logger.info(f"Starting server with {ocr_pool._max_workers} workers")
        WSGIServer(('0.0.0.0', 5000), app).serve_forever()
    else:
        # Development mode: Flask's built-in server with debug enabled
        app.run(host='0.0.0.0', port=5000, debug=True)