ocr / api.py
kerenmasku's picture
Update api.py
2ef3683 verified
from flask import Flask, request, jsonify
from ocr import OcrPriceExtractor
import io
from gevent.pywsgi import WSGIServer
from gevent import monkey
import multiprocessing
from concurrent.futures import ThreadPoolExecutor
import logging
import os
# Apply gevent's monkey patching to the standard library.
# NOTE(review): this runs after flask, ocr and concurrent.futures have already
# been imported; gevent's documentation recommends patching as early as
# possible, before other imports — confirm this ordering is intentional.
monkey.patch_all()
# Configure root logging at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# The Flask application object that the route decorators below register on.
app = Flask(__name__)
# Thread pool for OCR processing, with one worker per reported CPU.
# NOTE(review): with threading monkey-patched, these workers are presumably
# greenlet-backed rather than OS threads — verify that OCR work does not block
# the gevent loop.
ocr_pool = ThreadPoolExecutor(max_workers=multiprocessing.cpu_count())
# Single extractor instance created at import time and used by process_ocr_task.
ocr = OcrPriceExtractor()
# ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'bmp'}
# def allowed_file(filename):
# return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
def process_ocr_task(bytes_io):
    """Run OCR on an in-memory image; meant to be submitted to the OCR pool.

    Args:
        bytes_io: Buffer holding the image bytes; it is passed unchanged to
            ``ocr.extract_text``.

    Returns:
        The ``(text, prices)`` pair produced by ``ocr.extract_text``, or
        ``(None, None)`` if extraction (or unpacking its result) raises.
    """
    try:
        text, prices = ocr.extract_text(bytes_io)
    except Exception as e:
        # Deliberate best-effort boundary: the caller treats a None text as
        # "processing failed". logger.exception keeps the same message but
        # also records the traceback, which the previous logger.error dropped.
        logger.exception("OCR processing error: %s", e)
        return None, None
    return text, prices
@app.route('/ocr', methods=['POST'])
def process_image():
    """Handle ``POST /ocr``: run OCR on the uploaded ``image`` file.

    The upload is read fully into memory, handed to ``process_ocr_task`` on
    the OCR pool, and awaited for at most 30 seconds.

    Returns:
        A JSON response, optionally paired with a status code:
        * 400 when the ``image`` part is missing or has an empty filename.
        * 500 when OCR fails, times out, or an unexpected error occurs.
        * 200 with ``{'success': True, 'data': {'text', 'prices'}}`` otherwise.
    """
    # Local import keeps this block self-contained; before Python 3.11 this
    # class is distinct from the builtin TimeoutError.
    from concurrent.futures import TimeoutError as FutureTimeoutError

    timeout_seconds = 30

    try:
        if 'image' not in request.files:
            return jsonify({
                'success': False,
                'error': 'No image file provided'
            }), 400

        file = request.files['image']
        if file.filename == '':
            return jsonify({
                'success': False,
                'error': 'No selected file'
            }), 400

        # Read the whole upload into an in-memory buffer.
        bytes_io = io.BytesIO(file.read())

        # Submit the OCR task to the pool and wait for it with a deadline.
        future = ocr_pool.submit(process_ocr_task, bytes_io)
        try:
            text, prices = future.result(timeout=timeout_seconds)
        except FutureTimeoutError:
            # str() of a timeout error is empty, so without this branch the
            # client received 'error': ''. Cancelling also stops a task that
            # is still queued from running for nobody.
            future.cancel()
            logger.error(
                "OCR processing timed out after %s seconds", timeout_seconds
            )
            return jsonify({
                'success': False,
                'error': 'OCR processing timed out'
            }), 500

        if text is None:
            return jsonify({
                'success': False,
                'error': 'Failed to process image'
            }), 500

        response = {
            'success': True,
            'data': {
                'text': text,
                'prices': prices
            }
        }
        return jsonify(response)
    except Exception as e:
        # Top-level boundary for the endpoint: log with traceback and reply.
        # NOTE(review): str(e) exposes internal error text to the client;
        # kept for backward compatibility — consider a generic message.
        logger.exception("API error: %s", e)
        return jsonify({
            'success': False,
            'error': str(e)
        }), 500
@app.route('/health', methods=['GET'])
def health_check():
    """Handle ``GET /health``: report a static status plus OCR pool figures."""
    # Both values come from the executor's private attributes.
    pool_size = ocr_pool._max_workers
    spawned_threads = len(ocr_pool._threads)

    payload = {
        'status': 'healthy',
        'service': 'ocr-api',
        'workers': pool_size,
        'active_threads': spawned_threads,
    }
    return jsonify(payload)
if __name__ == '__main__':
    run_mode = os.environ.get('FLASK_ENV')
    if run_mode != 'development':
        # Production mode: serve the app through gevent's WSGI server.
        logger.info(f"Starting server with {ocr_pool._max_workers} workers")
        WSGIServer(('0.0.0.0', 5000), app).serve_forever()
    else:
        # Development mode: Flask's built-in server with debug enabled.
        app.run(host='0.0.0.0', port=5000, debug=True)