Text_extraction / app.py
gopichandra's picture
Update app.py
71eb2a7 verified
import base64
import os

import cv2
import numpy as np
import pytesseract
from flask import Flask, request, jsonify
from flask_cors import CORS
# Initialize Flask app
# `app` is the WSGI application object; the route decorators below register
# their handlers on it.
app = Flask(__name__)
# Enable cross-origin requests for the whole app. No options are passed, so
# flask-cors uses its defaults.
# NOTE(review): the defaults presumably allow every origin — confirm and
# restrict before exposing this service publicly.
CORS(app)
def decode_base64_image(image_data):
    """
    Decode a Base64 image into OpenCV format.

    Accepts either a data URI (``data:image/png;base64,<payload>``) or a bare
    Base64 payload with no header.

    Args:
        image_data: Base64 text, optionally prefixed with a data-URI header.

    Returns:
        A ``(image, error)`` pair. On success ``image`` is the array produced
        by ``cv2.imdecode(..., cv2.IMREAD_COLOR)`` and ``error`` is ``None``.
        On failure ``image`` is ``None`` and ``error`` is a message starting
        with ``"Error decoding image"``. The function reports problems through
        the pair rather than raising, so callers can always unpack two values.
    """
    if not isinstance(image_data, str) or not image_data.strip():
        return None, "Error decoding image: expected a non-empty Base64 string"

    # Drop the "data:<mime>;base64" header when present. A comma never occurs
    # in the Base64 alphabet, so the first comma can only be the separator.
    _header, separator, payload = image_data.partition(",")
    encoded = payload if separator else image_data

    try:
        image_bytes = base64.b64decode(encoded)
    except (ValueError, TypeError) as e:
        # binascii.Error (bad padding / characters) is a ValueError subclass.
        return None, f"Error decoding image: {str(e)}"

    if not image_bytes:
        return None, "Error decoding image: image payload is empty"

    try:
        np_array = np.frombuffer(image_bytes, np.uint8)
        image = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
    except Exception as e:
        # Contract is "never raise": surface any decoder failure as an error.
        return None, f"Error decoding image: {str(e)}"

    # imdecode signals an unreadable/unsupported buffer by returning None
    # instead of raising, so that case must be checked explicitly.
    if image is None:
        return None, "Error decoding image: data is not a supported image format"

    return image, None
def preprocess_image(image):
    """
    Binarize an image ahead of OCR.

    The input is converted from BGR to grayscale and then passed through
    ``cv2.threshold`` with a fixed threshold of 127, a maximum value of 255
    and the ``cv2.THRESH_BINARY`` mode.

    Args:
        image: Image array accepted by ``cv2.cvtColor`` with
            ``cv2.COLOR_BGR2GRAY``.

    Returns:
        The thresholded single-channel image.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (threshold_used, result); only the image matters.
    thresholded = cv2.threshold(grayscale, 127, 255, cv2.THRESH_BINARY)[1]
    return thresholded
@app.route('/extract-text', methods=['POST'])
def extract_text():
    """
    Extract text from an uploaded image.

    Expects a JSON body of the form ``{"image": "<base64 or data URI>"}``.

    Returns:
        ``({"text": ...}, 200)`` on success.
        ``({"error": ...}, 400)`` when the body is not a JSON object, the
        ``"image"`` key is missing, or the image cannot be decoded.
        ``({"error": ...}, 500)`` for any unexpected failure during
        preprocessing or OCR.
    """
    try:
        # silent=True yields None for a missing/invalid JSON body instead of
        # raising, so malformed requests become a 400 rather than a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or "image" not in data:
            return jsonify({"error": "No image provided"}), 400

        # Decode the Base64 image. Accept either an (image, error) pair or a
        # bare image so a successful decode is never mis-unpacked.
        decoded = decode_base64_image(data["image"])
        if isinstance(decoded, tuple):
            image, error = decoded
        else:
            image, error = decoded, None

        if error:
            return jsonify({"error": error}), 400
        if image is None:
            # The decoder produced no image and no message (unreadable data).
            return jsonify(
                {"error": "Error decoding image: data is not a supported image format"}
            ), 400

        # Preprocess the image
        preprocessed_image = preprocess_image(image)

        # Extract text using Tesseract
        text = pytesseract.image_to_string(
            preprocessed_image, config="--oem 3 --psm 6"
        )
        return jsonify({"text": text}), 200
    except Exception as e:
        # Top-level boundary: record the traceback server-side and report the
        # failure to the client instead of letting the request crash.
        app.logger.exception("Text extraction failed")
        return jsonify({"error": str(e)}), 500
@app.route('/status', methods=['GET'])
def status():
    """
    Liveness probe: answer with a fixed JSON payload and HTTP 200.
    """
    payload = {"status": "Server is running"}
    return jsonify(payload), 200
if __name__ == '__main__':
    # The server binds to all interfaces, so the Werkzeug interactive debugger
    # must not be on by default: it lets anyone who can reach the port run
    # arbitrary code. Opt in explicitly with FLASK_DEBUG=1 for local work.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in (
        "1",
        "true",
        "yes",
        "on",
    )
    app.run(debug=debug_enabled, host='0.0.0.0', port=5000)