gopichandra committed on
Commit
b029af5
·
verified ·
1 Parent(s): 5010f19

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -34
app.py CHANGED
@@ -1,46 +1,84 @@
1
- import gradio as gr
2
  import pytesseract
3
  import cv2
4
  import numpy as np
 
 
5
  from PIL import Image
6
  import base64
7
 
8
- # Path to Tesseract executable
 
 
 
 
9
  pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
10
 
11
- # Function to decode image
12
- def decode_image(image):
13
- # Convert PIL Image to OpenCV format
14
- image = np.array(image)
15
- return cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
16
 
17
- # Function to process and extract text
18
- def extract_text(image):
 
 
19
  try:
20
- # Decode the image
21
- decoded_image = decode_image(image)
22
-
23
- # Convert to grayscale
24
- gray_image = cv2.cvtColor(decoded_image, cv2.COLOR_BGR2GRAY)
25
-
26
- # Preprocess the image (optional)
27
- _, processed_image = cv2.threshold(gray_image, 127, 255, cv2.THRESH_BINARY)
28
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  # Extract text using Tesseract
30
- text = pytesseract.image_to_string(processed_image, config="--oem 3 --psm 6")
31
- return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  except Exception as e:
33
- return f"Error: {str(e)}"
34
-
35
- # Gradio Interface
36
- interface = gr.Interface(
37
- fn=extract_text,
38
- inputs=gr.Image(type="pil", label="Upload Image"),
39
- outputs=gr.Textbox(label="Extracted Text"),
40
- title="Text Extraction App",
41
- description="Upload an image of an invoice or document to extract text."
42
- )
43
-
44
- # Run the app
45
- if __name__ == "__main__":
46
- interface.launch()
 
 
1
  import pytesseract
2
  import cv2
3
  import numpy as np
4
+ from flask import Flask, request, jsonify
5
+ from flask_cors import CORS
6
  from PIL import Image
7
  import base64
8
 
9
import os

# Initialize the Flask app and enable CORS on it (flask-cors defaults).
app = Flask(__name__)
CORS(app)

# Locate the Tesseract executable.
# Priority: the TESSERACT_CMD environment variable, then the conventional
# Windows install path (Windows only). On other platforms pytesseract's own
# default is left untouched, so a `tesseract` binary on PATH is used instead
# of a Windows path that cannot exist there.
_tesseract_cmd = os.environ.get("TESSERACT_CMD")
if not _tesseract_cmd and os.name == "nt":
    _tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
if _tesseract_cmd:
    pytesseract.pytesseract.tesseract_cmd = _tesseract_cmd
15
 
 
 
 
 
 
16
 
17
def decode_base64_image(image_data):
    """
    Decode a Base64-encoded image into an OpenCV (BGR) array.

    Args:
        image_data: Base64 text, either as a data URL
            ("data:image/png;base64,<payload>") or as the bare payload.

    Returns:
        An ``(image, error)`` pair in every case, so callers can always
        unpack two values: ``(ndarray, None)`` on success and
        ``(None, message)`` on failure. This function does not raise.
    """
    if not isinstance(image_data, str):
        return None, "Error decoding image: expected a Base64 string"

    # Strip the optional "data:<mime>;base64," header. The Base64 alphabet
    # has no comma, so everything after the first comma is the payload.
    encoded = image_data.split(",", 1)[1] if "," in image_data else image_data

    try:
        image_bytes = base64.b64decode(encoded)
        if not image_bytes:
            return None, "Error decoding image: empty image data"
        np_array = np.frombuffer(image_bytes, np.uint8)
        image = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
    except Exception as e:
        # Deliberately broad: the contract is to report any decoding failure
        # as an error message rather than propagate it to the request handler.
        return None, f"Error decoding image: {str(e)}"

    # cv2.imdecode signals undecodable data by returning None, not by raising.
    if image is None:
        return None, "Error decoding image: data is not a valid image"
    return image, None
29
+
30
+
31
def preprocess_image(image):
    """
    Convert a BGR image to a binary (black/white) image ahead of OCR.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (threshold_used, result); only the result is needed.
    # Pixels above 127 become 255, everything else becomes 0.
    thresholded = cv2.threshold(grayscale, 127, 255, cv2.THRESH_BINARY)[1]
    return thresholded
38
+
39
+
40
@app.route('/extract-text', methods=['POST'])
def extract_text():
    """
    Run OCR on a Base64 image posted as JSON and return the recognized text.

    Expects a JSON object body containing an "image" key.

    Returns:
        ``{"text": ...}`` with 200 on success; ``{"error": ...}`` with 400
        when the request body or the image cannot be used, or with 500 when
        processing fails unexpectedly.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so bad requests are answered with 400 rather than being caught
    # by the generic handler below and reported as a server error.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or "image" not in data:
        return jsonify({"error": "No image provided"}), 400

    try:
        # Decode the Base64 image
        image, error = decode_base64_image(data["image"])
        if error:
            return jsonify({"error": error}), 400

        # Preprocess the image
        preprocessed_image = preprocess_image(image)

        # Extract text using Tesseract
        text = pytesseract.image_to_string(preprocessed_image, config="--oem 3 --psm 6")
        return jsonify({"text": text}), 200
    except Exception as e:
        # Request boundary: turn any processing failure into a JSON 500.
        return jsonify({"error": str(e)}), 500
63
+
64
+
65
@app.route('/status', methods=['GET'])
def status():
    """
    Liveness probe: answers with a fixed JSON payload and HTTP 200.
    """
    payload = {"status": "Server is running"}
    return jsonify(payload), 200
71
+
72
+
73
if __name__ == '__main__':
    import os

    # Fail fast if Tesseract cannot be found or executed.
    try:
        version = pytesseract.get_tesseract_version()
    except Exception as e:
        print(f"Error: {e}")
        print("Ensure Tesseract-OCR is installed and the path is correctly set.")
        # SystemExit instead of exit(): exit() is injected by the `site`
        # module and is not guaranteed to exist in every interpreter setup.
        raise SystemExit(1)
    print(f"Tesseract OCR version: {version}")

    # The server listens on all interfaces, so the interactive debugger must
    # not be on by default: it lets anyone who can reach the port run code.
    # Opt in explicitly for local development with FLASK_DEBUG=1.
    debug = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes", "on")

    # Start the Flask server
    app.run(debug=debug, host='0.0.0.0', port=5000)