JackWrion committed on
Commit
424fc11
·
1 Parent(s): 1485016
Files changed (2) hide show
  1. app.py +122 -0
  2. main.py +0 -42
app.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import cv2
3
+ import pytesseract
4
+ from fastapi import FastAPI
5
+
6
+ pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
7
+
8
+
9
+
10
def TextLineBox(img):
    """Run Tesseract OCR on an image and outline every recognised word.

    Args:
        img: Image array accepted by ``cv2.GaussianBlur``, or ``None`` when
            no image was supplied.

    Returns:
        A ``(text, annotated_image)`` tuple: the string produced by
        ``pytesseract.image_to_string`` and the blurred image with one
        rectangle drawn per recognised word. ``("", None)`` is returned
        when ``img`` is ``None``.
    """
    # Nothing to OCR; return empty results instead of failing inside OpenCV.
    if img is None:
        return "", None

    class Lines:
        """Bounding box and accumulated text of one OCR text line."""

        def __init__(self, x, y, w, h, text):
            self.x = x
            self.y = y
            self.w = w
            self.h = h
            self.text = text

    # Line-level boxes are accumulated here; they are not part of the
    # return value (only the word boxes are drawn).
    lineboxes = []

    # Light blur before OCR; the blurred result is what gets read and drawn on.
    img = cv2.GaussianBlur(img, (3, 3), 0)

    # Tesseract config: OCR engine mode 3, page segmentation mode 12, English.
    configname = r' --oem 3 --psm ' + str(12) + ' -l eng'

    # Plain recognised text, returned to the caller.
    texttest = pytesseract.image_to_string(img, config=configname)

    # Per-word rows; each row is whitespace-split below and read as
    # level, page, block, paragraph, line, word_num, left, top, width,
    # height, conf, text.
    boxes = pytesseract.image_to_data(img, config=configname)

    current_line_key = None
    # The first row is the column header, so skip it.
    for row in boxes.splitlines()[1:]:
        fields = row.split()
        if len(fields) < 12:  # row carries no text: not a word
            continue

        x, y, w, h = (int(value) for value in fields[6:10])
        text = fields[11]

        # Start a new line whenever the (page, block, paragraph, line) key
        # changes. Relying on word_num == 1 breaks when the first word of a
        # line was skipped above as blank: the next word would be merged
        # into the previous line, or raise IndexError on an empty list.
        line_key = tuple(fields[1:5])
        if line_key != current_line_key or not lineboxes:
            current_line_key = line_key
            lineboxes.append(Lines(x, y, w, h, text))
        else:
            line = lineboxes[-1]
            line.text += ' ' + text
            # Grow the line box so it also covers this word.
            if x > line.x:
                line.w = x - line.x + w
            if y < line.y:
                line.y = y
            if y + h > line.y + line.h:
                line.h = y + h - line.y

        # Draw the box of the individual word.
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)

    return texttest, img
71
+
72
+
73
def Download(text):
    """Write the given text to ``test.txt`` and return that file name.

    Args:
        text: Text to save. ``None`` is written as an empty file.

    Returns:
        The string ``"test.txt"`` (relative to the current working
        directory), i.e. the file that was just written.
    """
    # Explicit UTF-8: with the platform default encoding, characters outside
    # the local code page would raise UnicodeEncodeError on write.
    with open("test.txt", "w", encoding="utf-8") as file:
        file.write("" if text is None else text)
    return "test.txt"
77
+
78
+
79
+
80
+
81
+
82
+
83
+
84
+ # mainInterface = gr.Interface(fn=TextLineBox,
85
+ # inputs=gr.Image(),
86
+ # outputs=[gr.Text(label="Result Text"), gr.Image(label="Boxes of Line")],
87
+ # )
88
+
89
+
90
+
91
# Gradio UI. First column: input image, recognised text, downloadable file
# and the action buttons. Second column: the annotated image.
# NOTE(review): the nesting of the button row was reconstructed from a
# flattened diff view - confirm it sits inside the first column.
with gr.Blocks(
    theme='JohnSmith9982/small_and_pretty',
    css="#SUBMIT {background-color: red} #DOWNLOAD {background-color: green}",
) as demo:
    with gr.Row():
        with gr.Column():
            # Named image_input (not `input`) so the builtin is not shadowed.
            image_input = gr.Image()
            text_output = gr.Text(label="Result Text")
            file_output = gr.File()
            with gr.Row():
                submit_btn = gr.Button("SUBMIT", elem_id="SUBMIT")
                download_btn = gr.Button("DOWNLOAD", elem_id="DOWNLOAD")
                clear_btn = gr.Button("CLEAR")

        with gr.Column():
            image_output = gr.Image()

    # SUBMIT: OCR the uploaded image -> recognised text + annotated image.
    submit_btn.click(TextLineBox, image_input, outputs=[text_output, image_output])
    # DOWNLOAD: save the current text to a file and offer it for download.
    download_btn.click(Download, text_output, outputs=file_output)
    # CLEAR: reset the three output components (the input image is left as is).
    clear_btn.click(
        lambda: [None, None, None],
        inputs=None,
        outputs=[text_output, file_output, image_output],
    )

# Launch only when run as a script, so that importing this module (for
# example to mount `demo` into a FastAPI app) does not start a server.
if __name__ == "__main__":
    demo.launch()
110
+
111
+
112
+ # Option 1: use the Gradio app directly (share=True)
113
+ # Option 2: use mount_gradio_app inside a FastAPI app
114
+ # Option 3
115
+
116
+ # app = FastAPI()
117
+ # app = gr.mount_gradio_app(app, demo, path="/OCR" )
118
+
119
+
120
+
121
+
122
+ #bt.click(fn=None, _js="window.open('https://google.com', '_blank')")
main.py DELETED
@@ -1,42 +0,0 @@
1
- import OCR
2
- import os
3
-
4
# Batch driver: run the OCR module over every JPEG test image.
files = os.listdir('./testcase_source')

# Keep only the .jpg images. os.listdir returns names in arbitrary order,
# so sort them to make the processing log reproducible between runs.
image_files = sorted(file for file in files if file.endswith('.jpg'))

for img in image_files:
    # Announce the image, then hand its file name to the OCR module.
    # NOTE(review): per the original comment OCR.OCRText writes the result
    # and a log; that function is not visible here - confirm.
    print("Processing: " + img)

    # To resume from one specific test case while debugging, skip entries
    # of image_files until its name (e.g. "X51006619503.jpg") is reached.
    OCR.OCRText(img)
25
-
26
-
27
-
28
- #### ERROR ########
29
- # X51006619503.jpg
30
- # X51006619506.jpg
31
- # X51006619782
32
- # X51006619784.jpg
33
- # X51006620182.jpg
34
- # X51006620182.jpg
35
- #
36
- #
37
- #
38
- #
39
- #
40
- #
41
- # #
42
-