avimittal30 committed on
Commit
f487b74
·
verified ·
1 Parent(s): dc1d0ab

Update app_interface.py

Browse files
Files changed (1) hide show
  1. app_interface.py +50 -35
app_interface.py CHANGED
@@ -1,38 +1,53 @@
1
- from typing import List
2
-
3
- import pytesseract
4
  from PIL import Image
5
-
6
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
- def tesseract_ocr(filepath: str, languages: List[str]):
9
- image = Image.open(filepath)
10
- return pytesseract.image_to_string(image=image, lang=', '.join(languages))
11
-
12
- title = "Tesseract OCR"
13
- description = "Gradio demo for Tesseract. Tesseract is an open source text recognition (OCR) Engine."
14
- article = "<p style='text-align: center'><a href='https://tesseract-ocr.github.io/' target='_blank'>Tesseract documentation</a> | <a href='https://github.com/tesseract-ocr/tesseract' target='_blank'>Github Repo</a></p>"
15
- examples = [
16
- ['examples/eurotext.png', ['eng']],
17
- ['examples/tesseract_sample.png', ['jpn', 'eng']],
18
- ['examples/chi.jpg', ['HanS', 'HanT']]
19
- ]
20
-
21
- language_choices = pytesseract.get_languages()
22
-
23
- demo = gr.Interface(
24
- fn=tesseract_ocr,
25
- inputs=[
26
- gr.Image(type="filepath", label="Input"),
27
- gr.CheckboxGroup(language_choices, type="value", value=['eng'], label='language')
28
- ],
29
- outputs='text',
30
- title=title,
31
- description=description,
32
- article=article,
33
- examples=examples,
34
- )
35
-
36
- if __name__ == '__main__':
37
- demo.launch()
38
- print("Finished running")
 
 
 
 
1
  from PIL import Image
2
+ import pytesseract
3
  import gradio as gr
4
+ import os
5
# Placeholder list; nothing visible in this file reads it. Kept so the
# module-level name stays available.
langs = []

# Ask the tesseract CLI which languages are installed. The pipe is opened in a
# `with` block so it is closed (and the child process reaped) once read.
with os.popen('tesseract --list-langs') as _pipe:
    _listing = _pipe.read()

# The first output line is a header, not a language code. `splitlines()` is
# used instead of `split('\n')[1:-1]` so the last language is kept even when
# the output has no trailing newline; blank lines are skipped.
choices = [line.strip() for line in _listing.splitlines()[1:] if line.strip()]

# Standalone Blocks instance; the UI below builds its own `gr.Blocks()` and
# does not use this one. Kept so the module-level name stays available.
blocks = gr.Blocks()
10
+
11
+
12
+ # If you don't have tesseract executable in your PATH, include the following:
13
+ # pytesseract.pytesseract.tesseract_cmd = r'<full_path_to_your_tesseract_executable>'
14
+ # Example tesseract_cmd = r'C:\Program Files (x86)\Tesseract-OCR\tesseract'
15
+
16
+ # Simple image to string
17
+ # print(pytesseract.image_to_string(Image.open('eurotext.png')))
18
+
19
+ # # French text image to string
20
+ # print(pytesseract.image_to_string(Image.open('test-european.jpg'), lang='fra'))
21
+
22
+ # # Get bounding box estimates
23
+ # print(pytesseract.image_to_boxes(Image.open('test.png')))
24
+
25
+ # # Get verbose data including boxes, confidences, line and page numbers
26
+ # print(pytesseract.image_to_data(Image.open('test.png')))
27
+
28
+ # # Get information about orientation and script detection
29
+ # print(pytesseract.image_to_osd(Image.open('test.png')))
30
+
31
+
32
def run(image, lang=None):
    """Extract text from ``image`` with Tesseract.

    Args:
        image: Image to recognise; passed unchanged to
            ``pytesseract.image_to_string``. ``None`` (nothing supplied)
            yields an empty string instead of an error.
        lang: A Tesseract language code such as ``"eng"``, a list/tuple of
            codes, or ``None``/empty to leave the language unspecified.

    Returns:
        The recognised text, or ``""`` when no image was given.
    """
    # Nothing to recognise: return early rather than letting pytesseract
    # reject the missing image.
    if image is None:
        return ""

    if isinstance(lang, (list, tuple)):
        # Tesseract takes several languages as one "+"-joined string
        # (e.g. "eng+fra"), not as a Python list.
        lang = "+".join(lang)

    # Any empty selection ([], "", None) becomes None so that no language
    # option is forced and pytesseract applies its own default.
    return pytesseract.image_to_string(image, lang=lang or None)
36
+
37
+
38
# Build the UI. Components are created inside nested context managers, so the
# order and nesting of the statements below define the page layout.
with gr.Blocks() as demo:
    gr.Markdown("## Hello pytesseract!")
    with gr.Row():
        # First column: inputs and the trigger button.
        with gr.Column():
            image_in = gr.Image(type="pil")
            # Options come from the module-level `choices` list.
            lang = gr.Dropdown(choices)
            btn = gr.Button("Run")
        # Second column: OCR output.
        with gr.Column():
            text_out = gr.TextArea()

    # One example row: an image path and no language (None).
    # cache_examples=False is passed explicitly.
    examples = gr.Examples([["./eurotext.png", None]], fn=run, inputs=[
        image_in, lang], outputs=[text_out], cache_examples=False)
    # Clicking "Run" calls `run(image_in, lang)` and writes the result to the
    # text area.
    btn.click(fn=run, inputs=[image_in, lang], outputs=[text_out])

# NOTE(review): launch runs unconditionally at module level (no
# `if __name__ == "__main__"` guard), so importing this module starts the app.
demo.launch()
53