kasper-boy committed on
Commit
7992cef
·
verified ·
1 Parent(s): a224b87

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +8 -17
main.py CHANGED
@@ -2,24 +2,20 @@ import logging
2
  import time
3
  from pathlib import Path
4
  import contextlib
 
 
 
 
5
 
6
  logging.basicConfig(
7
  level=logging.INFO,
8
  format="%(asctime)s - %(levelname)s - %(message)s",
9
  )
10
 
11
-
12
- import gradio as gr
13
- import nltk
14
- import torch
15
-
16
- from pdf2text import *
17
-
18
  _here = Path(__file__).parent
19
 
20
  nltk.download("stopwords") # TODO=find where this requirement originates from
21
 
22
-
23
  def load_uploaded_file(file_obj, temp_dir: Path = None):
24
  """
25
  load_uploaded_file - process an uploaded file
@@ -52,7 +48,6 @@ def load_uploaded_file(file_obj, temp_dir: Path = None):
52
  print(f"Trying to load file with path {file_path}, error: {e}")
53
  return None
54
 
55
-
56
  def convert_PDF(
57
  pdf_obj,
58
  language: str = "en",
@@ -106,7 +101,6 @@ def convert_PDF(
106
 
107
  return converted_txt, html, _output_name
108
 
109
-
110
  if __name__ == "__main__":
111
  logging.info("Starting app")
112
 
@@ -121,7 +115,6 @@ if __name__ == "__main__":
121
  assume_straight_pages=True,
122
  )
123
 
124
- # define pdf bytes as None
125
  pdf_obj = _here / "try_example_file.pdf"
126
  pdf_obj = str(pdf_obj.resolve())
127
  _temp_dir = _here / "temp"
@@ -131,7 +124,6 @@ if __name__ == "__main__":
131
  demo = gr.Blocks()
132
 
133
  with demo:
134
-
135
  gr.Markdown("# PDF to Text")
136
  gr.Markdown(
137
  "A basic demo of pdf-to-text conversion using OCR from the [doctr](https://mindee.github.io/doctr/index.html) package"
@@ -139,7 +131,6 @@ if __name__ == "__main__":
139
  gr.Markdown("---")
140
 
141
  with gr.Column():
142
-
143
  gr.Markdown("## Load Inputs")
144
  gr.Markdown("Upload your own file & replace the default. Files should be < 10MB to avoid upload issues - search for a PDF compressor online as needed.")
145
  gr.Markdown(
@@ -149,8 +140,8 @@ if __name__ == "__main__":
149
  uploaded_file = gr.File(
150
  label="Upload a PDF file",
151
  file_count="single",
152
- type="file",
153
- value=_here / "try_example_file.pdf",
154
  )
155
 
156
  gr.Markdown("---")
@@ -166,7 +157,7 @@ if __name__ == "__main__":
166
  text_file = gr.File(
167
  label="Download Text File",
168
  file_count="single",
169
- type="file",
170
  interactive=False,
171
  )
172
 
@@ -175,4 +166,4 @@ if __name__ == "__main__":
175
  inputs=[uploaded_file],
176
  outputs=[OCR_text, out_placeholder, text_file],
177
  )
178
- demo.launch(enable_queue=True)
 
2
  import time
3
  from pathlib import Path
4
  import contextlib
5
+ import gradio as gr
6
+ import nltk
7
+ import torch
8
+ from pdf2text import *
9
 
10
  logging.basicConfig(
11
  level=logging.INFO,
12
  format="%(asctime)s - %(levelname)s - %(message)s",
13
  )
14
 
 
 
 
 
 
 
 
15
  _here = Path(__file__).parent
16
 
17
  nltk.download("stopwords") # TODO=find where this requirement originates from
18
 
 
19
  def load_uploaded_file(file_obj, temp_dir: Path = None):
20
  """
21
  load_uploaded_file - process an uploaded file
 
48
  print(f"Trying to load file with path {file_path}, error: {e}")
49
  return None
50
 
 
51
  def convert_PDF(
52
  pdf_obj,
53
  language: str = "en",
 
101
 
102
  return converted_txt, html, _output_name
103
 
 
104
  if __name__ == "__main__":
105
  logging.info("Starting app")
106
 
 
115
  assume_straight_pages=True,
116
  )
117
 
 
118
  pdf_obj = _here / "try_example_file.pdf"
119
  pdf_obj = str(pdf_obj.resolve())
120
  _temp_dir = _here / "temp"
 
124
  demo = gr.Blocks()
125
 
126
  with demo:
 
127
  gr.Markdown("# PDF to Text")
128
  gr.Markdown(
129
  "A basic demo of pdf-to-text conversion using OCR from the [doctr](https://mindee.github.io/doctr/index.html) package"
 
131
  gr.Markdown("---")
132
 
133
  with gr.Column():
 
134
  gr.Markdown("## Load Inputs")
135
  gr.Markdown("Upload your own file & replace the default. Files should be < 10MB to avoid upload issues - search for a PDF compressor online as needed.")
136
  gr.Markdown(
 
140
  uploaded_file = gr.File(
141
  label="Upload a PDF file",
142
  file_count="single",
143
+ type="filepath",
144
+ value=str(_here / "try_example_file.pdf"),
145
  )
146
 
147
  gr.Markdown("---")
 
157
  text_file = gr.File(
158
  label="Download Text File",
159
  file_count="single",
160
+ type="filepath",
161
  interactive=False,
162
  )
163
 
 
166
  inputs=[uploaded_file],
167
  outputs=[OCR_text, out_placeholder, text_file],
168
  )
169
+ demo.launch(enable_queue=True)