Spaces:
Runtime error
Runtime error
Update main.py
Browse files
main.py
CHANGED
|
@@ -2,24 +2,20 @@ import logging
|
|
| 2 |
import time
|
| 3 |
from pathlib import Path
|
| 4 |
import contextlib
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
logging.basicConfig(
|
| 7 |
level=logging.INFO,
|
| 8 |
format="%(asctime)s - %(levelname)s - %(message)s",
|
| 9 |
)
|
| 10 |
|
| 11 |
-
|
| 12 |
-
import gradio as gr
|
| 13 |
-
import nltk
|
| 14 |
-
import torch
|
| 15 |
-
|
| 16 |
-
from pdf2text import *
|
| 17 |
-
|
| 18 |
_here = Path(__file__).parent
|
| 19 |
|
| 20 |
nltk.download("stopwords") # TODO=find where this requirement originates from
|
| 21 |
|
| 22 |
-
|
| 23 |
def load_uploaded_file(file_obj, temp_dir: Path = None):
|
| 24 |
"""
|
| 25 |
load_uploaded_file - process an uploaded file
|
|
@@ -52,7 +48,6 @@ def load_uploaded_file(file_obj, temp_dir: Path = None):
|
|
| 52 |
print(f"Trying to load file with path {file_path}, error: {e}")
|
| 53 |
return None
|
| 54 |
|
| 55 |
-
|
| 56 |
def convert_PDF(
|
| 57 |
pdf_obj,
|
| 58 |
language: str = "en",
|
|
@@ -106,7 +101,6 @@ def convert_PDF(
|
|
| 106 |
|
| 107 |
return converted_txt, html, _output_name
|
| 108 |
|
| 109 |
-
|
| 110 |
if __name__ == "__main__":
|
| 111 |
logging.info("Starting app")
|
| 112 |
|
|
@@ -121,7 +115,6 @@ if __name__ == "__main__":
|
|
| 121 |
assume_straight_pages=True,
|
| 122 |
)
|
| 123 |
|
| 124 |
-
# define pdf bytes as None
|
| 125 |
pdf_obj = _here / "try_example_file.pdf"
|
| 126 |
pdf_obj = str(pdf_obj.resolve())
|
| 127 |
_temp_dir = _here / "temp"
|
|
@@ -131,7 +124,6 @@ if __name__ == "__main__":
|
|
| 131 |
demo = gr.Blocks()
|
| 132 |
|
| 133 |
with demo:
|
| 134 |
-
|
| 135 |
gr.Markdown("# PDF to Text")
|
| 136 |
gr.Markdown(
|
| 137 |
"A basic demo of pdf-to-text conversion using OCR from the [doctr](https://mindee.github.io/doctr/index.html) package"
|
|
@@ -139,7 +131,6 @@ if __name__ == "__main__":
|
|
| 139 |
gr.Markdown("---")
|
| 140 |
|
| 141 |
with gr.Column():
|
| 142 |
-
|
| 143 |
gr.Markdown("## Load Inputs")
|
| 144 |
gr.Markdown("Upload your own file & replace the default. Files should be < 10MB to avoid upload issues - search for a PDF compressor online as needed.")
|
| 145 |
gr.Markdown(
|
|
@@ -149,8 +140,8 @@ if __name__ == "__main__":
|
|
| 149 |
uploaded_file = gr.File(
|
| 150 |
label="Upload a PDF file",
|
| 151 |
file_count="single",
|
| 152 |
-
type="file",
|
| 153 |
-
value=_here / "try_example_file.pdf",
|
| 154 |
)
|
| 155 |
|
| 156 |
gr.Markdown("---")
|
|
@@ -166,7 +157,7 @@ if __name__ == "__main__":
|
|
| 166 |
text_file = gr.File(
|
| 167 |
label="Download Text File",
|
| 168 |
file_count="single",
|
| 169 |
-
type="file",
|
| 170 |
interactive=False,
|
| 171 |
)
|
| 172 |
|
|
@@ -175,4 +166,4 @@ if __name__ == "__main__":
|
|
| 175 |
inputs=[uploaded_file],
|
| 176 |
outputs=[OCR_text, out_placeholder, text_file],
|
| 177 |
)
|
| 178 |
-
demo.launch(enable_queue=True)
|
|
|
|
| 2 |
import time
|
| 3 |
from pathlib import Path
|
| 4 |
import contextlib
|
| 5 |
+
import gradio as gr
|
| 6 |
+
import nltk
|
| 7 |
+
import torch
|
| 8 |
+
from pdf2text import *
|
| 9 |
|
| 10 |
logging.basicConfig(
|
| 11 |
level=logging.INFO,
|
| 12 |
format="%(asctime)s - %(levelname)s - %(message)s",
|
| 13 |
)
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
_here = Path(__file__).parent
|
| 16 |
|
| 17 |
nltk.download("stopwords") # TODO=find where this requirement originates from
|
| 18 |
|
|
|
|
| 19 |
def load_uploaded_file(file_obj, temp_dir: Path = None):
|
| 20 |
"""
|
| 21 |
load_uploaded_file - process an uploaded file
|
|
|
|
| 48 |
print(f"Trying to load file with path {file_path}, error: {e}")
|
| 49 |
return None
|
| 50 |
|
|
|
|
| 51 |
def convert_PDF(
|
| 52 |
pdf_obj,
|
| 53 |
language: str = "en",
|
|
|
|
| 101 |
|
| 102 |
return converted_txt, html, _output_name
|
| 103 |
|
|
|
|
| 104 |
if __name__ == "__main__":
|
| 105 |
logging.info("Starting app")
|
| 106 |
|
|
|
|
| 115 |
assume_straight_pages=True,
|
| 116 |
)
|
| 117 |
|
|
|
|
| 118 |
pdf_obj = _here / "try_example_file.pdf"
|
| 119 |
pdf_obj = str(pdf_obj.resolve())
|
| 120 |
_temp_dir = _here / "temp"
|
|
|
|
| 124 |
demo = gr.Blocks()
|
| 125 |
|
| 126 |
with demo:
|
|
|
|
| 127 |
gr.Markdown("# PDF to Text")
|
| 128 |
gr.Markdown(
|
| 129 |
"A basic demo of pdf-to-text conversion using OCR from the [doctr](https://mindee.github.io/doctr/index.html) package"
|
|
|
|
| 131 |
gr.Markdown("---")
|
| 132 |
|
| 133 |
with gr.Column():
|
|
|
|
| 134 |
gr.Markdown("## Load Inputs")
|
| 135 |
gr.Markdown("Upload your own file & replace the default. Files should be < 10MB to avoid upload issues - search for a PDF compressor online as needed.")
|
| 136 |
gr.Markdown(
|
|
|
|
| 140 |
uploaded_file = gr.File(
|
| 141 |
label="Upload a PDF file",
|
| 142 |
file_count="single",
|
| 143 |
+
type="filepath",
|
| 144 |
+
value=str(_here / "try_example_file.pdf"),
|
| 145 |
)
|
| 146 |
|
| 147 |
gr.Markdown("---")
|
|
|
|
| 157 |
text_file = gr.File(
|
| 158 |
label="Download Text File",
|
| 159 |
file_count="single",
|
| 160 |
+
type="filepath",
|
| 161 |
interactive=False,
|
| 162 |
)
|
| 163 |
|
|
|
|
| 166 |
inputs=[uploaded_file],
|
| 167 |
outputs=[OCR_text, out_placeholder, text_file],
|
| 168 |
)
|
| 169 |
+
demo.launch(enable_queue=True)
|