Spaces:

kasper-boy
/

OCR_PDF_DOWNLOAD

Runtime error

App Files Community

kasper-boy committed on Jun 12, 2024

Commit

7992cef

verified ·

1 Parent(s): a224b87

Update main.py

Browse files

Files changed (1) hide show

main.py +8 -17

main.py CHANGED Viewed

@@ -2,24 +2,20 @@ import logging
 import time
 from pathlib import Path
 import contextlib
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s - %(levelname)s - %(message)s",
 )
-import gradio as gr
-import nltk
-import torch
-from pdf2text import *
 _here = Path(__file__).parent
 nltk.download("stopwords")  # TODO=find where this requirement originates from
 def load_uploaded_file(file_obj, temp_dir: Path = None):
     """
     load_uploaded_file - process an uploaded file
@@ -52,7 +48,6 @@ def load_uploaded_file(file_obj, temp_dir: Path = None):
         print(f"Trying to load file with path {file_path}, error: {e}")
         return None
 def convert_PDF(
     pdf_obj,
     language: str = "en",
@@ -106,7 +101,6 @@ def convert_PDF(
     return converted_txt, html, _output_name
 if __name__ == "__main__":
     logging.info("Starting app")
@@ -121,7 +115,6 @@ if __name__ == "__main__":
             assume_straight_pages=True,
         )
-    # define pdf bytes as None
     pdf_obj = _here / "try_example_file.pdf"
     pdf_obj = str(pdf_obj.resolve())
     _temp_dir = _here / "temp"
@@ -131,7 +124,6 @@ if __name__ == "__main__":
     demo = gr.Blocks()
     with demo:
         gr.Markdown("# PDF to Text")
         gr.Markdown(
             "A basic demo of pdf-to-text conversion using OCR from the [doctr](https://mindee.github.io/doctr/index.html) package"
@@ -139,7 +131,6 @@ if __name__ == "__main__":
         gr.Markdown("---")
         with gr.Column():
             gr.Markdown("## Load Inputs")
             gr.Markdown("Upload your own file & replace the default. Files should be < 10MB to avoid upload issues - search for a PDF compressor online as needed.")
             gr.Markdown(
@@ -149,8 +140,8 @@ if __name__ == "__main__":
             uploaded_file = gr.File(
                 label="Upload a PDF file",
                 file_count="single",
-                type="file",
-                value=_here / "try_example_file.pdf",
             )
             gr.Markdown("---")
@@ -166,7 +157,7 @@ if __name__ == "__main__":
             text_file = gr.File(
                 label="Download Text File",
                 file_count="single",
-                type="file",
                 interactive=False,
             )
@@ -175,4 +166,4 @@ if __name__ == "__main__":
             inputs=[uploaded_file],
             outputs=[OCR_text, out_placeholder, text_file],
         )
-    demo.launch(enable_queue=True)

 import time
 from pathlib import Path
 import contextlib
+import gradio as gr
+import nltk
+import torch
+from pdf2text import *
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s - %(levelname)s - %(message)s",
 )
 _here = Path(__file__).parent
 nltk.download("stopwords")  # TODO=find where this requirement originates from
 def load_uploaded_file(file_obj, temp_dir: Path = None):
     """
     load_uploaded_file - process an uploaded file
         print(f"Trying to load file with path {file_path}, error: {e}")
         return None
 def convert_PDF(
     pdf_obj,
     language: str = "en",
     return converted_txt, html, _output_name
 if __name__ == "__main__":
     logging.info("Starting app")
             assume_straight_pages=True,
         )
     pdf_obj = _here / "try_example_file.pdf"
     pdf_obj = str(pdf_obj.resolve())
     _temp_dir = _here / "temp"
     demo = gr.Blocks()
     with demo:
         gr.Markdown("# PDF to Text")
         gr.Markdown(
             "A basic demo of pdf-to-text conversion using OCR from the [doctr](https://mindee.github.io/doctr/index.html) package"
         gr.Markdown("---")
         with gr.Column():
             gr.Markdown("## Load Inputs")
             gr.Markdown("Upload your own file & replace the default. Files should be < 10MB to avoid upload issues - search for a PDF compressor online as needed.")
             gr.Markdown(
             uploaded_file = gr.File(
                 label="Upload a PDF file",
                 file_count="single",
+                type="filepath",
+                value=str(_here / "try_example_file.pdf"),
             )
             gr.Markdown("---")
             text_file = gr.File(
                 label="Download Text File",
                 file_count="single",
+                type="filepath",
                 interactive=False,
             )
             inputs=[uploaded_file],
             outputs=[OCR_text, out_placeholder, text_file],
         )
+    demo.launch(enable_queue=True)