nougat

Runtime error

App Files Community

fsmoreir committed on Jun 5, 2024

Commit

ea18622

verified ·

1 Parent(s): 4805dc8

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -56

app.py CHANGED Viewed

@@ -3,6 +3,8 @@ import subprocess
 import uuid
 import os
 import requests
 def get_pdf(pdf_link):
     # Generate a unique filename
@@ -20,6 +22,7 @@ def get_pdf(pdf_link):
         print("Failed to download the PDF.")
     return unique_filename
 def nougat_ocr(file_name):
     # Command to run
     cli_command = [
@@ -29,78 +32,52 @@ def nougat_ocr(file_name):
         '--checkpoint', 'nougat',
         '--markdown'
     ]
     subprocess.run(cli_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
-def predict(pdf_file, pdf_link):
-    if pdf_file is None and not pdf_link:
-        return "No data provided. Upload a pdf file or provide a pdf link and try again!", "", ""
-    if pdf_file is not None:
-        file_name = pdf_file.name
     else:
-        file_name = get_pdf(pdf_link)
     # Call nougat
     nougat_ocr(file_name)
     # Open the file for reading
     file_name = file_name.split('/')[-1][:-4]
-    output_path = f'output/{file_name}.mmd'
-    with open(output_path, 'r') as file:
         content = file.read()
-    # Switch math delimiters
     content = content.replace(r'\(', '$').replace(r'\)', '$').replace(r'\[', '$$').replace(r'\]', '$$')
-    return content, content, output_path
 css = """
   #mkd {
-    height: 500px;
-    overflow: auto;
-    border: 1px solid #ccc;
   }
 """
-# JavaScript function to copy content to clipboard automatically
-js_auto_copy = """
-function autoCopyToClipboard() {
-    var content = document.getElementById('markdown-content').value;
-    navigator.clipboard.writeText(content).then(function() {
-        console.log('Content copied to clipboard');
-    }).catch(function(error) {
-        console.error('Error copying content to clipboard: ', error);
-    });
-}
-"""
 with gr.Blocks(css=css) as demo:
-    gr.HTML("<h1><center>Nougat: Neural Optical Understanding for Academic Documents<center><h1>")
-    gr.HTML("<h3><center>Lukas Blecher et al. <a href='https://arxiv.org/pdf/2308.13418.pdf' target='_blank'>Paper</a>, <a href='https://facebookresearch.github.io/nougat/'>Project</a><center></h3>")
-    with gr.Row():
-        gr.Markdown('<h4><center>Upload a PDF</center></h4>', scale=1)
-        gr.Markdown('<h4><center><i>OR</i></center></h4>', scale=1)
-        gr.Markdown('<h4><center>Provide a PDF link</center></h4>', scale=1)
-    with gr.Row(equal_height=True):
-        pdf_file = gr.File(label='PDF📃', file_count='single', scale=1)
-        pdf_link = gr.Textbox(placeholder='Enter an Arxiv link here', label='PDF link🔗🌐', scale=1)
-    with gr.Row():
-        btn = gr.Button('Run NOUGAT🍫')
-        clr = gr.Button('Clear🚿')
-        dwn = gr.Button('Download📥')
-    output_headline = gr.Markdown("<h3>PDF converted to markup language through Nougat-OCR👇:</h3>")
-    parsed_output = gr.Markdown(elem_id='mkd', value='📃🔤OCR Output')
-    markdown_hidden = gr.Textbox(elem_id='markdown-content', visible=False)
-    download_link = gr.File(elem_id='download-link', visible=False)
-    btn.click(fn=predict, inputs=[pdf_file, pdf_link], outputs=[parsed_output, markdown_hidden, download_link], _js=js_auto_copy)
-    clr.click(lambda: (gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None)), [], [pdf_file, pdf_link, parsed_output, markdown_hidden, download_link])
-    dwn.click(None, [download_link], None)
-# Enable queueing for request handling
-demo.queue()
-# Launch the interface in debug mode
-demo.launch(debug=True)

 import uuid
 import os
 import requests
+import re
 def get_pdf(pdf_link):
     # Generate a unique filename
         print("Failed to download the PDF.")
     return unique_filename
 def nougat_ocr(file_name):
     # Command to run
     cli_command = [
         '--checkpoint', 'nougat',
         '--markdown'
     ]
+    # Run the command and capture its output
     subprocess.run(cli_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+def predict(pdf_file, pdf_link):
+    if pdf_file is None:
+        if pdf_link == '':
+            print("No file is uploaded and No link is provided")
+            return "No data provided. Upload a pdf file or provide a pdf link and try again!"
+        else:
+            print(f'pdf_link is - {pdf_link}')
+            file_name = get_pdf(pdf_link)
+            print(f'file_name is - {file_name}')
     else:
+        file_name = pdf_file.name
+        print(file_name)
+        pdf_name = pdf_file.name.split('/')[-1].split('.')[0]
+        print(pdf_name)
     # Call nougat
     nougat_ocr(file_name)
     # Open the file for reading
     file_name = file_name.split('/')[-1][:-4]
+    mmd_file_path = f'output/{file_name}.mmd'
+    with open(mmd_file_path, 'r') as file:
         content = file.read()
+    # switch math delimiters
     content = content.replace(r'\(', '$').replace(r'\)', '$').replace(r'\[', '$$').replace(r'\]', '$$')
+    return content, mmd_file_path
+def process_example(pdf_file, pdf_link):
+    ocr_content, _ = predict(pdf_file, pdf_link)
+    return gr.update(value=ocr_content)
 css = """
   #mkd {
+    height: 500px;
+    overflow: auto;
+    border: 1px solid #ccc;
   }
 """
 with gr.Blocks(css=css) as demo:
+    gr.HTML("<h1><