Spaces:

venkatl
/

pdf-util

Sleeping

App Files Community

venkatl committed on Sep 25, 2024

Commit

aa3e00e

1 Parent(s): e685745

v0.1

Browse files

Files changed (1) hide show

app.py +73 -55

app.py CHANGED Viewed

@@ -39,10 +39,6 @@ def delete_pdf(pdf_file_name):
 # Parse PDF
 def parse_pdf(pdf_file_name, page_numbers, search_strings, rect_coords):
     doc = fitz.open(os.path.join(PDF_STORAGE_DIR, pdf_file_name))
-    if not page_numbers:
-        page_numbers = [0]
-    else:
-        page_numbers = [int(i) for i in page_numbers.split(',')]
     pages = [doc.load_page(i) for i in page_numbers]
     output = ""
@@ -51,20 +47,18 @@ def parse_pdf(pdf_file_name, page_numbers, search_strings, rect_coords):
     for page in pages:
         output += "### Page %d\n" % page.number
         output += page.get_text() + "\n\n"
-    # Search strings
-    output += "\n### Search Results:\n"
-    for search_string in search_strings:
-        for page in pages:
-            rect = page.search_for(search_string)
-            if rect:
-                output += f"{search_string} - {rect}\n"
-            else:
-                output += f"{search_string} not found\n"
-        output += "\n"
-    # Rectangle coordinates
     if rect_coords:
         x1, y1, x2, y2 = map(float, rect_coords.split(','))
         rect = fitz.Rect(x1, y1, x2, y2)
@@ -73,49 +67,73 @@ def parse_pdf(pdf_file_name, page_numbers, search_strings, rect_coords):
             output += page.get_text("text", clip=rect) + "\n\n"
     return output
-# Main app
 def main():
-    st.write("### PDF Manager")
-    # Container for download/upload/delete
-    with st.container():
-        st.write("#### Manage PDF Files")
-        col1, col2 = st.columns(2)
-        # Download PDF
-        with col1:
-            url = st.text_input("Enter PDF URL")
-            if st.button("Download PDF"):
-                pdf_file_name = download_pdf(url)
-                st.success(f"PDF downloaded: {pdf_file_name}")
-    # Delete PDF
-        with col2:
             pdf_files = os.listdir(PDF_STORAGE_DIR)
-            pdf_file_name = st.selectbox("Select PDF file to delete", pdf_files)
-            if st.button("Delete PDF"):
-                output = delete_pdf(pdf_file_name)
-                st.success(output)
-    with st.container():
-        st.write("# Upload PDF")
-        # Upload PDF
-        pdf_file = st.file_uploader("Select PDF file")
-        if pdf_file:
-            pdf_file_name = upload_pdf(pdf_file)
-            st.success(f"PDF uploaded: {pdf_file_name}")
-    # Container for parsing
-    with st.container():
-        st.write("#### Parse PDF")
-        pdf_files = os.listdir(PDF_STORAGE_DIR)
-        pdf_file_name = st.selectbox("Select PDF file to parse", pdf_files)
-        page_numbers = st.text_input("Enter page numbers (comma-separated)", value="")
-        search_strings = st.text_input("Enter search strings (comma-separated)")
-        rect_coords = st.text_input("Enter rectangle coordinates (x1,y1,x2,y2)")
-        if st.button("Parse PDF"):
-            search_strings = [s.strip() for s in search_strings.split(',')]
-            output = parse_pdf(pdf_file_name, page_numbers, search_strings, rect_coords)
-            st.write(output)
 if __name__ == "__main__":
     main()

 # Parse PDF
 def parse_pdf(pdf_file_name, page_numbers, search_strings, rect_coords):
     doc = fitz.open(os.path.join(PDF_STORAGE_DIR, pdf_file_name))
     pages = [doc.load_page(i) for i in page_numbers]
     output = ""
     for page in pages:
         output += "### Page %d\n" % page.number
         output += page.get_text() + "\n\n"
+    if search_strings:
+        output += "\n### Search Results:\n"
+        for search_string in search_strings:
+            for page in pages:
+                rect = page.search_for(search_string)
+                if rect:
+                    output += f"{search_string} - {rect}\n"
+                else:
+                    output += f"{search_string} not found\n"
+            output += "\n"
     if rect_coords:
         x1, y1, x2, y2 = map(float, rect_coords.split(','))
         rect = fitz.Rect(x1, y1, x2, y2)
             output += page.get_text("text", clip=rect) + "\n\n"
     return output
 def main():
+    st.title("PDF Manager")
+    # Simplified navigation
+    tabs = ["Home", "Extract Text", "Manage PDFs"]
+    tab = st.sidebar.selectbox("Navigation", tabs)
+    if tab == "Home":
+        st.write("### Welcome to PDF Manager!")
+        st.write("This app helps you extract text from PDF files.")
+    elif tab == "Extract Text":
+        st.write("### Extract Text from PDF")
+        with st.form("extract_text_form"):
             pdf_files = os.listdir(PDF_STORAGE_DIR)
+            pdf_file = st.selectbox("Select PDF file to delete", pdf_files)
+            page_numbers = st.text_input("Enter page numbers (comma-separated)")
+            search_strings = st.text_input("Enter search strings (comma-separated)")
+            rect_coords = st.text_input("Enter rectangle coordinates (x1,y1,x2,y2)")
+            submit_button = st.form_submit_button("Extract Text")
+            if submit_button:
+                if not page_numbers:
+                    page_numbers = [0]
+                else:
+                    page_numbers = [int(i) for i in page_numbers.split(',')]
+                if not search_strings:
+                    search_strings = []
+                else:
+                    search_strings = [s.strip() for s in search_strings.split(',')]
+                output = parse_pdf(pdf_file, page_numbers, search_strings, rect_coords)
+                st.write(output)
+    elif tab == "Manage PDFs":
+        st.write("### Manage PDF Files")
+        with st.container():
+            col1, col2 = st.columns(2)
+            # Download PDF
+            with col1:
+                url = st.text_input("Enter PDF URL")
+                if st.button("Download PDF"):
+                    if not url:
+                        st.error("Please enter a URL")
+                    else:
+                        pdf_file_name = download_pdf(url)
+                        st.success(f"PDF downloaded: {pdf_file_name}")
+            # Delete PDF
+            with col2:
+                pdf_files = os.listdir(PDF_STORAGE_DIR)
+                pdf_file_name = st.selectbox("Select PDF file to delete", pdf_files)
+                if st.button("Delete PDF"):
+                    output = delete_pdf(pdf_file_name)
+                    st.success(output)
+        with st.container():
+            st.write("# Upload PDF")
+            # Upload PDF
+            pdf_file = st.file_uploader("Select PDF file")
+            if pdf_file:
+                pdf_file_name = upload_pdf(pdf_file)
+                st.success(f"PDF uploaded: {pdf_file_name}")
 if __name__ == "__main__":
     main()